From f87c1da2cb5e6d72c8e3020b98d366b6dcd1645a Mon Sep 17 00:00:00 2001 From: Aleksandra Malinowska Date: Fri, 8 Dec 2017 12:54:30 +0100 Subject: [PATCH 1/5] Use allocatable instead of capacity to calculate node utilization --- cluster-autoscaler/FAQ.md | 3 ++- cluster-autoscaler/simulator/cluster.go | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md index e7aab6d3e547..bf20537576cd 100644 --- a/cluster-autoscaler/FAQ.md +++ b/cluster-autoscaler/FAQ.md @@ -351,7 +351,8 @@ Every 10 seconds (configurable by `--scan-interval` flag), if no scale-up is needed, Cluster Autoscaler checks which nodes are unneeded. A node is considered for removal when: * The sum of cpu and memory requests of all pods running on this node is smaller - than 50% of the node's capacity. Utilization threshold can be configured using + than 50% of the node's allocatable. (Before 1.1.0, node capacity was used + instead of allocatable.) Utilization threshold can be configured using `--scale-down-utilization-threshold` flag. * All pods running on the node (except these that run on all nodes by default, like manifest-run pods diff --git a/cluster-autoscaler/simulator/cluster.go b/cluster-autoscaler/simulator/cluster.go index e1f3c13862f0..f49c426af354 100644 --- a/cluster-autoscaler/simulator/cluster.go +++ b/cluster-autoscaler/simulator/cluster.go @@ -137,7 +137,8 @@ func FindEmptyNodesToRemove(candidates []*apiv1.Node, pods []*apiv1.Pod) []*apiv return result } -// CalculateUtilization calculates utilization of a node, defined as total amount of requested resources divided by capacity. +// CalculateUtilization calculates utilization of a node, defined as maximum of (cpu, memory) utilization. +// Per resource utilization is the sum of requests for it divided by allocatable. func CalculateUtilization(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo) (float64, error) { cpu, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceCPU) if err != nil { @@ -151,11 +152,11 @@ func CalculateUtilization(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo) ( } func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, resourceName apiv1.ResourceName) (float64, error) { - nodeCapacity, found := node.Status.Capacity[resourceName] + nodeAllocatable, found := node.Status.Allocatable[resourceName] if !found { return 0, fmt.Errorf("Failed to get %v from %s", resourceName, node.Name) } - if nodeCapacity.MilliValue() == 0 { + if nodeAllocatable.MilliValue() == 0 { return 0, fmt.Errorf("%v is 0 at %s", resourceName, node.Name) } podsRequest := resource.MustParse("0") @@ -166,7 +167,7 @@ func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.N } } } - return float64(podsRequest.MilliValue()) / float64(nodeCapacity.MilliValue()), nil + return float64(podsRequest.MilliValue()) / float64(nodeAllocatable.MilliValue()), nil } // TODO: We don't need to pass list of nodes here as they are already available in nodeInfos. From c7a8bee5895ca7afa9692901c58461b307f32fad Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Fri, 22 Dec 2017 09:55:16 -0800 Subject: [PATCH 2/5] Convert registry to k8s.gcr.io --- addon-resizer/Makefile | 2 +- addon-resizer/README.md | 2 +- cluster-autoscaler/cloudprovider/aws/README.md | 6 +++--- cluster-autoscaler/cloudprovider/azure/README.md | 4 ++-- vertical-pod-autoscaler/initializer/Dockerfile | 2 +- vertical-pod-autoscaler/updater/Dockerfile | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/addon-resizer/Makefile b/addon-resizer/Makefile index c1efa43e1010..a6b4df93941c 100644 --- a/addon-resizer/Makefile +++ b/addon-resizer/Makefile @@ -22,7 +22,7 @@ ARCH ?= amd64 GOARM=7 GOLANG_VERSION = 1.8.3 -REGISTRY = gcr.io/google_containers +REGISTRY = k8s.gcr.io IMGNAME = addon-resizer IMAGE = $(REGISTRY)/$(IMGNAME) MULTI_ARCH_IMG = $(IMAGE)-$(ARCH) diff --git a/addon-resizer/README.md b/addon-resizer/README.md index e3dae72f0538..d27a539cbe7f 100644 --- a/addon-resizer/README.md +++ b/addon-resizer/README.md @@ -55,7 +55,7 @@ spec: kubernetes.io/cluster-service: "true" spec: containers: - - image: gcr.io/google_containers/addon-resizer:1.0 + - image: k8s.gcr.io/addon-resizer:1.0 imagePullPolicy: Always name: pod-nanny resources: diff --git a/cluster-autoscaler/cloudprovider/aws/README.md b/cluster-autoscaler/cloudprovider/aws/README.md index 57549f530ad0..3da8550aff20 100644 --- a/cluster-autoscaler/cloudprovider/aws/README.md +++ b/cluster-autoscaler/cloudprovider/aws/README.md @@ -72,7 +72,7 @@ spec: app: cluster-autoscaler spec: containers: - - image: gcr.io/google_containers/cluster-autoscaler:v0.6.0 + - image: k8s.gcr.io/cluster-autoscaler:v0.6.0 name: cluster-autoscaler resources: limits: @@ -123,7 +123,7 @@ spec: app: cluster-autoscaler spec: containers: - - image: gcr.io/google_containers/cluster-autoscaler:v0.6.0 + - image: k8s.gcr.io/cluster-autoscaler:v0.6.0 name: cluster-autoscaler resources: limits: @@ -183,7 +183,7 @@ spec: nodeSelector: kubernetes.io/role: master containers: - - image: gcr.io/google_containers/cluster-autoscaler:{{ ca_version }} + - image: k8s.gcr.io/cluster-autoscaler:{{ ca_version }} name: cluster-autoscaler resources: limits: diff --git a/cluster-autoscaler/cloudprovider/azure/README.md b/cluster-autoscaler/cloudprovider/azure/README.md index 0922642d57bd..d9430527e5b4 100644 --- a/cluster-autoscaler/cloudprovider/azure/README.md +++ b/cluster-autoscaler/cloudprovider/azure/README.md @@ -59,7 +59,7 @@ spec: app: cluster-autoscaler spec: containers: - - image: gcr.io/google_containers/cluster-autoscaler:{{ ca_version }} + - image: k8s.gcr.io/cluster-autoscaler:{{ ca_version }} name: cluster-autoscaler resources: limits: @@ -144,7 +144,7 @@ spec: nodeSelector: kubernetes.io/role: master containers: - - image: gcr.io/google_containers/cluster-autoscaler:{{ ca_version }} + - image: k8s.gcr.io/cluster-autoscaler:{{ ca_version }} name: cluster-autoscaler resources: limits: diff --git a/vertical-pod-autoscaler/initializer/Dockerfile b/vertical-pod-autoscaler/initializer/Dockerfile index 5a7fd5ad4002..dbe9b39beafd 100644 --- a/vertical-pod-autoscaler/initializer/Dockerfile +++ b/vertical-pod-autoscaler/initializer/Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. -FROM gcr.io/google_containers/ubuntu-slim:0.1 +FROM k8s.gcr.io/ubuntu-slim:0.1 MAINTAINER Beata Skiba "bskiba@google.com" MAINTAINER Marcin Wielgus "mwielgus@google.com" diff --git a/vertical-pod-autoscaler/updater/Dockerfile b/vertical-pod-autoscaler/updater/Dockerfile index fcce056aa1ba..bf27d63693f2 100644 --- a/vertical-pod-autoscaler/updater/Dockerfile +++ b/vertical-pod-autoscaler/updater/Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. -FROM gcr.io/google_containers/ubuntu-slim:0.1 +FROM k8s.gcr.io/ubuntu-slim:0.1 MAINTAINER Gabriela Filipek "gfilipek@google.com" MAINTAINER Marcin Wielgus "mwielgus@google.com" From 847bd797269834b52188443ef410f7cb7c3fb53e Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Thu, 28 Dec 2017 13:27:06 +0800 Subject: [PATCH 3/5] Support go1.9 when verifying gofmt --- hack/verify-gofmt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/verify-gofmt.sh b/hack/verify-gofmt.sh index fd8527c36d07..5d5ebd132ed5 100755 --- a/hack/verify-gofmt.sh +++ b/hack/verify-gofmt.sh @@ -24,7 +24,7 @@ KUBE_ROOT=$(dirname "${BASH_SOURCE}")/.. GO_VERSION=($(go version)) -if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8') ]]; then +if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8|go1.9') ]]; then echo "Unknown go version '${GO_VERSION}', skipping gofmt." exit 1 fi From 62ff748f09bf13dc6516f978937cd7f4a58cff01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomek=20Kulczy=C5=84ski?= Date: Thu, 28 Dec 2017 16:33:09 +0100 Subject: [PATCH 4/5] Fix error branch in pod initialization --- vertical-pod-autoscaler/initializer/core/initializer.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vertical-pod-autoscaler/initializer/core/initializer.go b/vertical-pod-autoscaler/initializer/core/initializer.go index bb63c4dedd9a..25001da7647d 100644 --- a/vertical-pod-autoscaler/initializer/core/initializer.go +++ b/vertical-pod-autoscaler/initializer/core/initializer.go @@ -130,11 +130,13 @@ func (initializer *initializer) updateResourceRequests(obj interface{}) { failedPod, err := markAsFailed(pod) if err != nil { glog.Errorf("unable to mark failed initialization for pod %v: %v", pod.Name, err) + return } err = initializer.doUpdatePod(failedPod) if err != nil { - glog.Errorf("unable to mark failed initialization for pod %v: %v", pod.Name, err) + glog.Errorf("unable to update pod %v after failed initialization: %v", pod.Name, err) } + return } err = initializer.doUpdatePod(initializedPod) if err != nil { From 15b10c8f6760f3afc213af11c9e5ad4898c0fd0c Mon Sep 17 00:00:00 2001 From: Marcin Wielgus Date: Wed, 20 Dec 2017 21:19:10 +0100 Subject: [PATCH 5/5] Skip iteration if pending pods are too new --- cluster-autoscaler/core/static_autoscaler.go | 17 ++++++++++++++++- cluster-autoscaler/core/utils.go | 10 ++++++++++ cluster-autoscaler/core/utils_test.go | 12 ++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 7cc1ad47bae4..719849c854b4 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -32,6 +32,11 @@ import ( "github.com/golang/glog" ) +const ( + // How old the oldest unschedulable pod should be before starting scale up. + unschedulablePodTimeBuffer = 2 * time.Second +) + // StaticAutoscaler is an autoscaler which has all the core functionality of a CA but without the reconfiguration feature type StaticAutoscaler struct { // AutoscalingContext consists of validated settings and options for this autoscaler @@ -240,10 +245,19 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError glog.V(4).Info("No schedulable pods") } + // If all pending pods are new we may want to skip a real scale down (just like if the pods were handled). + allPendingPodsToHelpAreNew := false + if len(unschedulablePodsToHelp) == 0 { glog.V(1).Info("No unschedulable pods") } else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal { glog.V(1).Info("Max total nodes in cluster reached") + } else if getOldestCreateTime(unschedulablePodsToHelp).Add(unschedulablePodTimeBuffer).After(currentTime) { + // The assumption here is that these pods have been created very recently and probably there + // is more pods to come. In theory we could check the newest pod time but then if pod were created + // slowly but at the pace of 1 every 2 seconds then no scale up would be triggered for long time. + allPendingPodsToHelpAreNew = true + glog.V(1).Info("Unschedulable pods are very new, waiting one iteration for more") } else { daemonsets, err := a.ListerRegistry.DaemonSetLister().List() if err != nil { @@ -301,7 +315,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError a.lastScaleDownFailTime.Add(a.ScaleDownDelayAfterFailure).After(currentTime) || a.lastScaleDownDeleteTime.Add(a.ScaleDownDelayAfterDelete).After(currentTime) || schedulablePodsPresent || - scaleDown.nodeDeleteStatus.IsDeleteInProgress() + scaleDown.nodeDeleteStatus.IsDeleteInProgress() || + allPendingPodsToHelpAreNew glog.V(4).Infof("Scale down status: unneededOnly=%v lastScaleUpTime=%s "+ "lastScaleDownDeleteTime=%v lastScaleDownFailTime=%s schedulablePodsPresent=%v isDeleteInProgress=%v", diff --git a/cluster-autoscaler/core/utils.go b/cluster-autoscaler/core/utils.go index 8c2c9ccf9d4a..9aeb1301dfa4 100644 --- a/cluster-autoscaler/core/utils.go +++ b/cluster-autoscaler/core/utils.go @@ -491,3 +491,13 @@ func UpdateClusterStateMetrics(csr *clusterstate.ClusterStateRegistry) { readiness := csr.GetClusterReadiness() metrics.UpdateNodesCount(readiness.Ready, readiness.Unready+readiness.LongNotStarted, readiness.NotStarted) } + +func getOldestCreateTime(pods []*apiv1.Pod) time.Time { + oldest := time.Now() + for _, pod := range pods { + if oldest.After(pod.CreationTimestamp.Time) { + oldest = pod.CreationTimestamp.Time + } + } + return oldest +} diff --git a/cluster-autoscaler/core/utils_test.go b/cluster-autoscaler/core/utils_test.go index 8bb1a74fe251..d596d2c08a4f 100644 --- a/cluster-autoscaler/core/utils_test.go +++ b/cluster-autoscaler/core/utils_test.go @@ -527,3 +527,15 @@ func TestGetNodeCoresAndMemory(t *testing.T) { _, _, err = getNodeCoresAndMemory(node) assert.Error(t, err) } + +func TestGetOldestPod(t *testing.T) { + p1 := BuildTestPod("p1", 500, 1000) + p1.CreationTimestamp = metav1.NewTime(time.Now().Add(-1 * time.Minute)) + p2 := BuildTestPod("p2", 500, 1000) + p2.CreationTimestamp = metav1.NewTime(time.Now().Add(+1 * time.Minute)) + p3 := BuildTestPod("p3", 500, 1000) + p3.CreationTimestamp = metav1.NewTime(time.Now()) + + assert.Equal(t, p1.CreationTimestamp.Time, getOldestCreateTime([]*apiv1.Pod{p1, p2, p3})) + assert.Equal(t, p1.CreationTimestamp.Time, getOldestCreateTime([]*apiv1.Pod{p3, p2, p1})) +}