Merge branch 'master' into node-pools
feiskyer authored Dec 29, 2017
2 parents 56ec7fa + 58f4e6b commit 8bd103b
Showing 12 changed files with 56 additions and 15 deletions.
2 changes: 1 addition & 1 deletion addon-resizer/Makefile
@@ -22,7 +22,7 @@ ARCH ?= amd64

GOARM=7
GOLANG_VERSION = 1.8.3
-REGISTRY = gcr.io/google_containers
+REGISTRY = k8s.gcr.io
IMGNAME = addon-resizer
IMAGE = $(REGISTRY)/$(IMGNAME)
MULTI_ARCH_IMG = $(IMAGE)-$(ARCH)
2 changes: 1 addition & 1 deletion addon-resizer/README.md
@@ -55,7 +55,7 @@ spec:
kubernetes.io/cluster-service: "true"
spec:
containers:
-      - image: gcr.io/google_containers/addon-resizer:1.0
+      - image: k8s.gcr.io/addon-resizer:1.0
imagePullPolicy: Always
name: pod-nanny
resources:
3 changes: 2 additions & 1 deletion cluster-autoscaler/FAQ.md
@@ -351,7 +351,8 @@
Every 10 seconds (configurable by `--scan-interval` flag), if no scale-up is
needed, Cluster Autoscaler checks which nodes are unneeded. A node is considered for removal when:

* The sum of cpu and memory requests of all pods running on this node is smaller
-than 50% of the node's capacity. Utilization threshold can be configured using
+than 50% of the node's allocatable. (Before 1.1.0, node capacity was used
+instead of allocatable.) Utilization threshold can be configured using
`--scale-down-utilization-threshold` flag.

* All pods running on the node (except those that run on all nodes by default, like manifest-run pods
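
The FAQ change above switches the scale-down check from node capacity to node allocatable. Below is a minimal, standalone Go sketch (not part of this commit) that restates the check with invented types and numbers; the real implementation lives in `simulator/cluster.go`, further down in this diff, and the 0.5 threshold stands in for the default value of `--scale-down-utilization-threshold`.

```go
package main

import "fmt"

// nodeResources is an invented stand-in for what the autoscaler reads from
// node.Status.Allocatable and from the summed requests of the pods on the node.
type nodeResources struct {
	requestedCPUMillis, allocatableCPUMillis int64
	requestedMemBytes, allocatableMemBytes   int64
}

// scaleDownCandidate reports whether a node falls below the utilization
// threshold: the larger of its cpu and memory utilization (requests divided
// by allocatable) must be smaller than the threshold, e.g. 0.5 for 50%.
func scaleDownCandidate(n nodeResources, threshold float64) bool {
	cpuUtil := float64(n.requestedCPUMillis) / float64(n.allocatableCPUMillis)
	memUtil := float64(n.requestedMemBytes) / float64(n.allocatableMemBytes)
	util := cpuUtil
	if memUtil > util {
		util = memUtil
	}
	return util < threshold
}

func main() {
	n := nodeResources{
		requestedCPUMillis: 300, allocatableCPUMillis: 1000, // 30% cpu
		requestedMemBytes: 2 << 30, allocatableMemBytes: 8 << 30, // 25% memory
	}
	fmt.Println(scaleDownCandidate(n, 0.5)) // true: max(0.30, 0.25) is below 50%
}
```
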
6 changes: 3 additions & 3 deletions cluster-autoscaler/cloudprovider/aws/README.md
@@ -72,7 +72,7 @@ spec:
app: cluster-autoscaler
spec:
containers:
-      - image: gcr.io/google_containers/cluster-autoscaler:v0.6.0
+      - image: k8s.gcr.io/cluster-autoscaler:v0.6.0
name: cluster-autoscaler
resources:
limits:
@@ -123,7 +123,7 @@ spec:
app: cluster-autoscaler
spec:
containers:
-      - image: gcr.io/google_containers/cluster-autoscaler:v0.6.0
+      - image: k8s.gcr.io/cluster-autoscaler:v0.6.0
name: cluster-autoscaler
resources:
limits:
@@ -183,7 +183,7 @@ spec:
nodeSelector:
kubernetes.io/role: master
containers:
-      - image: gcr.io/google_containers/cluster-autoscaler:{{ ca_version }}
+      - image: k8s.gcr.io/cluster-autoscaler:{{ ca_version }}
name: cluster-autoscaler
resources:
limits:
17 changes: 16 additions & 1 deletion cluster-autoscaler/core/static_autoscaler.go
@@ -32,6 +32,11 @@ import (
"github.com/golang/glog"
)

+const (
+	// How old the oldest unschedulable pod should be before starting scale up.
+	unschedulablePodTimeBuffer = 2 * time.Second
+)

// StaticAutoscaler is an autoscaler which has all the core functionality of a CA but without the reconfiguration feature
type StaticAutoscaler struct {
// AutoscalingContext consists of validated settings and options for this autoscaler
@@ -240,10 +245,19 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
glog.V(4).Info("No schedulable pods")
}

+// If all pending pods are new, we may want to skip a real scale down (just as if the pods had been handled).
+allPendingPodsToHelpAreNew := false

if len(unschedulablePodsToHelp) == 0 {
glog.V(1).Info("No unschedulable pods")
} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
glog.V(1).Info("Max total nodes in cluster reached")
+} else if getOldestCreateTime(unschedulablePodsToHelp).Add(unschedulablePodTimeBuffer).After(currentTime) {
+// The assumption here is that these pods have been created very recently and more are probably
+// on the way. In theory we could check the newest pod's creation time instead, but if pods kept
+// arriving slowly, at a pace of one every 2 seconds, no scale-up would be triggered for a long time.
+allPendingPodsToHelpAreNew = true
+glog.V(1).Info("Unschedulable pods are very new, waiting one iteration for more")
} else {
daemonsets, err := a.ListerRegistry.DaemonSetLister().List()
if err != nil {
@@ -301,7 +315,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
a.lastScaleDownFailTime.Add(a.ScaleDownDelayAfterFailure).After(currentTime) ||
a.lastScaleDownDeleteTime.Add(a.ScaleDownDelayAfterDelete).After(currentTime) ||
schedulablePodsPresent ||
-scaleDown.nodeDeleteStatus.IsDeleteInProgress()
+scaleDown.nodeDeleteStatus.IsDeleteInProgress() ||
+allPendingPodsToHelpAreNew

glog.V(4).Infof("Scale down status: unneededOnly=%v lastScaleUpTime=%s "+
"lastScaleDownDeleteTime=%v lastScaleDownFailTime=%s schedulablePodsPresent=%v isDeleteInProgress=%v",
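
A rough illustration (not part of this commit) of the new guard: if even the oldest pending pod is younger than `unschedulablePodTimeBuffer`, scale-up is postponed for one loop and, via `allPendingPodsToHelpAreNew`, scale-down is skipped for that iteration too. The helper name `shouldWaitForMorePods` and the timestamps are invented for the example; `getOldestCreateTime` is the helper added in `core/utils.go` below.

```go
package main

import (
	"fmt"
	"time"
)

const unschedulablePodTimeBuffer = 2 * time.Second

// shouldWaitForMorePods mirrors the added check in RunOnce: wait if the oldest
// unschedulable pod was created less than unschedulablePodTimeBuffer ago.
func shouldWaitForMorePods(oldestCreateTime, currentTime time.Time) bool {
	return oldestCreateTime.Add(unschedulablePodTimeBuffer).After(currentTime)
}

func main() {
	now := time.Now()
	fmt.Println(shouldWaitForMorePods(now.Add(-1*time.Second), now))  // true: the pod is only 1s old, wait for more
	fmt.Println(shouldWaitForMorePods(now.Add(-10*time.Second), now)) // false: old enough, proceed with scale-up
}
```
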
10 changes: 10 additions & 0 deletions cluster-autoscaler/core/utils.go
@@ -491,3 +491,13 @@ func UpdateClusterStateMetrics(csr *clusterstate.ClusterStateRegistry) {
readiness := csr.GetClusterReadiness()
metrics.UpdateNodesCount(readiness.Ready, readiness.Unready+readiness.LongNotStarted, readiness.NotStarted)
}

+func getOldestCreateTime(pods []*apiv1.Pod) time.Time {
+	oldest := time.Now()
+	for _, pod := range pods {
+		if oldest.After(pod.CreationTimestamp.Time) {
+			oldest = pod.CreationTimestamp.Time
+		}
+	}
+	return oldest
+}
12 changes: 12 additions & 0 deletions cluster-autoscaler/core/utils_test.go
@@ -527,3 +527,15 @@ func TestGetNodeCoresAndMemory(t *testing.T) {
_, _, err = getNodeCoresAndMemory(node)
assert.Error(t, err)
}

+func TestGetOldestPod(t *testing.T) {
+	p1 := BuildTestPod("p1", 500, 1000)
+	p1.CreationTimestamp = metav1.NewTime(time.Now().Add(-1 * time.Minute))
+	p2 := BuildTestPod("p2", 500, 1000)
+	p2.CreationTimestamp = metav1.NewTime(time.Now().Add(+1 * time.Minute))
+	p3 := BuildTestPod("p3", 500, 1000)
+	p3.CreationTimestamp = metav1.NewTime(time.Now())
+
+	assert.Equal(t, p1.CreationTimestamp.Time, getOldestCreateTime([]*apiv1.Pod{p1, p2, p3}))
+	assert.Equal(t, p1.CreationTimestamp.Time, getOldestCreateTime([]*apiv1.Pod{p3, p2, p1}))
+}
9 changes: 5 additions & 4 deletions cluster-autoscaler/simulator/cluster.go
@@ -137,7 +137,8 @@ func FindEmptyNodesToRemove(candidates []*apiv1.Node, pods []*apiv1.Pod) []*apiv
return result
}

-// CalculateUtilization calculates utilization of a node, defined as total amount of requested resources divided by capacity.
+// CalculateUtilization calculates utilization of a node, defined as the maximum of (cpu, memory) utilization.
+// Per-resource utilization is the sum of requests for that resource divided by the node's allocatable.
func CalculateUtilization(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo) (float64, error) {
cpu, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceCPU)
if err != nil {
@@ -151,11 +152,11 @@ func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo) (
}

func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, resourceName apiv1.ResourceName) (float64, error) {
-nodeCapacity, found := node.Status.Capacity[resourceName]
+nodeAllocatable, found := node.Status.Allocatable[resourceName]
if !found {
return 0, fmt.Errorf("Failed to get %v from %s", resourceName, node.Name)
}
-if nodeCapacity.MilliValue() == 0 {
+if nodeAllocatable.MilliValue() == 0 {
return 0, fmt.Errorf("%v is 0 at %s", resourceName, node.Name)
}
podsRequest := resource.MustParse("0")
@@ -166,7 +167,7 @@ func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.N
}
}
}
-return float64(podsRequest.MilliValue()) / float64(nodeCapacity.MilliValue()), nil
+return float64(podsRequest.MilliValue()) / float64(nodeAllocatable.MilliValue()), nil
}

// TODO: We don't need to pass list of nodes here as they are already available in nodeInfos.
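
A short sketch (not part of this commit, numbers invented) of why the switch from capacity to allocatable matters: resources reserved for system daemons are excluded from allocatable, so the same pod requests can land on opposite sides of the scale-down threshold.

```go
package main

import "fmt"

func main() {
	// Hypothetical node: 4000m CPU capacity, 500m reserved for system daemons,
	// so 3500m allocatable; the pods on it request 1800m in total.
	const capacityMilli, allocatableMilli, requestedMilli = 4000.0, 3500.0, 1800.0

	fmt.Printf("utilization vs capacity:    %.2f\n", requestedMilli/capacityMilli)    // 0.45 -> below the 0.5 default, scale-down candidate
	fmt.Printf("utilization vs allocatable: %.2f\n", requestedMilli/allocatableMilli) // 0.51 -> above the threshold, node is kept
}
```
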
2 changes: 1 addition & 1 deletion hack/verify-gofmt.sh
@@ -24,7 +24,7 @@ KUBE_ROOT=$(dirname "${BASH_SOURCE}")/..

GO_VERSION=($(go version))

-if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8') ]]; then
+if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8|go1.9') ]]; then
echo "Unknown go version '${GO_VERSION}', skipping gofmt."
exit 1
fi
2 changes: 1 addition & 1 deletion vertical-pod-autoscaler/initializer/Dockerfile
@@ -13,7 +13,7 @@
# limitations under the License.


-FROM gcr.io/google_containers/ubuntu-slim:0.1
+FROM k8s.gcr.io/ubuntu-slim:0.1
MAINTAINER Beata Skiba "[email protected]"
MAINTAINER Marcin Wielgus "[email protected]"

4 changes: 3 additions & 1 deletion vertical-pod-autoscaler/initializer/core/initializer.go
@@ -130,11 +130,13 @@ func (initializer *initializer) updateResourceRequests(obj interface{}) {
failedPod, err := markAsFailed(pod)
if err != nil {
glog.Errorf("unable to mark failed initialization for pod %v: %v", pod.Name, err)
+return
}
err = initializer.doUpdatePod(failedPod)
if err != nil {
glog.Errorf("unable to mark failed initialization for pod %v: %v", pod.Name, err)
glog.Errorf("unable to update pod %v after failed initialization: %v", pod.Name, err)
}
+return
}
err = initializer.doUpdatePod(initializedPod)
if err != nil {
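
The two added `return` statements are the substance of this change: without them, the failure branch would fall through and still attempt the normal update of the pod. A stripped-down sketch of the intended control flow (not this package's actual code; `Pod`, `applyRecommendation`, `markAsFailed` and `updatePod` are invented stand-ins):

```go
package main

import (
	"errors"
	"log"
)

// Pod and the helpers below stand in for the initializer's real types and
// functions; only the control flow is the point of the example.
type Pod struct{ Name string }

func applyRecommendation(p *Pod) (*Pod, error) { return nil, errors.New("no recommendation") }
func markAsFailed(p *Pod) (*Pod, error)        { return p, nil }
func updatePod(p *Pod) error                   { return nil }

// updateRequests mirrors the fixed flow: once the failure branch is entered,
// processing of this pod stops and never falls through to the normal update.
func updateRequests(pod *Pod) {
	initialized, err := applyRecommendation(pod)
	if err != nil {
		failed, err := markAsFailed(pod)
		if err != nil {
			log.Printf("unable to mark failed initialization for pod %v: %v", pod.Name, err)
			return // cannot even record the failure; give up on this pod
		}
		if err := updatePod(failed); err != nil {
			log.Printf("unable to update pod %v after failed initialization: %v", pod.Name, err)
		}
		return // failure handled; skip the normal update below
	}
	if err := updatePod(initialized); err != nil {
		log.Printf("unable to update pod %v: %v", pod.Name, err)
	}
}

func main() { updateRequests(&Pod{Name: "example"}) }
```
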
2 changes: 1 addition & 1 deletion vertical-pod-autoscaler/updater/Dockerfile
@@ -13,7 +13,7 @@
# limitations under the License.


-FROM gcr.io/google_containers/ubuntu-slim:0.1
+FROM k8s.gcr.io/ubuntu-slim:0.1
MAINTAINER Gabriela Filipek "[email protected]"
MAINTAINER Marcin Wielgus "[email protected]"

