Skip to content

Commit

Permalink
Merge pull request #2827 from aleksandra-malinowska/logging-taints-cp-1.16
Browse files Browse the repository at this point in the history

Cherry-pick #2638, #2717, and #2719 to 1.16: Log taint preventing scale-up
  • Loading branch information
k8s-ci-robot authored Feb 13, 2020
2 parents 70a28db + ce5fa1d commit 6ba0b20
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 8 deletions.
15 changes: 9 additions & 6 deletions cluster-autoscaler/core/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,10 @@ package core

import (
"fmt"
"k8s.io/apimachinery/pkg/types"
"math/rand"
"reflect"
"time"

appsv1 "k8s.io/api/apps/v1"
apiv1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
"k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils"
Expand All @@ -39,15 +35,21 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/glogx"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"

appsv1 "k8s.io/api/apps/v1"
apiv1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)

const (
// ReschedulerTaintKey is the name of the taint created by rescheduler.
ReschedulerTaintKey = "CriticalAddonsOnly"

// gkeNodeTerminationHandlerTaint is the key of a taint that marks a node as
// about to be terminated.
// NOTE(review): presumably applied by the GKE node termination handler - confirm.
gkeNodeTerminationHandlerTaint = "cloud.google.com/impending-node-termination"
)

var (
Expand All @@ -61,6 +63,7 @@ var (
schedulerapi.TaintNodePIDPressure: true,
schedulerapi.TaintExternalCloudProvider: true,
schedulerapi.TaintNodeShutdown: true,
gkeNodeTerminationHandlerTaint: true,
}
)

Expand Down
24 changes: 22 additions & 2 deletions cluster-autoscaler/simulator/predicates.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,9 @@ type PredicateError struct {
predicateName string
failureReasons []predicates.PredicateFailureReason
err error
// debugInfo contains additional info that predicate doesn't include,
// but may be useful for debugging (e.g. taints on node blocking scale-up)
debugInfo func() string

reasons []string
message string
Expand All @@ -307,10 +310,10 @@ func (pe *PredicateError) VerboseError() string {
}
// Generate verbose message.
if pe.err != nil {
pe.message = fmt.Sprintf("%s predicate error: %v", pe.predicateName, pe.err)
pe.message = fmt.Sprintf("%s predicate error: %v, %v", pe.predicateName, pe.err, pe.debugInfo())
return pe.message
}
pe.message = fmt.Sprintf("%s predicate mismatch, reason: %s", pe.predicateName, strings.Join(pe.Reasons(), ", "))
pe.message = fmt.Sprintf("%s predicate mismatch, reason: %s, %v", pe.predicateName, strings.Join(pe.Reasons(), ", "), pe.debugInfo())
return pe.message
}

Expand Down Expand Up @@ -366,8 +369,25 @@ func (p *PredicateChecker) CheckPredicates(pod *apiv1.Pod, predicateMetadata pre
predicateName: predInfo.Name,
failureReasons: failureReasons,
err: err,
debugInfo: p.buildDebugInfo(predInfo, nodeInfo),
}
}
}
return nil
}

// emptyString is a debug-info provider that contributes no extra text.
// buildDebugInfo returns it for predicates that have nothing to add.
func emptyString() string {
	var none string
	return none
}

// buildDebugInfo returns a lazily evaluated provider of extra debugging text
// for a failed predicate. The text is only rendered when the returned function
// is called, so no formatting cost is paid unless the message is requested.
//
// For the "PodToleratesNodeTaints" predicate the provider reports the taints
// found on the node held by nodeInfo. For every other predicate, and when
// nodeInfo carries no node, it returns emptyString.
func (p *PredicateChecker) buildDebugInfo(predInfo PredicateInfo, nodeInfo *schedulernodeinfo.NodeInfo) func() string {
	switch predInfo.Name {
	case "PodToleratesNodeTaints":
		// Guard against a missing node: dereferencing it below would turn a
		// plain predicate failure into a nil-pointer panic.
		if nodeInfo == nil || nodeInfo.Node() == nil {
			return emptyString
		}
		// Capture the taints now; format them only on demand.
		taints := nodeInfo.Node().Spec.Taints
		return func() string {
			return fmt.Sprintf("taints on node: %#v", taints)
		}
	default:
		return emptyString
	}
}
34 changes: 34 additions & 0 deletions cluster-autoscaler/simulator/predicates_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ import (
"testing"
"time"

apiv1 "k8s.io/api/core/v1"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -67,3 +69,35 @@ func TestPredicates(t *testing.T) {
assert.Nil(t, predicateChecker.CheckPredicates(p4, nil, ni2))
assert.NotNil(t, predicateChecker.CheckPredicates(p3, nil, ni2))
}

func TestDebugInfo(t *testing.T) {
p1 := BuildTestPod("p1", 0, 0)

ni1 := schedulernodeinfo.NewNodeInfo()
node1 := BuildTestNode("n1", 1000, 2000000)
node1.Spec.Taints = []apiv1.Taint{
{
Key: "SomeTaint",
Value: "WhyNot?",
Effect: apiv1.TaintEffectNoSchedule,
},
{
Key: "RandomTaint",
Value: "JustBecause",
Effect: apiv1.TaintEffectNoExecute,
},
}
SetNodeReadyState(node1, true, time.Time{})
ni1.SetNode(node1)

predicateChecker := NewTestPredicateChecker()
predicateChecker.predicates = append(predicateChecker.predicates, PredicateInfo{
Name: "PodToleratesNodeTaints",
Predicate: predicates.PodToleratesNodeTaints,
})

predicateErr := predicateChecker.CheckPredicates(p1, nil, ni1)
assert.NotNil(t, predicateErr)
assert.True(t, strings.Contains(predicateErr.Error(), "Predicates failed"))
assert.True(t, strings.Contains(predicateErr.VerboseError(), "RandomTaint"), "got: %v, want: %v", predicateErr.VerboseError(), "RandomTaint")
}

0 comments on commit 6ba0b20

Please sign in to comment.