Skip to content

Commit

Permalink
fix: Handle NodeClaims existing but being unknown in earlier versions (
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis authored Oct 13, 2023
1 parent 199e81a commit 4239902
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 6 deletions.
28 changes: 28 additions & 0 deletions pkg/controllers/deprovisioning/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ import (
"github.com/aws/karpenter-core/pkg/scheduling"
"github.com/aws/karpenter-core/pkg/test"
. "github.com/aws/karpenter-core/pkg/test/expectations"
nodeclaimutil "github.com/aws/karpenter-core/pkg/utils/nodeclaim"
nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool"
)

var ctx context.Context
Expand Down Expand Up @@ -124,6 +126,9 @@ var _ = BeforeEach(func() {
})
leastExpensiveInstance, mostExpensiveInstance = onDemandInstances[0], onDemandInstances[len(onDemandInstances)-1]
leastExpensiveOffering, mostExpensiveOffering = leastExpensiveInstance.Offerings[0], mostExpensiveInstance.Offerings[0]

nodepoolutil.EnableNodePools = true
nodeclaimutil.EnableNodeClaims = true
})

var _ = AfterEach(func() {
Expand Down Expand Up @@ -1230,6 +1235,29 @@ var _ = Describe("Combined/Deprovisioning", func() {
ExpectNotFound(ctx, env.Client, lo.Map(nodeClaims, func(nc *v1beta1.NodeClaim, _ int) client.Object { return nc })...)
ExpectNotFound(ctx, env.Client, lo.Map(nodes, func(n *v1.Node, _ int) client.Object { return n })...)
})
It("shouldn't consider a NodeClaim as a candidate if EnableNodePools/EnableNodeClaims isn't enabled", func() {
nodepoolutil.EnableNodePools = false
nodeclaimutil.EnableNodeClaims = false

nodePool.Spec.Disruption.ExpireAfter = v1beta1.NillableDuration{Duration: lo.ToPtr(time.Second * 30)}
nodeClaim.StatusConditions().MarkTrue(v1beta1.Expired)
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, nodeClaimNode)

// inform cluster state about nodes and nodeclaims
ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*v1.Node{nodeClaimNode}, []*v1beta1.NodeClaim{nodeClaim})

fakeClock.Step(10 * time.Minute)
wg := sync.WaitGroup{}
ExpectTriggerVerifyAction(&wg)
ExpectReconcileSucceeded(ctx, deprovisioningController, types.NamespacedName{})

// Expect that the expired nodeclaim is not gone
Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(1))

Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(1))
ExpectExists(ctx, env.Client, nodeClaim)
ExpectExists(ctx, env.Client, nodeClaimNode)
})
})

var _ = Describe("Pod Eviction Cost", func() {
Expand Down
5 changes: 3 additions & 2 deletions pkg/controllers/state/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"github.com/aws/karpenter-core/pkg/cloudprovider"
"github.com/aws/karpenter-core/pkg/scheduling"
nodeclaimutil "github.com/aws/karpenter-core/pkg/utils/nodeclaim"
nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool"
podutils "github.com/aws/karpenter-core/pkg/utils/pod"
)

Expand Down Expand Up @@ -266,7 +267,7 @@ func (c *Cluster) UpdateNode(ctx context.Context, node *v1.Node) error {

if node.Spec.ProviderID == "" {
// If we know that we own this node, we shouldn't allow the providerID to be empty
if node.Labels[v1alpha5.ProvisionerNameLabelKey] != "" || node.Labels[v1beta1.NodePoolLabelKey] != "" {
if node.Labels[v1alpha5.ProvisionerNameLabelKey] != "" || (node.Labels[v1beta1.NodePoolLabelKey] != "" && nodepoolutil.EnableNodePools) {
return nil
}
node.Spec.ProviderID = node.Name
Expand Down Expand Up @@ -509,7 +510,7 @@ func (c *Cluster) populateInflight(ctx context.Context, n *StateNode) error {
if n.inflightInitialized {
return nil
}
// If the node ies already initialized, we don't need to populate its inflight capacity
// If the node is already initialized, we don't need to populate its inflight capacity
// since its capacity is already represented by the node status
if n.Initialized() {
return nil
Expand Down
4 changes: 2 additions & 2 deletions pkg/controllers/state/statenode.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func NewNode() *StateNode {
}

func (in *StateNode) OwnerKey() nodepoolutil.Key {
if in.Labels()[v1beta1.NodePoolLabelKey] != "" {
if in.Labels()[v1beta1.NodePoolLabelKey] != "" && nodepoolutil.EnableNodePools {
return nodepoolutil.Key{Name: in.Labels()[v1beta1.NodePoolLabelKey], IsProvisioner: false}
}
if in.Labels()[v1alpha5.ProvisionerNameLabelKey] != "" {
Expand Down Expand Up @@ -362,7 +362,7 @@ func (in *StateNode) Nominated() bool {
func (in *StateNode) Managed() bool {
return in.NodeClaim != nil ||
(in.Node != nil && in.Node.Labels[v1alpha5.ProvisionerNameLabelKey] != "") ||
(in.Node != nil && in.Node.Labels[v1beta1.NodePoolLabelKey] != "")
(in.Node != nil && in.Node.Labels[v1beta1.NodePoolLabelKey] != "" && nodepoolutil.EnableNodePools)
}

func (in *StateNode) updateForPod(ctx context.Context, kubeClient client.Client, pod *v1.Pod) error {
Expand Down
124 changes: 124 additions & 0 deletions pkg/controllers/state/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ import (
"github.com/aws/karpenter-core/pkg/operator/options"
"github.com/aws/karpenter-core/pkg/operator/scheme"
"github.com/aws/karpenter-core/pkg/scheduling"
nodeclaimutil "github.com/aws/karpenter-core/pkg/utils/nodeclaim"
nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -101,6 +103,8 @@ var _ = BeforeEach(func() {
cloudProvider.InstanceTypes = fake.InstanceTypesAssorted()
provisioner = test.Provisioner(test.ProvisionerOptions{ObjectMeta: metav1.ObjectMeta{Name: "default"}})
nodePool = test.NodePool(v1beta1.NodePool{ObjectMeta: metav1.ObjectMeta{Name: "default"}})
nodepoolutil.EnableNodePools = true
nodeclaimutil.EnableNodeClaims = true
ExpectApplied(ctx, env.Client, provisioner, nodePool)
})
var _ = AfterEach(func() {
Expand Down Expand Up @@ -683,6 +687,28 @@ var _ = Describe("Inflight Nodes", func() {
ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine))
ExpectStateNodeNotFoundForMachine(machine)
})
It("should ignore node updates if nodes don't have provider id and are owned", func() {
node := test.Node(test.NodeOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
v1alpha5.ProvisionerNameLabelKey: provisioner.Name,
},
},
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1800m"),
v1.ResourceMemory: resource.MustParse("0"), // Should use the inflight capacity for this value
v1.ResourceEphemeralStorage: resource.MustParse("19000Mi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0"), // Should use the inflight allocatable for this value
v1.ResourceMemory: resource.MustParse("29250Mi"),
v1.ResourceEphemeralStorage: resource.MustParse("0"), // Should use the inflight allocatable for this value
},
})
ExpectApplied(ctx, env.Client, node)
ExpectReconcileSucceeded(ctx, nodeController, client.ObjectKeyFromObject(node))
ExpectStateNodeCount("==", 0)
})
It("should model the inflight data as machine with no node", func() {
machine := test.Machine(v1alpha5.Machine{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -1280,6 +1306,104 @@ var _ = Describe("Inflight Nodes", func() {
ExpectReconcileSucceeded(ctx, nodeClaimController, client.ObjectKeyFromObject(nodeClaim))
ExpectStateNodeNotFoundForNodeClaim(nodeClaim)
})
It("should ignore node updates if nodes don't have provider id and are owned", func() {
node := test.Node(test.NodeOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
v1beta1.NodePoolLabelKey: nodePool.Name,
},
},
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1800m"),
v1.ResourceMemory: resource.MustParse("0"), // Should use the inflight capacity for this value
v1.ResourceEphemeralStorage: resource.MustParse("19000Mi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0"), // Should use the inflight allocatable for this value
v1.ResourceMemory: resource.MustParse("29250Mi"),
v1.ResourceEphemeralStorage: resource.MustParse("0"), // Should use the inflight allocatable for this value
},
})
ExpectApplied(ctx, env.Client, node)
ExpectReconcileSucceeded(ctx, nodeController, client.ObjectKeyFromObject(node))
ExpectStateNodeCount("==", 0)
})
It("shouldn't ignore node updates if nodes don't have provider id and EnableNodePools/EnableNodeClaims isn't enabled", func() {
nodepoolutil.EnableNodePools = false
nodeclaimutil.EnableNodeClaims = false

node := test.Node(test.NodeOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
v1beta1.NodePoolLabelKey: nodePool.Name,
},
},
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1800m"),
v1.ResourceMemory: resource.MustParse("0"), // Should use the inflight capacity for this value
v1.ResourceEphemeralStorage: resource.MustParse("19000Mi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0"), // Should use the inflight allocatable for this value
v1.ResourceMemory: resource.MustParse("29250Mi"),
v1.ResourceEphemeralStorage: resource.MustParse("0"), // Should use the inflight allocatable for this value
},
})
ExpectApplied(ctx, env.Client, node)
ExpectReconcileSucceeded(ctx, nodeController, client.ObjectKeyFromObject(node))
ExpectStateNodeCount("==", 1)
})
It("shouldn't populate inflight capacity if EnableNodePools/EnableNodeClaims isn't enabled", func() {
nodepoolutil.EnableNodePools = false
nodeclaimutil.EnableNodeClaims = false

instanceType := cloudProvider.InstanceTypes[0]
node := test.Node(test.NodeOptions{
ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{
v1beta1.NodePoolLabelKey: nodePool.Name,
v1.LabelInstanceTypeStable: instanceType.Name,
}},
ProviderID: test.RandomProviderID(),
})
ExpectApplied(ctx, env.Client, node)
ExpectReconcileSucceeded(ctx, nodeController, client.ObjectKeyFromObject(node))

ExpectStateNodeCount("==", 1)
Expect(ExpectStateNodeExists(node).Allocatable()).To(HaveLen(0))
Expect(ExpectStateNodeExists(node).Capacity()).To(HaveLen(0))
})
It("shouldn't populate startup taints if EnableNodePools/EnableNodeClaims isn't enabled", func() {
nodepoolutil.EnableNodePools = false
nodeclaimutil.EnableNodeClaims = false

instanceType := cloudProvider.InstanceTypes[0]
nodePool.Spec.Template.Spec.StartupTaints = []v1.Taint{
{
Key: "test",
Effect: v1.TaintEffectNoSchedule,
},
}
node := test.Node(test.NodeOptions{
ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{
v1beta1.NodePoolLabelKey: nodePool.Name,
v1.LabelInstanceTypeStable: instanceType.Name,
}},
Taints: []v1.Taint{
{
Key: "test",
Effect: v1.TaintEffectNoSchedule,
},
},
ProviderID: test.RandomProviderID(),
})
ExpectApplied(ctx, env.Client, node)
ExpectReconcileSucceeded(ctx, nodeController, client.ObjectKeyFromObject(node))

ExpectStateNodeCount("==", 1)

// This taint wouldn't show if we discovered the startup taints
Expect(ExpectStateNodeExists(node).Taints()).To(HaveLen(1))
})
It("should model the inflight data as nodeclaim with no node", func() {
nodeClaim := test.NodeClaim(v1beta1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Expand Down
4 changes: 2 additions & 2 deletions pkg/utils/nodeclaim/nodeclaim.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ func UpdateNodeOwnerReferences(nodeClaim *v1beta1.NodeClaim, node *v1.Node) *v1.
}

func Owner(ctx context.Context, c client.Client, obj interface{ GetLabels() map[string]string }) (*v1beta1.NodePool, error) {
if v, ok := obj.GetLabels()[v1beta1.NodePoolLabelKey]; ok {
if v, ok := obj.GetLabels()[v1beta1.NodePoolLabelKey]; ok && EnableNodeClaims {
nodePool := &v1beta1.NodePool{}
if err := c.Get(ctx, types.NamespacedName{Name: v}, nodePool); err != nil {
return nil, err
Expand All @@ -492,7 +492,7 @@ func Owner(ctx context.Context, c client.Client, obj interface{ GetLabels() map[
}

func OwnerKey(obj interface{ GetLabels() map[string]string }) nodepoolutil.Key {
if v, ok := obj.GetLabels()[v1beta1.NodePoolLabelKey]; ok {
if v, ok := obj.GetLabels()[v1beta1.NodePoolLabelKey]; ok && EnableNodeClaims {
return nodepoolutil.Key{Name: v, IsProvisioner: false}
}
if v, ok := obj.GetLabels()[v1alpha5.ProvisionerNameLabelKey]; ok {
Expand Down

0 comments on commit 4239902

Please sign in to comment.