Skip to content

Commit

Permalink
Support scale up of nodepool with zero nodes with ephemeral storage r…
Browse files Browse the repository at this point in the history
…equests
  • Loading branch information
vbhargav875 committed Apr 28, 2024
1 parent c7fb744 commit 1c02dfc
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 11 deletions.
18 changes: 10 additions & 8 deletions cluster-autoscaler/cloudprovider/oci/common/oci_shape.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (

// ShapeGetter returns the oci shape attributes for the pool.
type ShapeGetter interface {
GetNodePoolShape(*oke.NodePool) (*Shape, error)
GetNodePoolShape(*oke.NodePool, int64) (*Shape, error)
GetInstancePoolShape(pool *core.InstancePool) (*Shape, error)
Refresh()
}
Expand Down Expand Up @@ -51,10 +51,11 @@ func (cc ShapeClientImpl) ListShapes(ctx context.Context, req core.ListShapesReq
// Shape includes the resource attributes of a given shape which should be used
// for constructing node templates.
type Shape struct {
Name string
CPU float32
GPU int
MemoryInBytes float32
Name string
CPU float32
GPU int
MemoryInBytes float32
EphemeralStorageInBytes float32
}

// CreateShapeGetter creates a new oci shape getter.
Expand All @@ -78,14 +79,15 @@ func (osf *shapeGetterImpl) Refresh() {
}

// GetNodePoolShape gets the shape by querying the node pool's configuration
func (osf *shapeGetterImpl) GetNodePoolShape(np *oke.NodePool) (*Shape, error) {
func (osf *shapeGetterImpl) GetNodePoolShape(np *oke.NodePool, ephemeralStorage int64) (*Shape, error) {
shapeName := *np.NodeShape
if np.NodeShapeConfig != nil {
return &Shape{
CPU: *np.NodeShapeConfig.Ocpus * 2,
// num_bytes * kilo * mega * giga
MemoryInBytes: *np.NodeShapeConfig.MemoryInGBs * 1024 * 1024 * 1024,
GPU: 0,
MemoryInBytes: *np.NodeShapeConfig.MemoryInGBs * 1024 * 1024 * 1024,
GPU: 0,
EphemeralStorageInBytes: float32(ephemeralStorage),
}, nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func TestNodePoolGetShape(t *testing.T) {
shapeGetter := CreateShapeGetter(shapeClient)

t.Run(name, func(t *testing.T) {
shape, err := shapeGetter.GetNodePoolShape(&oke.NodePool{NodeShape: &tc.shape, NodeShapeConfig: tc.shapeConfig})
shape, err := shapeGetter.GetNodePoolShape(&oke.NodePool{NodeShape: &tc.shape, NodeShapeConfig: tc.shapeConfig}, -1)
if err != nil {
t.Fatal(err)
}
Expand Down
27 changes: 27 additions & 0 deletions cluster-autoscaler/cloudprovider/oci/common/oci_tag.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
Copyright 2021-2024 Oracle and/or its affiliates.
*/

package common

import (
oke "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/vendor-internal/github.com/oracle/oci-go-sdk/v65/containerengine"
)

// TagsGetter exposes the OCI tags attached to a node pool.
type TagsGetter interface {
	// GetFreeformTags returns the freeform tags (key/value string pairs) of
	// the given node pool, or an error if they cannot be obtained.
	GetFreeformTags(np *oke.NodePool) (map[string]string, error)
}

// TagsGetterImpl is the TagsGetter implementation returned by
// CreateTagsGetter. It carries no state.
type TagsGetterImpl struct{}

// CreateTagsGetter builds a new TagsGetter backed by a TagsGetterImpl.
func CreateTagsGetter() TagsGetter {
	getter := new(TagsGetterImpl)
	return getter
}

// GetFreeformTags returns the FreeformTags recorded on the given node pool.
// The returned error is always nil; the map is handed back as-is, not copied.
func (tgi *TagsGetterImpl) GetFreeformTags(np *oke.NodePool) (map[string]string, error) {
	tags := np.FreeformTags
	return tags, nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,7 @@ const (
// ToBeDeletedByClusterAutoscaler is the taint used to ensure that after a node has been called to be deleted
// no more pods will schedule onto it
ToBeDeletedByClusterAutoscaler = "ignore-taint.cluster-autoscaler.kubernetes.io/oke-impending-node-termination"

// EphemeralStorageSize is the freeform tag key used to determine the ephemeral-storage size of the node
EphemeralStorageSize = "cluster-autoscaler/node-ephemeral-storage"
)
37 changes: 35 additions & 2 deletions cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ func CreateNodePoolManager(cloudConfigPath string, discoveryOpts cloudprovider.N

//ociShapeGetter := ocicommon.CreateShapeGetter(computeClient)
ociShapeGetter := ocicommon.CreateShapeGetter(ocicommon.ShapeClientImpl{ComputeMgmtClient: computeMgmtClient, ComputeClient: computeClient})
ociTagsGetter := ocicommon.CreateTagsGetter()

registeredTaintsGetter := CreateRegisteredTaintsGetter()

Expand All @@ -145,6 +146,7 @@ func CreateNodePoolManager(cloudConfigPath string, discoveryOpts cloudprovider.N
computeClient: &computeClient,
staticNodePools: map[string]NodePool{},
ociShapeGetter: ociShapeGetter,
ociTagsGetter: ociTagsGetter,
registeredTaintsGetter: registeredTaintsGetter,
nodePoolCache: newNodePoolCache(&okeClient),
}
Expand Down Expand Up @@ -210,6 +212,7 @@ type ociManagerImpl struct {
okeClient okeClient
computeClient *core.ComputeClient
ociShapeGetter ocicommon.ShapeGetter
ociTagsGetter ocicommon.TagsGetter
registeredTaintsGetter RegisteredTaintsGetter
staticNodePools map[string]NodePool

Expand Down Expand Up @@ -521,7 +524,15 @@ func (m *ociManagerImpl) buildNodeFromTemplate(nodePool *oke.NodePool) (*apiv1.N
Capacity: apiv1.ResourceList{},
}

shape, err := m.ociShapeGetter.GetNodePoolShape(nodePool)
freeformTags, err := m.ociTagsGetter.GetFreeformTags(nodePool)
if err != nil {
return nil, err
}
ephemeralStorage, err := getEphemeralResourceRequestsInBytes(freeformTags)
if err != nil {
klog.Error(err)
}
shape, err := m.ociShapeGetter.GetNodePoolShape(nodePool, ephemeralStorage)
if err != nil {
return nil, err
}
Expand All @@ -542,11 +553,16 @@ func (m *ociManagerImpl) buildNodeFromTemplate(nodePool *oke.NodePool) (*apiv1.N
})
}

if err != nil {
return nil, err
}
node.Status.Capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)

node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(int64(shape.CPU), resource.DecimalSI)
node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(int64(shape.MemoryInBytes), resource.DecimalSI)
node.Status.Capacity[ipconsts.ResourceGPU] = *resource.NewQuantity(int64(shape.GPU), resource.DecimalSI)
if ephemeralStorage != -1 {
node.Status.Capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(ephemeralStorage, resource.DecimalSI)
}

node.Status.Allocatable = node.Status.Capacity

Expand Down Expand Up @@ -634,6 +650,23 @@ func addTaintToSpec(node *apiv1.Node, taintKey string, effect apiv1.TaintEffect)
return true
}

// getEphemeralResourceRequestsInBytes reads the ephemeral-storage size from
// the node pool's freeform tags, using the npconsts.EphemeralStorageSize key.
//
// Returns:
//   - the parsed quantity's integer value and a nil error when the tag is
//     present and parses as a resource quantity;
//   - -1 and a nil error when the tag is absent (including a nil map);
//   - -1 and the parse error when the tag value is not a valid quantity.
//
// Spaces in the tag value are stripped before parsing, so "50 Gi" is parsed
// the same way as "50Gi".
func getEphemeralResourceRequestsInBytes(tags map[string]string) (int64, error) {
	// Look the key up directly instead of scanning every tag; indexing a nil
	// map is safe and simply reports the key as missing.
	value, found := tags[npconsts.EphemeralStorageSize]
	if !found {
		klog.V(4).Infof("ephemeral-storage size not set as part of the nodepool's freeform tags")
		return -1, nil
	}

	klog.V(4).Infof("ephemeral-storage size set with value : %v", value)
	value = strings.ReplaceAll(value, " ", "")
	resourceSize, err := resource.ParseQuantity(value)
	if err != nil {
		return -1, err
	}
	klog.V(4).Infof("ephemeral-storage size = %v (%v)", resourceSize.Value(), resourceSize.Format)
	return resourceSize.Value(), nil
}

// IsConflict checks if the error is a conflict
func IsConflict(err error) bool {
return ReasonForError(err) == metav1.StatusReasonConflict
Expand Down

0 comments on commit 1c02dfc

Please sign in to comment.