From 2f902574f8c3ad1cbe5df7a24330886eda03d230 Mon Sep 17 00:00:00 2001 From: Xavier Duthil Date: Tue, 10 May 2022 11:01:26 +0200 Subject: [PATCH] fix(ovhcloud): Various fixes Fix silent failure of upscale/downscale due to the node group status being not READY Fix Instance.ID to the actual value of Node.Spec.ProviderID Fix infinite looping when DecreaseTargetSize is called Fix the way we determine to which node group a node belongs to Fix handling of non-autoscaled nodepools (now considered as autoscaled nodepools with no margin of action) Fix TemplateNodeInfo with correct allocatable resources from the instance flavor specs Signed-off-by: Xavier Duthil --- .../ovhcloud/ovh_cloud_manager.go | 58 ++- .../ovhcloud/ovh_cloud_manager_test.go | 157 ++++++++ .../ovhcloud/ovh_cloud_node_group.go | 156 +++----- .../ovhcloud/ovh_cloud_node_group_test.go | 370 +++++++++++------- .../ovhcloud/ovh_cloud_provider.go | 112 ++++-- .../ovhcloud/ovh_cloud_provider_test.go | 142 ++++++- .../ovhcloud/sdk/consumer_key.go | 2 +- .../cloudprovider/ovhcloud/sdk/flavor.go | 8 +- .../cloudprovider/ovhcloud/sdk/mock.go | 4 +- .../cloudprovider/ovhcloud/sdk/nodepool.go | 6 + .../cloudprovider/ovhcloud/sdk/ovh.go | 89 ++--- 11 files changed, 769 insertions(+), 335 deletions(-) create mode 100644 cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager_test.go diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager.go index a316d0f74490..3cfa2ecc63bb 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager.go @@ -23,10 +23,14 @@ import ( "fmt" "io" "io/ioutil" + "time" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk" + "k8s.io/klog/v2" ) +const flavorCacheDuration = time.Hour + // ClientInterface defines all mandatory methods to be exposed as a client (mock or API) type ClientInterface interface { // ListNodePools lists all the node pools found in a Kubernetes cluster. @@ -44,8 +48,8 @@ type ClientInterface interface { // DeleteNodePool deletes a specific pool. DeleteNodePool(ctx context.Context, projectID string, clusterID string, poolID string) (*sdk.NodePool, error) - // ListFlavors list all available flavors usable in a Kubernetes cluster. - ListFlavors(ctx context.Context, projectID string, clusterID string) ([]sdk.Flavor, error) + // ListClusterFlavors list all available flavors usable in a Kubernetes cluster. + ListClusterFlavors(ctx context.Context, projectID string, clusterID string) ([]sdk.Flavor, error) } // OvhCloudManager defines current application context manager to interact @@ -57,7 +61,11 @@ type OvhCloudManager struct { ClusterID string ProjectID string - NodePools []sdk.NodePool + NodePools []sdk.NodePool + NodeGroupPerProviderID map[string]*NodeGroup + + FlavorsCache map[string]sdk.Flavor + FlavorsCacheExpirationTime time.Time } // Config is the configuration file content of OVHcloud provider @@ -138,10 +146,52 @@ func NewManager(configFile io.Reader) (*OvhCloudManager, error) { ProjectID: cfg.ProjectID, ClusterID: cfg.ClusterID, - NodePools: make([]sdk.NodePool, 0), + NodePools: make([]sdk.NodePool, 0), + NodeGroupPerProviderID: make(map[string]*NodeGroup), + + FlavorsCache: make(map[string]sdk.Flavor), + FlavorsCacheExpirationTime: time.Time{}, }, nil } +// getFlavorsByName lists available flavors from cache or from OVHCloud APIs if the cache is outdated +func (m *OvhCloudManager) getFlavorsByName() (map[string]sdk.Flavor, error) { + // Update the flavors cache if expired + if m.FlavorsCacheExpirationTime.Before(time.Now()) { + newFlavorCacheExpirationTime := time.Now().Add(flavorCacheDuration) + klog.V(4).Infof("Listing flavors to update flavors cache (will expire at %s)", newFlavorCacheExpirationTime) + + // Fetch all flavors in API + flavors, err := m.Client.ListClusterFlavors(context.Background(), m.ProjectID, m.ClusterID) + if err != nil { + return nil, fmt.Errorf("failed to list available flavors: %w", err) + } + + // Update the flavors cache + m.FlavorsCache = make(map[string]sdk.Flavor) + for _, flavor := range flavors { + m.FlavorsCache[flavor.Name] = flavor + m.FlavorsCacheExpirationTime = newFlavorCacheExpirationTime + } + } + + return m.FlavorsCache, nil +} + +// getFlavorByName returns the given flavor from cache or API +func (m *OvhCloudManager) getFlavorByName(flavorName string) (sdk.Flavor, error) { + flavorsByName, err := m.getFlavorsByName() + if err != nil { + return sdk.Flavor{}, err + } + + if flavor, ok := flavorsByName[flavorName]; ok { + return flavor, nil + } + + return sdk.Flavor{}, fmt.Errorf("flavor %s not found in available flavors", flavorName) +} + // ReAuthenticate allows OpenStack keystone token to be revoked and re-created to call API func (m *OvhCloudManager) ReAuthenticate() error { if m.OpenStackProvider != nil { diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager_test.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager_test.go new file mode 100644 index 000000000000..a662726cd6e7 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager_test.go @@ -0,0 +1,157 @@ +package ovhcloud + +import ( + "bytes" + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk" +) + +func newTestManager(t *testing.T) *OvhCloudManager { + cfg := `{ + "project_id": "projectID", + "cluster_id": "clusterID", + "authentication_type": "consumer", + "application_endpoint": "ovh-eu", + "application_key": "key", + "application_secret": "secret", + "application_consumer_key": "consumer_key" + }` + + manager, err := NewManager(bytes.NewBufferString(cfg)) + if err != nil { + assert.FailNow(t, "failed to create manager", err) + } + + client := &sdk.ClientMock{} + ctx := context.Background() + + client.On("ListClusterFlavors", ctx, "projectID", "clusterID").Return( + []sdk.Flavor{ + { + Name: "b2-7", + Category: "b", + State: "available", + VCPUs: 2, + GPUs: 0, + RAM: 7, + }, + { + Name: "t1-45", + Category: "t", + State: "available", + VCPUs: 8, + GPUs: 1, + RAM: 45, + }, + { + Name: "unknown", + Category: "", + State: "unavailable", + VCPUs: 2, + GPUs: 0, + RAM: 7, + }, + }, nil, + ) + manager.Client = client + + return manager +} + +func TestOvhCloudManager_getFlavorsByName(t *testing.T) { + expectedFlavorsByNameFromAPICall := map[string]sdk.Flavor{ + "b2-7": { + Name: "b2-7", + Category: "b", + State: "available", + VCPUs: 2, + GPUs: 0, + RAM: 7, + }, + "t1-45": { + Name: "t1-45", + Category: "t", + State: "available", + VCPUs: 8, + GPUs: 1, + RAM: 45, + }, + "unknown": { + Name: "unknown", + Category: "", + State: "unavailable", + VCPUs: 2, + GPUs: 0, + RAM: 7, + }, + } + + t.Run("brand new manager: list from api", func(t *testing.T) { + ng := newTestManager(t) + flavorsByName, err := ng.getFlavorsByName() + + ng.Client.(*sdk.ClientMock).AssertCalled(t, "ListClusterFlavors", context.Background(), "projectID", "clusterID") + assert.NoError(t, err) + assert.Equal(t, expectedFlavorsByNameFromAPICall, flavorsByName) + assert.Equal(t, expectedFlavorsByNameFromAPICall, ng.FlavorsCache) + }) + + t.Run("flavors cache expired: renew and list from api", func(t *testing.T) { + initialFlavorsCache := map[string]sdk.Flavor{ + "custom": { + Name: "custom", + }, + } + + ng := newTestManager(t) + ng.FlavorsCache = initialFlavorsCache + ng.FlavorsCacheExpirationTime = time.Now() + + flavorsByName, err := ng.getFlavorsByName() + + ng.Client.(*sdk.ClientMock).AssertCalled(t, "ListClusterFlavors", context.Background(), "projectID", "clusterID") + assert.NoError(t, err) + assert.Equal(t, expectedFlavorsByNameFromAPICall, flavorsByName) + assert.Equal(t, expectedFlavorsByNameFromAPICall, ng.FlavorsCache) + }) + + t.Run("flavors cache still valid: list from cache", func(t *testing.T) { + initialFlavorsCache := map[string]sdk.Flavor{ + "custom": { + Name: "custom", + }, + } + + ng := newTestManager(t) + ng.FlavorsCache = initialFlavorsCache + ng.FlavorsCacheExpirationTime = time.Now().Add(time.Minute) + + flavorsByName, err := ng.getFlavorsByName() + + ng.Client.(*sdk.ClientMock).AssertNotCalled(t, "ListClusterFlavors", context.Background(), "projectID", "clusterID") + assert.NoError(t, err) + assert.Equal(t, initialFlavorsCache, flavorsByName) + assert.Equal(t, initialFlavorsCache, ng.FlavorsCache) + }) +} + +func TestOvhCloudManager_getFlavorByName(t *testing.T) { + ng := newTestManager(t) + + t.Run("check default node group max size", func(t *testing.T) { + flavor, err := ng.getFlavorByName("b2-7") + assert.NoError(t, err) + assert.Equal(t, sdk.Flavor{ + Name: "b2-7", + Category: "b", + State: "available", + VCPUs: 2, + GPUs: 0, + RAM: 7, + }, flavor) + }) +} diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go index b8b68da84b78..2ae6eec19ecf 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go @@ -19,12 +19,13 @@ package ovhcloud import ( "context" "fmt" + "math" "math/rand" - "regexp" "strings" "time" apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" @@ -32,10 +33,10 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk" "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" ) -// instanceIdRegex defines the expression used for instance's ID -var instanceIdRegex = regexp.MustCompile(`^(.*)/(.*)$`) +const providerIDPrefix = "openstack:///" // NodeGroup implements cloudprovider.NodeGroup interface. type NodeGroup struct { @@ -73,7 +74,7 @@ func (ng *NodeGroup) IncreaseSize(delta int) error { return nil } - klog.V(4).Infof("Increasing NodeGroup size with %d nodes", delta) + klog.V(4).Infof("Increasing NodeGroup size by %d node(s)", delta) // First, verify the NodeGroup can be increased if delta <= 0 { @@ -96,16 +97,14 @@ func (ng *NodeGroup) IncreaseSize(delta int) error { opts := sdk.UpdateNodePoolOpts{ DesiredNodes: &desired, } + klog.V(4).Infof("Upscaling node pool %s to %d desired nodes", ng.ID, desired) - // Eventually, call API to increase desired nodes number, automatically creating new nodes (wait for the pool to be READY before trying to update) - if ng.Status == "READY" { - resp, err := ng.Manager.Client.UpdateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID, &opts) - if err != nil { - return fmt.Errorf("failed to increase node pool desired size: %w", err) - } - - ng.Status = resp.Status + // Call API to increase desired nodes number, automatically creating new nodes + resp, err := ng.Manager.Client.UpdateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID, &opts) + if err != nil { + return fmt.Errorf("failed to increase node pool desired size: %w", err) } + ng.Status = resp.Status return nil } @@ -117,7 +116,7 @@ func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error { return nil } - klog.V(4).Infof("Deleting %d nodes", len(nodes)) + klog.V(4).Infof("Deleting %d node(s)", len(nodes)) // First, verify the NodeGroup can be decreased size, err := ng.TargetSize() @@ -129,37 +128,28 @@ func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error { return fmt.Errorf("node group size would be below minimum size - desired: %d, max: %d", size-len(nodes), ng.MinSize()) } - // Then, fetch node group instances and check that all nodes to remove are linked correctly, - // otherwise it will return an error - instances, err := ng.Nodes() - if err != nil { - return fmt.Errorf("failed to list current node group instances: %w", err) - } - - nodeIds, err := extractNodeIds(nodes, instances, ng.Id()) - if err != nil { - return fmt.Errorf("failed to extract node ids to remove: %w", err) + nodeProviderIds := make([]string, 0) + for _, node := range nodes { + nodeProviderIds = append(nodeProviderIds, node.Spec.ProviderID) } - // Then, forge current size and parameters - ng.CurrentSize = size - len(nodes) - - desired := uint32(ng.CurrentSize) + desired := uint32(size - len(nodes)) opts := sdk.UpdateNodePoolOpts{ DesiredNodes: &desired, - NodesToRemove: nodeIds, + NodesToRemove: nodeProviderIds, } + klog.V(4).Infof("Downscaling node pool %s to %d desired nodes by deleting the following nodes: %s", ng.ID, desired, nodeProviderIds) - // Eventually, call API to remove nodes from a NodeGroup (wait for the pool to be READY before trying to update) - if ng.Status == "READY" { - resp, err := ng.Manager.Client.UpdateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID, &opts) - if err != nil { - return fmt.Errorf("failed to delete node pool nodes: %w", err) - } - - ng.Status = resp.Status + // Call API to remove nodes from a NodeGroup + resp, err := ng.Manager.Client.UpdateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID, &opts) + if err != nil { + return fmt.Errorf("failed to delete node pool nodes: %w", err) } + // Update the node group + ng.Status = resp.Status + ng.CurrentSize = size - len(nodes) + return nil } @@ -169,30 +159,8 @@ func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error { // It is assumed that cloud provider will not delete the existing nodes if the size // when there is an option to just decrease the target. func (ng *NodeGroup) DecreaseTargetSize(delta int) error { - // Do not use node group which does not support autoscaling - if !ng.Autoscale { - return nil - } - - klog.V(4).Infof("Decreasing NodeGroup size with %d nodes", delta) - - // First, verify the NodeGroup can be decreased - if delta >= 0 { - return fmt.Errorf("decrease size node group delta must be negative") - } - - size, err := ng.TargetSize() - if err != nil { - return fmt.Errorf("failed to get NodeGroup target size") - } - - if size+delta < ng.MinSize() { - return fmt.Errorf("node group size would be below minimum size - desired: %d, max: %d", size+delta, ng.MinSize()) - } - - ng.CurrentSize = size + delta - - return nil + // Cancellation of node provisioning is not supported yet + return cloudprovider.ErrNotImplemented } // Id returns node pool id. @@ -214,15 +182,20 @@ func (ng *NodeGroup) Nodes() ([]cloudprovider.Instance, error) { return nil, fmt.Errorf("failed to list node pool nodes: %w", err) } + klog.V(4).Infof("%d nodes are listed in node pool %s", len(nodes), ng.ID) + // Cast all API nodes into instance interface instances := make([]cloudprovider.Instance, 0) for _, node := range nodes { instance := cloudprovider.Instance{ - Id: fmt.Sprintf("%s/%s", node.ID, node.Name), + Id: fmt.Sprintf("%s%s", providerIDPrefix, node.InstanceID), Status: toInstanceStatus(node.Status), } instances = append(instances, instance) + + // Store the associated node group in cache for future reference + ng.Manager.NodeGroupPerProviderID[instance.Id] = ng } return instances, nil @@ -245,6 +218,18 @@ func (ng *NodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { }, } + flavor, err := ng.Manager.getFlavorByName(ng.Flavor) + if err != nil { + return nil, fmt.Errorf("failed to get specs for flavor %q: %w", ng.Flavor, err) + } + + node.Status.Capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI) + node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(int64(flavor.VCPUs), resource.DecimalSI) + node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(int64(flavor.GPUs), resource.DecimalSI) + node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(int64(flavor.RAM)*int64(math.Pow(1024, 3)), resource.DecimalSI) + + node.Status.Allocatable = node.Status.Capacity + // Setup node info template nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(ng.Id())) nodeInfo.SetNode(node) @@ -260,7 +245,7 @@ func (ng *NodeGroup) Exist() bool { // Create creates the node group on the cloud provider side. func (ng *NodeGroup) Create() (cloudprovider.NodeGroup, error) { - klog.V(4).Infof("Creating a new NodeGroup") + klog.V(4).Info("Creating a new NodeGroup") // Forge create node pool parameters (defaulting b2-7 for now) name := ng.Id() @@ -339,50 +324,17 @@ func (ng *NodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*c // isGpu checks if a node group is using GPU machines func (ng *NodeGroup) isGpu() bool { - return strings.HasPrefix(ng.Flavor, GPUMachineCategory) -} - -// extractNodeIds find in an array of node resource their cloud instances IDs -func extractNodeIds(nodes []*apiv1.Node, instances []cloudprovider.Instance, groupLabel string) ([]string, error) { - nodeIds := make([]string, 0) - - // Loop through each nodes to find any un-wanted nodes to remove - for _, node := range nodes { - // First, check if node resource has correct node pool label - label, ok := node.Labels[NodePoolLabel] - if !ok || label != groupLabel { - return nil, fmt.Errorf("node %s without label %s, current: %s", node.Name, groupLabel, label) - } - - // Then, loop through each group instances to find if the node is in the list - found := false - for _, instance := range instances { - match := instanceIdRegex.FindStringSubmatch(instance.Id) - if len(match) < 2 { - continue - } - - id := match[1] - name := match[2] - - if node.Name != name { - continue - } - - found = true - nodeIds = append(nodeIds, id) - break - } - - if !found { - return nil, fmt.Errorf("node %s not found in group instances", node.Name) - } + flavor, err := ng.Manager.getFlavorByName(ng.Flavor) + if err != nil { + // Fallback when we are unable to get the flavor: refer to the only category + // known to be a GPU flavor category + return strings.HasPrefix(ng.Flavor, GPUMachineCategory) } - return nodeIds, nil + return flavor.GPUs > 0 } -// toInstanceStatus cast a node status into an instance status +// toInstanceStatus casts a node status into an instance status func toInstanceStatus(status string) *cloudprovider.InstanceStatus { state := &cloudprovider.InstanceStatus{} diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group_test.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group_test.go index 56defe35509b..7efb85356d04 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group_test.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group_test.go @@ -24,16 +24,17 @@ import ( "time" "github.com/stretchr/testify/assert" - + apiv1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk" "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" ) -func newTestNodeGroup(t *testing.T, isGpu bool) cloudprovider.NodeGroup { +func newTestNodeGroup(t *testing.T, flavor string) *NodeGroup { cfg := `{ "project_id": "projectID", "cluster_id": "clusterID", @@ -52,60 +53,36 @@ func newTestNodeGroup(t *testing.T, isGpu bool) cloudprovider.NodeGroup { client := &sdk.ClientMock{} ctx := context.Background() - client.On("ListNodePoolNodes", ctx, "projectID", "clusterID", "id").Return( - []sdk.Node{ + client.On("ListClusterFlavors", ctx, "projectID", "clusterID").Return( + []sdk.Flavor{ { - ID: "id-1", - Name: "node-1", - Status: "READY", + Name: "b2-7", + Category: "b", + State: "available", + VCPUs: 2, + GPUs: 0, + RAM: 7, }, { - ID: "id-2", - Name: "node-2", - Status: "INSTALLING", + Name: "t1-45", + Category: "t", + State: "available", + VCPUs: 8, + GPUs: 1, + RAM: 45, }, { - ID: "id-3", - Name: "node-3", - Status: "ERROR", + Name: "unknown", + Category: "", + State: "unavailable", + VCPUs: 2, + GPUs: 0, + RAM: 7, }, }, nil, ) - - name := "pool-b2-7" - size := uint32(3) - min := uint32(1) - max := uint32(5) - - opts := sdk.CreateNodePoolOpts{ - FlavorName: "b2-7", - Name: &name, - DesiredNodes: &size, - MinNodes: &min, - MaxNodes: &max, - Autoscale: true, - } - - client.On("CreateNodePool", ctx, "projectID", "clusterID", &opts).Return( - &sdk.NodePool{ - ID: "id", - Name: "pool-b2-7", - Flavor: "b2-7", - Autoscale: true, - DesiredNodes: 3, - MinNodes: 1, - MaxNodes: 5, - }, nil, - ) - - client.On("DeleteNodePool", ctx, "projectID", "clusterID", "id").Return(&sdk.NodePool{}, nil) - - flavor := "b2-7" - if isGpu { - flavor = "t1-45" - } - manager.Client = client + ng := &NodeGroup{ Manager: manager, NodePool: sdk.NodePool{ @@ -129,99 +106,196 @@ func newTestNodeGroup(t *testing.T, isGpu bool) cloudprovider.NodeGroup { return ng } +func (ng *NodeGroup) mockCallUpdateNodePool(newDesiredNodes uint32, nodesToRemove []string) { + ng.Manager.Client.(*sdk.ClientMock).On( + "UpdateNodePool", + context.Background(), + ng.Manager.ProjectID, + ng.Manager.ClusterID, + ng.ID, + &sdk.UpdateNodePoolOpts{ + DesiredNodes: &newDesiredNodes, + NodesToRemove: nodesToRemove, + }, + ).Return( + &sdk.NodePool{ + ID: ng.ID, + Name: ng.Name, + Flavor: ng.Flavor, + Autoscale: ng.Autoscale, + DesiredNodes: newDesiredNodes, + MinNodes: ng.MinNodes, + MaxNodes: ng.MaxNodes, + }, + nil, + ) +} + +func (ng *NodeGroup) mockCallListNodePoolNodes() { + ng.Manager.Client.(*sdk.ClientMock).On( + "ListNodePoolNodes", + context.Background(), + ng.Manager.ProjectID, + ng.Manager.ClusterID, + ng.ID, + ).Return( + []sdk.Node{ + { + ID: "id-1", + Name: "node-1", + Status: "READY", + InstanceID: "instance-1", + }, + { + ID: "id-2", + Name: "node-2", + Status: "INSTALLING", + InstanceID: "", + }, + { + ID: "id-3", + Name: "node-3", + Status: "ERROR", + InstanceID: "", + }, + }, nil, + ) +} + +func (ng *NodeGroup) mockCallCreateNodePool() { + ng.Manager.Client.(*sdk.ClientMock).On( + "CreateNodePool", + context.Background(), + ng.Manager.ProjectID, + ng.Manager.ClusterID, + &sdk.CreateNodePoolOpts{ + FlavorName: "b2-7", + Name: &ng.Name, + DesiredNodes: &ng.DesiredNodes, + MinNodes: &ng.MinNodes, + MaxNodes: &ng.MaxNodes, + Autoscale: ng.Autoscale, + }, + ).Return( + &sdk.NodePool{ + ID: ng.ID, + Name: ng.Name, + Flavor: ng.Flavor, + Autoscale: ng.Autoscale, + DesiredNodes: ng.DesiredNodes, + MinNodes: ng.MinNodes, + MaxNodes: ng.MaxNodes, + }, nil, + ) +} + +func (ng *NodeGroup) mockCallDeleteNodePool() { + ng.Manager.Client.(*sdk.ClientMock).On("DeleteNodePool", context.Background(), "projectID", "clusterID", "id").Return(&sdk.NodePool{}, nil) +} + func TestOVHCloudNodeGroup_MaxSize(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check default node group max size", func(t *testing.T) { - max := group.MaxSize() + max := ng.MaxSize() assert.Equal(t, 5, max) }) } func TestOVHCloudNodeGroup_MinSize(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check default node group min size", func(t *testing.T) { - min := group.MinSize() + min := ng.MinSize() assert.Equal(t, 1, min) }) } func TestOVHCloudNodeGroup_TargetSize(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check default node group target size", func(t *testing.T) { - size, err := group.TargetSize() + targetSize, err := ng.TargetSize() assert.NoError(t, err) - assert.Equal(t, 3, size) + assert.Equal(t, 3, targetSize) }) } func TestOVHCloudNodeGroup_IncreaseSize(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check increase size below max size", func(t *testing.T) { - err := group.IncreaseSize(1) + ng.mockCallUpdateNodePool(4, nil) + + err := ng.IncreaseSize(1) assert.NoError(t, err) - size, err := group.TargetSize() + targetSize, err := ng.TargetSize() assert.NoError(t, err) - assert.Equal(t, 4, size) + assert.Equal(t, 4, targetSize) }) t.Run("check increase size above max size", func(t *testing.T) { - err := group.IncreaseSize(5) + err := ng.IncreaseSize(5) assert.Error(t, err) }) t.Run("check increase size with negative delta", func(t *testing.T) { - err := group.IncreaseSize(-1) + err := ng.IncreaseSize(-1) assert.Error(t, err) }) } func TestOVHCloudNodeGroup_DeleteNodes(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check delete nodes above min size", func(t *testing.T) { - err := group.DeleteNodes([]*v1.Node{ + ng.mockCallUpdateNodePool(2, []string{"openstack:///instance-1"}) + + err := ng.DeleteNodes([]*v1.Node{ { ObjectMeta: metav1.ObjectMeta{ Name: "node-1", Labels: map[string]string{ - "nodepool": group.Id(), + "nodepool": ng.Id(), }, }, + Spec: v1.NodeSpec{ + ProviderID: "openstack:///instance-1", + }, }, }) assert.NoError(t, err) - size, err := group.TargetSize() + targetSize, err := ng.TargetSize() assert.NoError(t, err) - assert.Equal(t, 2, size) + assert.Equal(t, 2, targetSize) }) t.Run("check delete nodes empty nodes array", func(t *testing.T) { - err := group.DeleteNodes(nil) + ng.mockCallUpdateNodePool(2, []string{}) - size, err := group.TargetSize() + err := ng.DeleteNodes(nil) assert.NoError(t, err) - assert.Equal(t, 2, size) + targetSize, err := ng.TargetSize() + assert.NoError(t, err) + + assert.Equal(t, 2, targetSize) }) t.Run("check delete nodes below min size", func(t *testing.T) { - err := group.DeleteNodes([]*v1.Node{ + err := ng.DeleteNodes([]*v1.Node{ { ObjectMeta: metav1.ObjectMeta{ Name: "node-1", Labels: map[string]string{ - "nodepool": group.Id(), + "nodepool": ng.Id(), }, }, }, @@ -229,7 +303,7 @@ func TestOVHCloudNodeGroup_DeleteNodes(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "node-2", Labels: map[string]string{ - "nodepool": group.Id(), + "nodepool": ng.Id(), }, }, }, @@ -237,161 +311,181 @@ func TestOVHCloudNodeGroup_DeleteNodes(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "node-3", Labels: map[string]string{ - "nodepool": group.Id(), + "nodepool": ng.Id(), }, }, }, }) assert.Error(t, err) }) - - t.Run("check delete nodes with wrong label name", func(t *testing.T) { - err := group.DeleteNodes([]*v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - Labels: map[string]string{ - "nodepool": "unknown", - }, - }, - }, - }) - - assert.Error(t, err) - }) } func TestOVHCloudNodeGroup_DecreaseTargetSize(t *testing.T) { - group := newTestNodeGroup(t, false) - - t.Run("check decrease size above min size", func(t *testing.T) { - err := group.DecreaseTargetSize(-1) - assert.NoError(t, err) - - size, err := group.TargetSize() - assert.NoError(t, err) - - assert.Equal(t, 2, size) - }) - - t.Run("check decrease size below min size", func(t *testing.T) { - err := group.DecreaseTargetSize(-5) - assert.Error(t, err) - }) - - t.Run("check decrease size with positive delta", func(t *testing.T) { - err := group.DecreaseTargetSize(1) - assert.Error(t, err) - }) + ng := newTestNodeGroup(t, "b2-7") + err := ng.DecreaseTargetSize(-1) + assert.Error(t, err) } func TestOVHCloudNodeGroup_Id(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check node group id fetch", func(t *testing.T) { - name := group.Id() + name := ng.Id() assert.Equal(t, "pool-b2-7", name) }) } func TestOVHCloudNodeGroup_Debug(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check node group debug print", func(t *testing.T) { - debug := group.Debug() + debug := ng.Debug() assert.Equal(t, "pool-b2-7 (3:1:5)", debug) }) } func TestOVHCloudNodeGroup_Nodes(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check nodes list in node group", func(t *testing.T) { - nodes, err := group.Nodes() + ng.mockCallListNodePoolNodes() + + nodes, err := ng.Nodes() assert.NoError(t, err) assert.Equal(t, 3, len(nodes)) - assert.Equal(t, "id-1/node-1", nodes[0].Id) + assert.Equal(t, "openstack:///instance-1", nodes[0].Id) assert.Equal(t, cloudprovider.InstanceRunning, nodes[0].Status.State) - assert.Equal(t, "id-2/node-2", nodes[1].Id) + assert.Equal(t, "openstack:///", nodes[1].Id) assert.Equal(t, cloudprovider.InstanceCreating, nodes[1].Status.State) - assert.Equal(t, "id-3/node-3", nodes[2].Id) + assert.Equal(t, "openstack:///", nodes[2].Id) assert.Equal(t, cloudprovider.OtherErrorClass, nodes[2].Status.ErrorInfo.ErrorClass) assert.Equal(t, "ERROR", nodes[2].Status.ErrorInfo.ErrorCode) }) } func TestOVHCloudNodeGroup_TemplateNodeInfo(t *testing.T) { - group := newTestNodeGroup(t, false) + t.Run("template for b2-7 flavor", func(t *testing.T) { + ng := newTestNodeGroup(t, "b2-7") + template, err := ng.TemplateNodeInfo() + assert.NoError(t, err) + + node := template.Node() + assert.NotNil(t, node) + + assert.Contains(t, node.ObjectMeta.Name, fmt.Sprintf("%s-node-", ng.Id())) + assert.Equal(t, ng.Id(), node.Labels["nodepool"]) - t.Run("check node template filled with group id", func(t *testing.T) { - template, err := group.TemplateNodeInfo() + assert.Equal(t, *resource.NewQuantity(110, resource.DecimalSI), node.Status.Capacity[apiv1.ResourcePods]) + assert.Equal(t, *resource.NewQuantity(2, resource.DecimalSI), node.Status.Capacity[apiv1.ResourceCPU]) + assert.Equal(t, *resource.NewQuantity(0, resource.DecimalSI), node.Status.Capacity[gpu.ResourceNvidiaGPU]) + assert.Equal(t, *resource.NewQuantity(7516192768, resource.DecimalSI), node.Status.Capacity[apiv1.ResourceMemory]) + }) + + t.Run("template for t1-45 flavor", func(t *testing.T) { + ng := newTestNodeGroup(t, "t1-45") + template, err := ng.TemplateNodeInfo() assert.NoError(t, err) node := template.Node() assert.NotNil(t, node) - assert.Contains(t, node.ObjectMeta.Name, fmt.Sprintf("%s-node-", group.Id())) - assert.Equal(t, group.Id(), node.Labels["nodepool"]) + assert.Contains(t, node.ObjectMeta.Name, fmt.Sprintf("%s-node-", ng.Id())) + assert.Equal(t, ng.Id(), node.Labels["nodepool"]) + + assert.Equal(t, *resource.NewQuantity(110, resource.DecimalSI), node.Status.Capacity[apiv1.ResourcePods]) + assert.Equal(t, *resource.NewQuantity(8, resource.DecimalSI), node.Status.Capacity[apiv1.ResourceCPU]) + assert.Equal(t, *resource.NewQuantity(1, resource.DecimalSI), node.Status.Capacity[gpu.ResourceNvidiaGPU]) + assert.Equal(t, *resource.NewQuantity(48318382080, resource.DecimalSI), node.Status.Capacity[apiv1.ResourceMemory]) }) } func TestOVHCloudNodeGroup_Exist(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check exist is true", func(t *testing.T) { - exist := group.Exist() + exist := ng.Exist() assert.True(t, exist) }) } func TestOVHCloudNodeGroup_Create(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check create node group", func(t *testing.T) { - newGroup, err := group.Create() + ng.mockCallCreateNodePool() + + newGroup, err := ng.Create() assert.NoError(t, err) - size, err := newGroup.TargetSize() + targetSize, err := newGroup.TargetSize() assert.NoError(t, err) assert.Equal(t, "pool-b2-7", newGroup.Id()) - assert.Equal(t, 3, size) + assert.Equal(t, 3, targetSize) assert.Equal(t, 1, newGroup.MinSize()) assert.Equal(t, 5, newGroup.MaxSize()) }) } func TestOVHCloudNodeGroup_Delete(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check delete node group", func(t *testing.T) { - err := group.Delete() + ng.mockCallDeleteNodePool() + err := ng.Delete() assert.NoError(t, err) }) } func TestOVHCloudNodeGroup_Autoprovisioned(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check auto-provisioned is false", func(t *testing.T) { - provisioned := group.Autoprovisioned() - + provisioned := ng.Autoprovisioned() assert.False(t, provisioned) }) } +func TestOVHCloudNodeGroup_IsGpu(t *testing.T) { + t.Run("has GPU cores", func(t *testing.T) { + ng := newTestNodeGroup(t, "custom-t1-45") + ng.Manager.FlavorsCache = map[string]sdk.Flavor{ + "custom-t1-45": { + GPUs: 1, + }, + } + ng.Manager.FlavorsCacheExpirationTime = time.Now().Add(time.Minute) + assert.True(t, ng.isGpu()) + }) + + t.Run("doesn't have GPU cores", func(t *testing.T) { + ng := newTestNodeGroup(t, "custom-b2-7") + ng.Manager.FlavorsCache = map[string]sdk.Flavor{ + "custom-b2-7": { + GPUs: 0, + }, + } + assert.False(t, ng.isGpu()) + }) + + t.Run("not found but belong to GPU category", func(t *testing.T) { + ng := newTestNodeGroup(t, GPUMachineCategory+"1-123") + assert.True(t, ng.isGpu()) + }) +} + func TestOVHCloudNodeGroup_GetOptions(t *testing.T) { t.Run("check get autoscaling options", func(t *testing.T) { - group := newTestNodeGroup(t, false) - opts, err := group.GetOptions(config.NodeGroupAutoscalingOptions{}) + ng := newTestNodeGroup(t, "b2-7") + opts, err := ng.GetOptions(config.NodeGroupAutoscalingOptions{}) assert.NoError(t, err) assert.Equal(t, fmt.Sprintf("%.1f", 3.2), fmt.Sprintf("%.1f", opts.ScaleDownUtilizationThreshold)) @@ -401,8 +495,8 @@ func TestOVHCloudNodeGroup_GetOptions(t *testing.T) { }) t.Run("check get autoscaling options on gpu machine", func(t *testing.T) { - group := newTestNodeGroup(t, true) - opts, err := group.GetOptions(config.NodeGroupAutoscalingOptions{}) + ng := newTestNodeGroup(t, "t1-45") + opts, err := ng.GetOptions(config.NodeGroupAutoscalingOptions{}) assert.NoError(t, err) assert.Equal(t, float64(0), opts.ScaleDownUtilizationThreshold) diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider.go index a46a731f278a..075c6cb78a15 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider.go @@ -99,10 +99,11 @@ func (provider *OVHCloudProvider) NodeGroups() []cloudprovider.NodeGroup { groups := make([]cloudprovider.NodeGroup, 0) // Cast API node pools into CA node groups - // Do not add in array node pool which does not support autoscaling for _, pool := range provider.manager.NodePools { + // Node pools without autoscaling are equivalent to node pools with autoscaling but no scale possible if !pool.Autoscale { - continue + pool.MaxNodes = pool.DesiredNodes + pool.MinNodes = pool.DesiredNodes } ng := NodeGroup{ @@ -121,26 +122,75 @@ func (provider *OVHCloudProvider) NodeGroups() []cloudprovider.NodeGroup { // should not be processed by cluster autoscaler, or non-nil error if such // occurred. Must be implemented. func (provider *OVHCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) { - // Fetch node pool label on nodes + // If the provider ID is empty (only the prefix), it means that we are processing an UnregisteredNode retrieved + // from OVHCloud APIs, which has just started being created, and the OpenStack instance ID is not yet set. + // We won't be able to determine the node group of the node with the information at hand. + if node.Spec.ProviderID == providerIDPrefix { + return nil, nil + } + + // Try to retrieve the associated node group from an already built mapping in cache + if ng := provider.findNodeGroupFromCache(node.Spec.ProviderID); ng != nil { + return ng, nil + } + + // Try to find the associated node group from the nodepool label on the node + if ng := provider.findNodeGroupFromLabel(node); ng != nil { + return ng, nil + } + + klog.V(4).Infof("trying to find node group of node %s (provider ID %s) by listing all nodes under autoscaled node pools", node.Name, node.Spec.ProviderID) + + // Call the OVHCloud APIs to list all nodes under autoscaled node pools and find the associated node group. + // This should also refresh the cache for the next time + ng, err := provider.findNodeGroupByListingNodes(node) + if ng == nil { + klog.Warningf("unable to find which node group the node %s (provider ID %s) belongs to", node.Name, node.Spec.ProviderID) + } + + return ng, err +} + +// findNodeGroupFromCache tries to retrieve the associated node group from an already built mapping in cache +func (provider *OVHCloudProvider) findNodeGroupFromCache(providerID string) cloudprovider.NodeGroup { + if ng, ok := provider.manager.NodeGroupPerProviderID[providerID]; ok { + return ng + } + return nil +} + +// findNodeGroupFromLabel tries to find the associated node group from the nodepool label on the node +func (provider *OVHCloudProvider) findNodeGroupFromLabel(node *apiv1.Node) cloudprovider.NodeGroup { + // Retrieve the label specifying the pool the node belongs to labels := node.GetLabels() label, exists := labels[NodePoolLabel] if !exists { - return nil, nil + return nil } - // Find in node groups stored in cache which one is linked to the node - for _, pool := range provider.manager.NodePools { - // Do not check node group which does not support auto-scaling - if !pool.Autoscale { - continue + // Find in the node groups stored in cache the one with the same name + for _, ng := range provider.NodeGroups() { + if ng.Id() == label { + return ng + } + } + + return nil +} + +// findNodeGroupByListingNodes finds the associated node group from by listing all nodes under autoscaled node pools +func (provider *OVHCloudProvider) findNodeGroupByListingNodes(node *apiv1.Node) (cloudprovider.NodeGroup, error) { + for _, ng := range provider.NodeGroups() { + // This calls OVHCloud APIs and refreshes the cache + instances, err := ng.Nodes() + if err != nil { + return nil, fmt.Errorf("failed to list nodes in node group %s: %w", ng.Id(), err) } - if pool.Name == label { - return &NodeGroup{ - NodePool: pool, - Manager: provider.manager, - CurrentSize: -1, - }, nil + for _, instance := range instances { + if instance.Id == node.Spec.ProviderID { + return ng, nil + } } } @@ -157,15 +207,16 @@ func (provider *OVHCloudProvider) Pricing() (cloudprovider.PricingModel, errors. // GetAvailableMachineTypes get all machine types that can be requested from // the cloud provider. Implementation optional. func (provider *OVHCloudProvider) GetAvailableMachineTypes() ([]string, error) { - // Fetch all flavors in API - flavors, err := provider.manager.Client.ListFlavors(context.Background(), provider.manager.ProjectID, provider.manager.ClusterID) + klog.V(4).Info("Getting available machine types") + + flavorsByName, err := provider.manager.getFlavorsByName() if err != nil { - return nil, fmt.Errorf("failed to list available flavors: %w", err) + return nil, fmt.Errorf("failed to get flavors: %w", err) } // Cast flavors into machine types string array machineTypes := make([]string, 0) - for _, flavor := range flavors { + for _, flavor := range flavorsByName { if flavor.State == MachineAvailableState { machineTypes = append(machineTypes, flavor.Name) } @@ -206,16 +257,18 @@ func (provider *OVHCloudProvider) GPULabel() string { // GetAvailableGPUTypes return all available GPU types cloud provider supports. func (provider *OVHCloudProvider) GetAvailableGPUTypes() map[string]struct{} { - // Fetch all flavors in API - flavors, err := provider.manager.Client.ListFlavors(context.Background(), provider.manager.ProjectID, provider.manager.ClusterID) + klog.V(4).Info("Getting available GPU types") + + flavorsByName, err := provider.manager.getFlavorsByName() if err != nil { + klog.Errorf("Failed to get flavors: %w", err) return nil } // Cast flavors into gpu types string array gpuTypes := make(map[string]struct{}, 0) - for _, flavor := range flavors { - if flavor.State == MachineAvailableState && flavor.Category == GPUMachineCategory { + for _, flavor := range flavorsByName { + if flavor.State == MachineAvailableState && flavor.GPUs > 0 { gpuTypes[flavor.Name] = struct{}{} } } @@ -233,7 +286,7 @@ func (provider *OVHCloudProvider) Cleanup() error { // update cloud provider state. In particular the list of node groups returned // by NodeGroups() can change as a result of CloudProvider.Refresh(). func (provider *OVHCloudProvider) Refresh() error { - klog.V(4).Info("Refreshing NodeGroups") + klog.V(4).Info("Listing node pools to refresh NodeGroups") // Check if OpenStack keystone token need to be revoke and re-create err := provider.manager.ReAuthenticate() @@ -247,15 +300,8 @@ func (provider *OVHCloudProvider) Refresh() error { return fmt.Errorf("failed to refresh node pool list: %w", err) } - // Do not append node pool which does not support autoscaling - provider.manager.NodePools = []sdk.NodePool{} - for _, pool := range pools { - if !pool.Autoscale { - continue - } - - provider.manager.NodePools = append(provider.manager.NodePools, pool) - } + // Update the node pools cache + provider.manager.NodePools = pools return nil } diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider_test.go index 7fdb68f76898..2bded3033907 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider_test.go @@ -21,10 +21,9 @@ import ( "context" "testing" + "github.com/stretchr/testify/assert" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "github.com/stretchr/testify/assert" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk" ) @@ -45,7 +44,6 @@ func newTestProvider(t *testing.T) *OVHCloudProvider { assert.FailNow(t, "failed to create manager", err) } - // fill the test provider with some example client := &sdk.ClientMock{} ctx := context.Background() @@ -72,22 +70,31 @@ func newTestProvider(t *testing.T) *OVHCloudProvider { }, nil, ) - client.On("ListFlavors", ctx, "projectID", "clusterID").Return( + client.On("ListClusterFlavors", ctx, "projectID", "clusterID").Return( []sdk.Flavor{ { Name: "b2-7", Category: "b", State: "available", + VCPUs: 2, + GPUs: 0, + RAM: 7, }, { Name: "t1-45", Category: "t", State: "available", + VCPUs: 8, + GPUs: 1, + RAM: 45, }, { Name: "unknown", Category: "", State: "unavailable", + VCPUs: 2, + GPUs: 0, + RAM: 7, }, }, nil, ) @@ -130,7 +137,7 @@ func TestOVHCloudProvider_NodeGroups(t *testing.T) { t.Run("check default node groups length", func(t *testing.T) { groups := provider.NodeGroups() - assert.Equal(t, 1, len(groups)) + assert.Equal(t, 2, len(groups)) }) t.Run("check empty node groups length after reset", func(t *testing.T) { @@ -144,7 +151,45 @@ func TestOVHCloudProvider_NodeGroups(t *testing.T) { func TestOVHCloudProvider_NodeGroupForNode(t *testing.T) { provider := newTestProvider(t) - t.Run("check node group with correct label on node", func(t *testing.T) { + ListNodePoolNodesCall1 := provider.manager.Client.(*sdk.ClientMock).On( + "ListNodePoolNodes", + context.Background(), + provider.manager.ProjectID, + provider.manager.ClusterID, + "1", + ) + ListNodePoolNodesCall2 := provider.manager.Client.(*sdk.ClientMock).On( + "ListNodePoolNodes", + context.Background(), + provider.manager.ProjectID, + provider.manager.ClusterID, + "2", + ) + + t.Run("find node group in node group associations cache", func(t *testing.T) { + node := &apiv1.Node{ + Spec: apiv1.NodeSpec{ + ProviderID: providerIDPrefix + "0123", + }, + } + + // Set up the node group association in cache + ng := newTestNodeGroup(t, "b2-7") + provider.manager.NodeGroupPerProviderID[node.Spec.ProviderID] = ng + defer func() { + provider.manager.NodeGroupPerProviderID = make(map[string]*NodeGroup) + }() + + group, err := provider.NodeGroupForNode(node) + assert.NoError(t, err) + assert.NotNil(t, group) + + assert.Equal(t, ng.Name, group.Id()) + assert.Equal(t, ng.MinNodes, uint32(group.MinSize())) + assert.Equal(t, ng.MaxNodes, uint32(group.MaxSize())) + }) + + t.Run("find node group with label on node", func(t *testing.T) { node := &apiv1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "node-1", @@ -152,8 +197,53 @@ func TestOVHCloudProvider_NodeGroupForNode(t *testing.T) { "nodepool": "pool-1", }, }, + Spec: apiv1.NodeSpec{ + ProviderID: providerIDPrefix + "0123", + }, + } + + group, err := provider.NodeGroupForNode(node) + assert.NoError(t, err) + assert.NotNil(t, group) + + assert.Equal(t, "pool-1", group.Id()) + assert.Equal(t, 1, group.MinSize()) + assert.Equal(t, 5, group.MaxSize()) + }) + + t.Run("find node group by listing nodes", func(t *testing.T) { + node := &apiv1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, + Spec: apiv1.NodeSpec{ + ProviderID: providerIDPrefix + "0123", + }, } + // Mock the list nodes api call + ListNodePoolNodesCall1.Return( + []sdk.Node{ + { + Name: "node-0", + InstanceID: "0", + }, + { + Name: "node-1", + InstanceID: "0123", // This corresponds to the node providerID we need + }, + { + Name: "node-2", + InstanceID: "2", + }, + }, nil, + ) + + // Purge the node group associations cache afterwards for the following tests + defer func() { + provider.manager.NodeGroupPerProviderID = make(map[string]*NodeGroup) + }() + group, err := provider.NodeGroupForNode(node) assert.NoError(t, err) assert.NotNil(t, group) @@ -163,7 +253,19 @@ func TestOVHCloudProvider_NodeGroupForNode(t *testing.T) { assert.Equal(t, 5, group.MaxSize()) }) - t.Run("check node group empty if label not exists", func(t *testing.T) { + t.Run("fail to find node group with empty providerID", func(t *testing.T) { + node := &apiv1.Node{ + Spec: apiv1.NodeSpec{ + ProviderID: providerIDPrefix, + }, + } + + group, err := provider.NodeGroupForNode(node) + assert.NoError(t, err) + assert.Nil(t, group) + }) + + t.Run("fail to find node group with incorrect label", func(t *testing.T) { node := &apiv1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "node-1", @@ -171,21 +273,43 @@ func TestOVHCloudProvider_NodeGroupForNode(t *testing.T) { "nodepool": "pool-unknown", }, }, + Spec: apiv1.NodeSpec{ + ProviderID: providerIDPrefix + "0123", + }, } + // Mock the list nodes api call + ListNodePoolNodesCall1.Return( + []sdk.Node{}, nil, + ) + ListNodePoolNodesCall2.Return( + []sdk.Node{}, nil, + ) + group, err := provider.NodeGroupForNode(node) assert.NoError(t, err) assert.Nil(t, group) }) - t.Run("check node group empty if label not present", func(t *testing.T) { + t.Run("fail to find node group with no cache, no label and no result in API call", func(t *testing.T) { node := &apiv1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "node-1", Labels: map[string]string{}, }, + Spec: apiv1.NodeSpec{ + ProviderID: providerIDPrefix + "0123", + }, } + // Mock the list nodes api call + ListNodePoolNodesCall1.Return( + []sdk.Node{}, nil, + ).Once() + ListNodePoolNodesCall2.Return( + []sdk.Node{}, nil, + ).Once() + group, err := provider.NodeGroupForNode(node) assert.NoError(t, err) assert.Nil(t, group) @@ -288,6 +412,6 @@ func TestOVHCloudProvider_Refresh(t *testing.T) { assert.NoError(t, err) groups = provider.NodeGroups() - assert.Equal(t, 1, len(groups)) + assert.Equal(t, 2, len(groups)) }) } diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/consumer_key.go b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/consumer_key.go index 9f2b47e7b1d3..e013047bc3c8 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/consumer_key.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/consumer_key.go @@ -119,7 +119,7 @@ func (ck *CkRequest) AddRecursiveRules(methods []string, path string) { // and return the URL the user needs to visit to validate the key func (ck *CkRequest) Do() (*CkValidationState, error) { state := CkValidationState{} - err := ck.client.PostUnAuth("/auth/credential", ck, &state) + err := ck.client.PostUnAuth("/auth/credential", ck, &state, nil) if err == nil { ck.client.ConsumerKey = state.ConsumerKey diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/flavor.go b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/flavor.go index 11b9a42c149d..a797a5aae3ab 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/flavor.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/flavor.go @@ -26,10 +26,13 @@ type Flavor struct { Name string `json:"name"` Category string `json:"category"` State string `json:"state"` + VCPUs int `json:"vCPUs"` + GPUs int `json:"gpus"` + RAM int `json:"ram"` } -// ListFlavors allows to display flavors available for nodes templates -func (c *Client) ListFlavors(ctx context.Context, projectID string, clusterID string) ([]Flavor, error) { +// ListClusterFlavors allows to display flavors available for nodes templates +func (c *Client) ListClusterFlavors(ctx context.Context, projectID string, clusterID string) ([]Flavor, error) { flavors := make([]Flavor, 0) return flavors, c.CallAPIWithContext( @@ -39,6 +42,7 @@ func (c *Client) ListFlavors(ctx context.Context, projectID string, clusterID st nil, &flavors, nil, + nil, true, ) } diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/mock.go b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/mock.go index a21403fe2fb6..a59c36007c36 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/mock.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/mock.go @@ -62,8 +62,8 @@ func (m *ClientMock) DeleteNodePool(ctx context.Context, projectID string, clust return args.Get(0).(*NodePool), args.Error(1) } -// ListFlavors mocks API call for listing available flavors in cluster -func (m *ClientMock) ListFlavors(ctx context.Context, projectID string, clusterID string) ([]Flavor, error) { +// ListClusterFlavors mocks API call for listing available flavors in cluster +func (m *ClientMock) ListClusterFlavors(ctx context.Context, projectID string, clusterID string) ([]Flavor, error) { args := m.Called(ctx, projectID, clusterID) return args.Get(0).([]Flavor), args.Error(1) diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/nodepool.go b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/nodepool.go index 02e0b440c0fe..d9f935cb062e 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/nodepool.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/nodepool.go @@ -74,6 +74,7 @@ func (c *Client) ListNodePools(ctx context.Context, projectID, clusterID string) nil, &nodepools, nil, + nil, true, ) } @@ -89,6 +90,7 @@ func (c *Client) GetNodePool(ctx context.Context, projectID string, clusterID st nil, &nodepool, nil, + nil, true, ) } @@ -104,6 +106,7 @@ func (c *Client) ListNodePoolNodes(ctx context.Context, projectID string, cluste nil, &nodes, nil, + nil, true, ) } @@ -133,6 +136,7 @@ func (c *Client) CreateNodePool(ctx context.Context, projectID string, clusterID opts, &nodepool, nil, + nil, true, ) } @@ -159,6 +163,7 @@ func (c *Client) UpdateNodePool(ctx context.Context, projectID string, clusterID opts, &nodepool, nil, + nil, true, ) } @@ -174,6 +179,7 @@ func (c *Client) DeleteNodePool(ctx context.Context, projectID string, clusterID nil, &nodepool, nil, + nil, true, ) } diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/ovh.go b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/ovh.go index cccc8d2a2384..0ab335df1308 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/ovh.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/ovh.go @@ -25,6 +25,7 @@ import ( "fmt" "io/ioutil" "net/http" + "net/url" "strconv" "strings" "sync" @@ -176,83 +177,83 @@ func (c *Client) Time() (*time.Time, error) { // // Get is a wrapper for the GET method -func (c *Client) Get(url string, resType interface{}) error { - return c.CallAPI("GET", url, nil, resType, true) +func (c *Client) Get(url string, result interface{}, queryParams url.Values) error { + return c.CallAPI("GET", url, nil, result, queryParams, true) } // GetUnAuth is a wrapper for the unauthenticated GET method -func (c *Client) GetUnAuth(url string, resType interface{}) error { - return c.CallAPI("GET", url, nil, resType, false) +func (c *Client) GetUnAuth(url string, result interface{}, queryParams url.Values) error { + return c.CallAPI("GET", url, nil, result, queryParams, false) } // Post is a wrapper for the POST method -func (c *Client) Post(url string, reqBody, resType interface{}) error { - return c.CallAPI("POST", url, reqBody, resType, true) +func (c *Client) Post(url string, reqBody, result interface{}, queryParams url.Values) error { + return c.CallAPI("POST", url, reqBody, result, queryParams, true) } // PostUnAuth is a wrapper for the unauthenticated POST method -func (c *Client) PostUnAuth(url string, reqBody, resType interface{}) error { - return c.CallAPI("POST", url, reqBody, resType, false) +func (c *Client) PostUnAuth(url string, reqBody, result interface{}, queryParams url.Values) error { + return c.CallAPI("POST", url, reqBody, result, queryParams, false) } // Put is a wrapper for the PUT method -func (c *Client) Put(url string, reqBody, resType interface{}) error { - return c.CallAPI("PUT", url, reqBody, resType, true) +func (c *Client) Put(url string, reqBody, result interface{}, queryParams url.Values) error { + return c.CallAPI("PUT", url, reqBody, result, queryParams, true) } // PutUnAuth is a wrapper for the unauthenticated PUT method -func (c *Client) PutUnAuth(url string, reqBody, resType interface{}) error { - return c.CallAPI("PUT", url, reqBody, resType, false) +func (c *Client) PutUnAuth(url string, reqBody, result interface{}, queryParams url.Values) error { + return c.CallAPI("PUT", url, reqBody, result, queryParams, false) } // Delete is a wrapper for the DELETE method -func (c *Client) Delete(url string, resType interface{}) error { - return c.CallAPI("DELETE", url, nil, resType, true) +func (c *Client) Delete(url string, result interface{}, queryParams url.Values) error { + return c.CallAPI("DELETE", url, nil, result, queryParams, true) } // DeleteUnAuth is a wrapper for the unauthenticated DELETE method -func (c *Client) DeleteUnAuth(url string, resType interface{}) error { - return c.CallAPI("DELETE", url, nil, resType, false) +func (c *Client) DeleteUnAuth(url string, result interface{}, queryParams url.Values) error { + return c.CallAPI("DELETE", url, nil, result, queryParams, false) } // GetWithContext is a wrapper for the GET method -func (c *Client) GetWithContext(ctx context.Context, url string, resType interface{}) error { - return c.CallAPIWithContext(ctx, "GET", url, nil, resType, nil, true) +func (c *Client) GetWithContext(ctx context.Context, url string, result interface{}, queryParams url.Values) error { + return c.CallAPIWithContext(ctx, "GET", url, nil, result, queryParams, nil, true) } // GetUnAuthWithContext is a wrapper for the unauthenticated GET method -func (c *Client) GetUnAuthWithContext(ctx context.Context, url string, resType interface{}) error { - return c.CallAPIWithContext(ctx, "GET", url, nil, resType, nil, false) +func (c *Client) GetUnAuthWithContext(ctx context.Context, url string, result interface{}, queryParams url.Values) error { + return c.CallAPIWithContext(ctx, "GET", url, nil, result, queryParams, nil, false) } // PostWithContext is a wrapper for the POST method -func (c *Client) PostWithContext(ctx context.Context, url string, reqBody, resType interface{}) error { - return c.CallAPIWithContext(ctx, "POST", url, reqBody, resType, nil, true) +func (c *Client) PostWithContext(ctx context.Context, url string, reqBody, result interface{}, queryParams url.Values) error { + return c.CallAPIWithContext(ctx, "POST", url, reqBody, result, queryParams, nil, true) } // PostUnAuthWithContext is a wrapper for the unauthenticated POST method -func (c *Client) PostUnAuthWithContext(ctx context.Context, url string, reqBody, resType interface{}) error { - return c.CallAPIWithContext(ctx, "POST", url, reqBody, resType, nil, false) +func (c *Client) PostUnAuthWithContext(ctx context.Context, url string, reqBody, result interface{}, queryParams url.Values) error { + return c.CallAPIWithContext(ctx, "POST", url, reqBody, result, queryParams, nil, false) } // PutWithContext is a wrapper for the PUT method -func (c *Client) PutWithContext(ctx context.Context, url string, reqBody, resType interface{}) error { - return c.CallAPIWithContext(ctx, "PUT", url, reqBody, resType, nil, true) +func (c *Client) PutWithContext(ctx context.Context, url string, reqBody, result interface{}, queryParams url.Values) error { + return c.CallAPIWithContext(ctx, "PUT", url, reqBody, result, queryParams, nil, true) } // PutUnAuthWithContext is a wrapper for the unauthenticated PUT method -func (c *Client) PutUnAuthWithContext(ctx context.Context, url string, reqBody, resType interface{}) error { - return c.CallAPIWithContext(ctx, "PUT", url, reqBody, resType, nil, false) +func (c *Client) PutUnAuthWithContext(ctx context.Context, url string, reqBody, result interface{}, queryParams url.Values) error { + return c.CallAPIWithContext(ctx, "PUT", url, reqBody, result, queryParams, nil, false) } // DeleteWithContext is a wrapper for the DELETE method -func (c *Client) DeleteWithContext(ctx context.Context, url string, resType interface{}) error { - return c.CallAPIWithContext(ctx, "DELETE", url, nil, resType, nil, true) +func (c *Client) DeleteWithContext(ctx context.Context, url string, result interface{}, queryParams url.Values) error { + return c.CallAPIWithContext(ctx, "DELETE", url, nil, result, queryParams, nil, true) } // DeleteUnAuthWithContext is a wrapper for the unauthenticated DELETE method -func (c *Client) DeleteUnAuthWithContext(ctx context.Context, url string, resType interface{}) error { - return c.CallAPIWithContext(ctx, "DELETE", url, nil, resType, nil, false) +func (c *Client) DeleteUnAuthWithContext(ctx context.Context, url string, result interface{}, queryParams url.Values) error { + return c.CallAPIWithContext(ctx, "DELETE", url, nil, result, queryParams, nil, false) } // timeDelta returns the time delta between the host and the remote API @@ -284,7 +285,7 @@ func (c *Client) getTimeDelta() (time.Duration, error) { func (c *Client) getTime() (*time.Time, error) { var timestamp int64 - err := c.GetUnAuth("/auth/time", ×tamp) + err := c.GetUnAuth("/auth/time", ×tamp, nil) if err != nil { return nil, err } @@ -306,7 +307,7 @@ var getEndpointForSignature = func(c *Client) string { } // NewRequest returns a new HTTP request -func (c *Client) NewRequest(method, path string, reqBody interface{}, headers map[string]interface{}, needAuth bool) (*http.Request, error) { +func (c *Client) NewRequest(method, path string, reqBody interface{}, queryParams url.Values, headers map[string]interface{}, needAuth bool) (*http.Request, error) { var body []byte var err error @@ -402,10 +403,10 @@ func (c *Client) Do(req *http.Request) (*http.Response, error) { // argument is not nil, it will also serialize it as json and inject // the required Content-Type header. // -// If everything went fine, unmarshall response into resType and return nil +// If everything went fine, unmarshall response into result and return nil // otherwise, return the error -func (c *Client) CallAPI(method, path string, reqBody, resType interface{}, needAuth bool) error { - return c.CallAPIWithContext(context.Background(), method, path, reqBody, resType, nil, needAuth) +func (c *Client) CallAPI(method, path string, reqBody, result interface{}, queryParams url.Values, needAuth bool) error { + return c.CallAPIWithContext(context.Background(), method, path, reqBody, result, queryParams, nil, needAuth) } // CallAPIWithContext is the lowest level call helper. If needAuth is true, @@ -426,10 +427,10 @@ func (c *Client) CallAPI(method, path string, reqBody, resType interface{}, need // argument is not nil, it will also serialize it as json and inject // the required Content-Type header. // -// If everything went fine, unmarshall response into resType and return nil +// If everything went fine, unmarshall response into result and return nil // otherwise, return the error -func (c *Client) CallAPIWithContext(ctx context.Context, method, path string, reqBody, resType interface{}, headers map[string]interface{}, needAuth bool) error { - req, err := c.NewRequest(method, path, reqBody, headers, needAuth) +func (c *Client) CallAPIWithContext(ctx context.Context, method, path string, reqBody, result interface{}, queryParams url.Values, headers map[string]interface{}, needAuth bool) error { + req, err := c.NewRequest(method, path, reqBody, queryParams, headers, needAuth) if err != nil { return err } @@ -438,12 +439,12 @@ func (c *Client) CallAPIWithContext(ctx context.Context, method, path string, re if err != nil { return err } - return c.UnmarshalResponse(response, resType) + return c.UnmarshalResponse(response, result) } // UnmarshalResponse checks the response and unmarshals it into the response // type if needed Helper function, called from CallAPI -func (c *Client) UnmarshalResponse(response *http.Response, resType interface{}) error { +func (c *Client) UnmarshalResponse(response *http.Response, result interface{}) error { // Read all the response body defer response.Body.Close() body, err := ioutil.ReadAll(response.Body) @@ -463,9 +464,9 @@ func (c *Client) UnmarshalResponse(response *http.Response, resType interface{}) } // Nothing to unmarshal - if len(body) == 0 || resType == nil { + if len(body) == 0 || result == nil { return nil } - return json.Unmarshal(body, &resType) + return json.Unmarshal(body, &result) }