diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager.go
index a316d0f74490..3cfa2ecc63bb 100644
--- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager.go
+++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager.go
@@ -23,10 +23,14 @@ import (
 	"fmt"
 	"io"
 	"io/ioutil"
+	"time"

 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk"
+	"k8s.io/klog/v2"
 )

+const flavorCacheDuration = time.Hour
+
 // ClientInterface defines all mandatory methods to be exposed as a client (mock or API)
 type ClientInterface interface {
 	// ListNodePools lists all the node pools found in a Kubernetes cluster.
@@ -44,8 +48,8 @@ type ClientInterface interface {
 	// DeleteNodePool deletes a specific pool.
 	DeleteNodePool(ctx context.Context, projectID string, clusterID string, poolID string) (*sdk.NodePool, error)

-	// ListFlavors list all available flavors usable in a Kubernetes cluster.
-	ListFlavors(ctx context.Context, projectID string, clusterID string) ([]sdk.Flavor, error)
+	// ListClusterFlavors lists all available flavors usable in a Kubernetes cluster.
+	ListClusterFlavors(ctx context.Context, projectID string, clusterID string) ([]sdk.Flavor, error)
 }

 // OvhCloudManager defines current application context manager to interact
@@ -57,7 +61,11 @@ type OvhCloudManager struct {
 	ClusterID string
 	ProjectID string

-	NodePools []sdk.NodePool
+	NodePools              []sdk.NodePool
+	NodeGroupPerProviderID map[string]*NodeGroup
+
+	FlavorsCache               map[string]sdk.Flavor
+	FlavorsCacheExpirationTime time.Time
 }

 // Config is the configuration file content of OVHcloud provider
@@ -138,10 +146,52 @@ func NewManager(configFile io.Reader) (*OvhCloudManager, error) {
 		ProjectID: cfg.ProjectID,
 		ClusterID: cfg.ClusterID,

-		NodePools: make([]sdk.NodePool, 0),
+		NodePools:              make([]sdk.NodePool, 0),
+		NodeGroupPerProviderID: make(map[string]*NodeGroup),
+
+		FlavorsCache:               make(map[string]sdk.Flavor),
+		FlavorsCacheExpirationTime: time.Time{},
 	}, nil
 }

+// getFlavorsByName lists available flavors from cache or from OVHCloud APIs if the cache is outdated
+func (m *OvhCloudManager) getFlavorsByName() (map[string]sdk.Flavor, error) {
+	// Update the flavors cache if expired
+	if m.FlavorsCacheExpirationTime.Before(time.Now()) {
+		newFlavorCacheExpirationTime := time.Now().Add(flavorCacheDuration)
+		klog.V(4).Infof("Listing flavors to update flavors cache (will expire at %s)", newFlavorCacheExpirationTime)
+
+		// Fetch all flavors from the API
+		flavors, err := m.Client.ListClusterFlavors(context.Background(), m.ProjectID, m.ClusterID)
+		if err != nil {
+			return nil, fmt.Errorf("failed to list available flavors: %w", err)
+		}
+
+		// Update the flavors cache
+		m.FlavorsCache = make(map[string]sdk.Flavor)
+		for _, flavor := range flavors {
+			m.FlavorsCache[flavor.Name] = flavor
+			m.FlavorsCacheExpirationTime = newFlavorCacheExpirationTime
+		}
+	}
+
+	return m.FlavorsCache, nil
+}
+
+// getFlavorByName returns the given flavor from cache or API
+func (m *OvhCloudManager) getFlavorByName(flavorName string) (sdk.Flavor, error) {
+	flavorsByName, err := m.getFlavorsByName()
+	if err != nil {
+		return sdk.Flavor{}, err
+	}
+
+	if flavor, ok := flavorsByName[flavorName]; ok {
+		return flavor, nil
+	}
+
+	return sdk.Flavor{}, fmt.Errorf("flavor %s not found in available flavors", flavorName)
+}
+
 // ReAuthenticate allows OpenStack keystone token to be revoked and re-created to call API
 func (m *OvhCloudManager) ReAuthenticate() error {
 	if m.OpenStackProvider != nil {
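The manager's flavor cache above is a plain map guarded by an expiration timestamp rather than a TTL library. A minimal standalone sketch of the same refresh-on-expiry pattern, with illustrative names not taken from the patch:

```go
package main

import (
	"fmt"
	"time"
)

// expiringCache memoizes a fetch function for a fixed duration, mirroring
// the FlavorsCache / FlavorsCacheExpirationTime pair in OvhCloudManager.
type expiringCache struct {
	data      map[string]string
	expiresAt time.Time
	ttl       time.Duration
	fetch     func() (map[string]string, error)
}

func (c *expiringCache) get() (map[string]string, error) {
	// Refresh only once the previous snapshot has expired
	if c.expiresAt.Before(time.Now()) {
		fresh, err := c.fetch()
		if err != nil {
			return nil, err
		}
		c.data = fresh
		c.expiresAt = time.Now().Add(c.ttl)
	}
	return c.data, nil
}

func main() {
	calls := 0
	c := &expiringCache{ttl: time.Hour, fetch: func() (map[string]string, error) {
		calls++
		return map[string]string{"b2-7": "available"}, nil
	}}
	c.get()
	c.get() // served from cache, fetch runs only once
	fmt.Println(calls) // 1
}
```
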
diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager_test.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager_test.go
new file mode 100644
index 000000000000..1bf3a3111192
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_manager_test.go
@@ -0,0 +1,173 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package ovhcloud
+
+import (
+	"bytes"
+	"context"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk"
+)
+
+func newTestManager(t *testing.T) *OvhCloudManager {
+	cfg := `{
+		"project_id": "projectID",
+		"cluster_id": "clusterID",
+		"authentication_type": "consumer",
+		"application_endpoint": "ovh-eu",
+		"application_key": "key",
+		"application_secret": "secret",
+		"application_consumer_key": "consumer_key"
+	}`
+
+	manager, err := NewManager(bytes.NewBufferString(cfg))
+	if err != nil {
+		assert.FailNow(t, "failed to create manager", err)
+	}
+
+	client := &sdk.ClientMock{}
+	ctx := context.Background()
+
+	client.On("ListClusterFlavors", ctx, "projectID", "clusterID").Return(
+		[]sdk.Flavor{
+			{
+				Name:     "b2-7",
+				Category: "b",
+				State:    "available",
+				VCPUs:    2,
+				GPUs:     0,
+				RAM:      7,
+			},
+			{
+				Name:     "t1-45",
+				Category: "t",
+				State:    "available",
+				VCPUs:    8,
+				GPUs:     1,
+				RAM:      45,
+			},
+			{
+				Name:     "unknown",
+				Category: "",
+				State:    "unavailable",
+				VCPUs:    2,
+				GPUs:     0,
+				RAM:      7,
+			},
+		}, nil,
+	)
+	manager.Client = client
+
+	return manager
+}
+
+func TestOvhCloudManager_getFlavorsByName(t *testing.T) {
+	expectedFlavorsByNameFromAPICall := map[string]sdk.Flavor{
+		"b2-7": {
+			Name:     "b2-7",
+			Category: "b",
+			State:    "available",
+			VCPUs:    2,
+			GPUs:     0,
+			RAM:      7,
+		},
+		"t1-45": {
+			Name:     "t1-45",
+			Category: "t",
+			State:    "available",
+			VCPUs:    8,
+			GPUs:     1,
+			RAM:      45,
+		},
+		"unknown": {
+			Name:     "unknown",
+			Category: "",
+			State:    "unavailable",
+			VCPUs:    2,
+			GPUs:     0,
+			RAM:      7,
+		},
+	}
+
+	t.Run("brand new manager: list from api", func(t *testing.T) {
+		ng := newTestManager(t)
+		flavorsByName, err := ng.getFlavorsByName()
+
+		ng.Client.(*sdk.ClientMock).AssertCalled(t, "ListClusterFlavors", context.Background(), "projectID", "clusterID")
+		assert.NoError(t, err)
+		assert.Equal(t, expectedFlavorsByNameFromAPICall, flavorsByName)
+		assert.Equal(t, expectedFlavorsByNameFromAPICall, ng.FlavorsCache)
+	})
+
+	t.Run("flavors cache expired: renew and list from api", func(t *testing.T) {
+		initialFlavorsCache := map[string]sdk.Flavor{
+			"custom": {
+				Name: "custom",
+			},
+		}
+
+		ng := newTestManager(t)
+		ng.FlavorsCache = initialFlavorsCache
+		ng.FlavorsCacheExpirationTime = time.Now()
+
+		flavorsByName, err := ng.getFlavorsByName()
+
+		ng.Client.(*sdk.ClientMock).AssertCalled(t, "ListClusterFlavors", context.Background(), "projectID", "clusterID")
+		assert.NoError(t, err)
+		assert.Equal(t, expectedFlavorsByNameFromAPICall, flavorsByName)
+		assert.Equal(t, expectedFlavorsByNameFromAPICall, ng.FlavorsCache)
+	})
+
t.Run("flavors cache still valid: list from cache", func(t *testing.T) { + initialFlavorsCache := map[string]sdk.Flavor{ + "custom": { + Name: "custom", + }, + } + + ng := newTestManager(t) + ng.FlavorsCache = initialFlavorsCache + ng.FlavorsCacheExpirationTime = time.Now().Add(time.Minute) + + flavorsByName, err := ng.getFlavorsByName() + + ng.Client.(*sdk.ClientMock).AssertNotCalled(t, "ListClusterFlavors", context.Background(), "projectID", "clusterID") + assert.NoError(t, err) + assert.Equal(t, initialFlavorsCache, flavorsByName) + assert.Equal(t, initialFlavorsCache, ng.FlavorsCache) + }) +} + +func TestOvhCloudManager_getFlavorByName(t *testing.T) { + ng := newTestManager(t) + + t.Run("check default node group max size", func(t *testing.T) { + flavor, err := ng.getFlavorByName("b2-7") + assert.NoError(t, err) + assert.Equal(t, sdk.Flavor{ + Name: "b2-7", + Category: "b", + State: "available", + VCPUs: 2, + GPUs: 0, + RAM: 7, + }, flavor) + }) +} diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go index b8b68da84b78..2ae6eec19ecf 100644 --- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go +++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go @@ -19,12 +19,13 @@ package ovhcloud import ( "context" "fmt" + "math" "math/rand" - "regexp" "strings" "time" apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" @@ -32,10 +33,10 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk" "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" ) -// instanceIdRegex defines the expression used for instance's ID -var instanceIdRegex = regexp.MustCompile(`^(.*)/(.*)$`) +const providerIDPrefix = "openstack:///" // NodeGroup implements cloudprovider.NodeGroup interface. 
diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go
index b8b68da84b78..2ae6eec19ecf 100644
--- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go
+++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_node_group.go
@@ -19,12 +19,13 @@ package ovhcloud
 import (
 	"context"
 	"fmt"
+	"math"
 	"math/rand"
-	"regexp"
 	"strings"
 	"time"

 	apiv1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/klog/v2"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
@@ -32,10 +33,10 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk"
 	"k8s.io/autoscaler/cluster-autoscaler/config"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
 )

-// instanceIdRegex defines the expression used for instance's ID
-var instanceIdRegex = regexp.MustCompile(`^(.*)/(.*)$`)
+const providerIDPrefix = "openstack:///"

 // NodeGroup implements cloudprovider.NodeGroup interface.
 type NodeGroup struct {
@@ -73,7 +74,7 @@ func (ng *NodeGroup) IncreaseSize(delta int) error {
 		return nil
 	}

-	klog.V(4).Infof("Increasing NodeGroup size with %d nodes", delta)
+	klog.V(4).Infof("Increasing NodeGroup size by %d node(s)", delta)

 	// First, verify the NodeGroup can be increased
 	if delta <= 0 {
@@ -96,16 +97,14 @@ func (ng *NodeGroup) IncreaseSize(delta int) error {
 	opts := sdk.UpdateNodePoolOpts{
 		DesiredNodes: &desired,
 	}
+	klog.V(4).Infof("Upscaling node pool %s to %d desired nodes", ng.ID, desired)

-	// Eventually, call API to increase desired nodes number, automatically creating new nodes (wait for the pool to be READY before trying to update)
-	if ng.Status == "READY" {
-		resp, err := ng.Manager.Client.UpdateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID, &opts)
-		if err != nil {
-			return fmt.Errorf("failed to increase node pool desired size: %w", err)
-		}
-
-		ng.Status = resp.Status
+	// Call API to increase desired nodes number, automatically creating new nodes
+	resp, err := ng.Manager.Client.UpdateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID, &opts)
+	if err != nil {
+		return fmt.Errorf("failed to increase node pool desired size: %w", err)
 	}
+	ng.Status = resp.Status

 	return nil
 }
@@ -117,7 +116,7 @@ func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
 		return nil
 	}

-	klog.V(4).Infof("Deleting %d nodes", len(nodes))
+	klog.V(4).Infof("Deleting %d node(s)", len(nodes))

 	// First, verify the NodeGroup can be decreased
 	size, err := ng.TargetSize()
@@ -129,37 +128,28 @@ func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
 		return fmt.Errorf("node group size would be below minimum size - desired: %d, max: %d", size-len(nodes), ng.MinSize())
 	}

-	// Then, fetch node group instances and check that all nodes to remove are linked correctly,
-	// otherwise it will return an error
-	instances, err := ng.Nodes()
-	if err != nil {
-		return fmt.Errorf("failed to list current node group instances: %w", err)
-	}
-
-	nodeIds, err := extractNodeIds(nodes, instances, ng.Id())
-	if err != nil {
-		return fmt.Errorf("failed to extract node ids to remove: %w", err)
+	nodeProviderIds := make([]string, 0)
+	for _, node := range nodes {
+		nodeProviderIds = append(nodeProviderIds, node.Spec.ProviderID)
 	}

-	// Then, forge current size and parameters
-	ng.CurrentSize = size - len(nodes)
-
-	desired := uint32(ng.CurrentSize)
+	desired := uint32(size - len(nodes))
 	opts := sdk.UpdateNodePoolOpts{
 		DesiredNodes:  &desired,
-		NodesToRemove: nodeIds,
+		NodesToRemove: nodeProviderIds,
 	}
+	klog.V(4).Infof("Downscaling node pool %s to %d desired nodes by deleting the following nodes: %s", ng.ID, desired, nodeProviderIds)

-	// Eventually, call API to remove nodes from a NodeGroup (wait for the pool to be READY before trying to update)
-	if ng.Status == "READY" {
-		resp, err := ng.Manager.Client.UpdateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID, &opts)
-		if err != nil {
-			return fmt.Errorf("failed to delete node pool nodes: %w", err)
-		}
-
-		ng.Status = resp.Status
+	// Call API to remove nodes from a NodeGroup
+	resp, err := ng.Manager.Client.UpdateNodePool(context.Background(), ng.Manager.ProjectID, ng.Manager.ClusterID, ng.ID, &opts)
+	if err != nil {
+		return fmt.Errorf("failed to delete node pool nodes: %w", err)
 	}

+	// Update the node group
+	ng.Status = resp.Status
+	ng.CurrentSize = size - len(nodes)
+
 	return nil
 }

@@ -169,30 +159,8 @@ func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
 // It is assumed that cloud provider will not delete the existing nodes if the size
 // when there is an option to just decrease the target.
 func (ng *NodeGroup) DecreaseTargetSize(delta int) error {
-	// Do not use node group which does not support autoscaling
-	if !ng.Autoscale {
-		return nil
-	}
-
-	klog.V(4).Infof("Decreasing NodeGroup size with %d nodes", delta)
-
-	// First, verify the NodeGroup can be decreased
-	if delta >= 0 {
-		return fmt.Errorf("decrease size node group delta must be negative")
-	}
-
-	size, err := ng.TargetSize()
-	if err != nil {
-		return fmt.Errorf("failed to get NodeGroup target size")
-	}
-
-	if size+delta < ng.MinSize() {
-		return fmt.Errorf("node group size would be below minimum size - desired: %d, max: %d", size+delta, ng.MinSize())
-	}
-
-	ng.CurrentSize = size + delta
-
-	return nil
+	// Cancellation of node provisioning is not supported yet
+	return cloudprovider.ErrNotImplemented
 }

 // Id returns node pool id.
@@ -214,15 +182,20 @@ func (ng *NodeGroup) Nodes() ([]cloudprovider.Instance, error) {
 		return nil, fmt.Errorf("failed to list node pool nodes: %w", err)
 	}

+	klog.V(4).Infof("%d nodes are listed in node pool %s", len(nodes), ng.ID)
+
 	// Cast all API nodes into instance interface
 	instances := make([]cloudprovider.Instance, 0)
 	for _, node := range nodes {
 		instance := cloudprovider.Instance{
-			Id:     fmt.Sprintf("%s/%s", node.ID, node.Name),
+			Id:     fmt.Sprintf("%s%s", providerIDPrefix, node.InstanceID),
 			Status: toInstanceStatus(node.Status),
 		}

 		instances = append(instances, instance)
+
+		// Store the associated node group in cache for future reference
+		ng.Manager.NodeGroupPerProviderID[instance.Id] = ng
 	}

 	return instances, nil
@@ -245,6 +218,18 @@ func (ng *NodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) {
 		},
 	}

+	flavor, err := ng.Manager.getFlavorByName(ng.Flavor)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get specs for flavor %q: %w", ng.Flavor, err)
+	}
+
+	node.Status.Capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)
+	node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(int64(flavor.VCPUs), resource.DecimalSI)
+	node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(int64(flavor.GPUs), resource.DecimalSI)
+	node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(int64(flavor.RAM)*int64(math.Pow(1024, 3)), resource.DecimalSI)
+
+	node.Status.Allocatable = node.Status.Capacity
+
 	// Setup node info template
 	nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(ng.Id()))
 	nodeInfo.SetNode(node)
@@ -260,7 +245,7 @@ func (ng *NodeGroup) Exist() bool {

 // Create creates the node group on the cloud provider side.
 func (ng *NodeGroup) Create() (cloudprovider.NodeGroup, error) {
-	klog.V(4).Infof("Creating a new NodeGroup")
+	klog.V(4).Info("Creating a new NodeGroup")

 	// Forge create node pool parameters (defaulting b2-7 for now)
 	name := ng.Id()
@@ -339,50 +324,17 @@ func (ng *NodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*c

 // isGpu checks if a node group is using GPU machines
 func (ng *NodeGroup) isGpu() bool {
-	return strings.HasPrefix(ng.Flavor, GPUMachineCategory)
-}
-
-// extractNodeIds find in an array of node resource their cloud instances IDs
-func extractNodeIds(nodes []*apiv1.Node, instances []cloudprovider.Instance, groupLabel string) ([]string, error) {
-	nodeIds := make([]string, 0)
-
-	// Loop through each nodes to find any un-wanted nodes to remove
-	for _, node := range nodes {
-		// First, check if node resource has correct node pool label
-		label, ok := node.Labels[NodePoolLabel]
-		if !ok || label != groupLabel {
-			return nil, fmt.Errorf("node %s without label %s, current: %s", node.Name, groupLabel, label)
-		}
-
-		// Then, loop through each group instances to find if the node is in the list
-		found := false
-		for _, instance := range instances {
-			match := instanceIdRegex.FindStringSubmatch(instance.Id)
-			if len(match) < 2 {
-				continue
-			}
-
-			id := match[1]
-			name := match[2]
-
-			if node.Name != name {
-				continue
-			}
-
-			found = true
-			nodeIds = append(nodeIds, id)
-			break
-		}
-
-		if !found {
-			return nil, fmt.Errorf("node %s not found in group instances", node.Name)
-		}
+	flavor, err := ng.Manager.getFlavorByName(ng.Flavor)
+	if err != nil {
+		// Fallback when we are unable to get the flavor: refer to the only category
+		// known to be a GPU flavor category
+		return strings.HasPrefix(ng.Flavor, GPUMachineCategory)
 	}

-	return nodeIds, nil
+	return flavor.GPUs > 0
 }

-// toInstanceStatus cast a node status into an instance status
+// toInstanceStatus casts a node status into an instance status
 func toInstanceStatus(status string) *cloudprovider.InstanceStatus {
 	state := &cloudprovider.InstanceStatus{}
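TemplateNodeInfo above converts the flavor's RAM, which the API reports in gigabytes, into bytes with a 1024³ multiplier. A quick standalone check that this arithmetic matches the memory quantities asserted in the tests below:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	gib := int64(math.Pow(1024, 3)) // 1 GiB in bytes, same factor as TemplateNodeInfo

	fmt.Println(7 * gib)  // 7516192768  -> expected b2-7 memory capacity
	fmt.Println(45 * gib) // 48318382080 -> expected t1-45 memory capacity
}
```
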
"id-2", - Name: "node-2", - Status: "INSTALLING", + Name: "t1-45", + Category: "t", + State: "available", + VCPUs: 8, + GPUs: 1, + RAM: 45, }, { - ID: "id-3", - Name: "node-3", - Status: "ERROR", + Name: "unknown", + Category: "", + State: "unavailable", + VCPUs: 2, + GPUs: 0, + RAM: 7, }, }, nil, ) - - name := "pool-b2-7" - size := uint32(3) - min := uint32(1) - max := uint32(5) - - opts := sdk.CreateNodePoolOpts{ - FlavorName: "b2-7", - Name: &name, - DesiredNodes: &size, - MinNodes: &min, - MaxNodes: &max, - Autoscale: true, - } - - client.On("CreateNodePool", ctx, "projectID", "clusterID", &opts).Return( - &sdk.NodePool{ - ID: "id", - Name: "pool-b2-7", - Flavor: "b2-7", - Autoscale: true, - DesiredNodes: 3, - MinNodes: 1, - MaxNodes: 5, - }, nil, - ) - - client.On("DeleteNodePool", ctx, "projectID", "clusterID", "id").Return(&sdk.NodePool{}, nil) - - flavor := "b2-7" - if isGpu { - flavor = "t1-45" - } - manager.Client = client + ng := &NodeGroup{ Manager: manager, NodePool: sdk.NodePool{ @@ -129,99 +106,196 @@ func newTestNodeGroup(t *testing.T, isGpu bool) cloudprovider.NodeGroup { return ng } +func (ng *NodeGroup) mockCallUpdateNodePool(newDesiredNodes uint32, nodesToRemove []string) { + ng.Manager.Client.(*sdk.ClientMock).On( + "UpdateNodePool", + context.Background(), + ng.Manager.ProjectID, + ng.Manager.ClusterID, + ng.ID, + &sdk.UpdateNodePoolOpts{ + DesiredNodes: &newDesiredNodes, + NodesToRemove: nodesToRemove, + }, + ).Return( + &sdk.NodePool{ + ID: ng.ID, + Name: ng.Name, + Flavor: ng.Flavor, + Autoscale: ng.Autoscale, + DesiredNodes: newDesiredNodes, + MinNodes: ng.MinNodes, + MaxNodes: ng.MaxNodes, + }, + nil, + ) +} + +func (ng *NodeGroup) mockCallListNodePoolNodes() { + ng.Manager.Client.(*sdk.ClientMock).On( + "ListNodePoolNodes", + context.Background(), + ng.Manager.ProjectID, + ng.Manager.ClusterID, + ng.ID, + ).Return( + []sdk.Node{ + { + ID: "id-1", + Name: "node-1", + Status: "READY", + InstanceID: "instance-1", + }, + { + ID: "id-2", + Name: "node-2", + Status: "INSTALLING", + InstanceID: "", + }, + { + ID: "id-3", + Name: "node-3", + Status: "ERROR", + InstanceID: "", + }, + }, nil, + ) +} + +func (ng *NodeGroup) mockCallCreateNodePool() { + ng.Manager.Client.(*sdk.ClientMock).On( + "CreateNodePool", + context.Background(), + ng.Manager.ProjectID, + ng.Manager.ClusterID, + &sdk.CreateNodePoolOpts{ + FlavorName: "b2-7", + Name: &ng.Name, + DesiredNodes: &ng.DesiredNodes, + MinNodes: &ng.MinNodes, + MaxNodes: &ng.MaxNodes, + Autoscale: ng.Autoscale, + }, + ).Return( + &sdk.NodePool{ + ID: ng.ID, + Name: ng.Name, + Flavor: ng.Flavor, + Autoscale: ng.Autoscale, + DesiredNodes: ng.DesiredNodes, + MinNodes: ng.MinNodes, + MaxNodes: ng.MaxNodes, + }, nil, + ) +} + +func (ng *NodeGroup) mockCallDeleteNodePool() { + ng.Manager.Client.(*sdk.ClientMock).On("DeleteNodePool", context.Background(), "projectID", "clusterID", "id").Return(&sdk.NodePool{}, nil) +} + func TestOVHCloudNodeGroup_MaxSize(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check default node group max size", func(t *testing.T) { - max := group.MaxSize() + max := ng.MaxSize() assert.Equal(t, 5, max) }) } func TestOVHCloudNodeGroup_MinSize(t *testing.T) { - group := newTestNodeGroup(t, false) + ng := newTestNodeGroup(t, "b2-7") t.Run("check default node group min size", func(t *testing.T) { - min := group.MinSize() + min := ng.MinSize() assert.Equal(t, 1, min) }) } func TestOVHCloudNodeGroup_TargetSize(t *testing.T) { - group := newTestNodeGroup(t, false) + 
+	ng := newTestNodeGroup(t, "b2-7")

 	t.Run("check default node group target size", func(t *testing.T) {
-		size, err := group.TargetSize()
+		targetSize, err := ng.TargetSize()
 		assert.NoError(t, err)

-		assert.Equal(t, 3, size)
+		assert.Equal(t, 3, targetSize)
 	})
 }

 func TestOVHCloudNodeGroup_IncreaseSize(t *testing.T) {
-	group := newTestNodeGroup(t, false)
+	ng := newTestNodeGroup(t, "b2-7")

 	t.Run("check increase size below max size", func(t *testing.T) {
-		err := group.IncreaseSize(1)
+		ng.mockCallUpdateNodePool(4, nil)
+
+		err := ng.IncreaseSize(1)
 		assert.NoError(t, err)

-		size, err := group.TargetSize()
+		targetSize, err := ng.TargetSize()
 		assert.NoError(t, err)

-		assert.Equal(t, 4, size)
+		assert.Equal(t, 4, targetSize)
 	})

 	t.Run("check increase size above max size", func(t *testing.T) {
-		err := group.IncreaseSize(5)
+		err := ng.IncreaseSize(5)
 		assert.Error(t, err)
 	})

 	t.Run("check increase size with negative delta", func(t *testing.T) {
-		err := group.IncreaseSize(-1)
+		err := ng.IncreaseSize(-1)
 		assert.Error(t, err)
 	})
 }

 func TestOVHCloudNodeGroup_DeleteNodes(t *testing.T) {
-	group := newTestNodeGroup(t, false)
+	ng := newTestNodeGroup(t, "b2-7")

 	t.Run("check delete nodes above min size", func(t *testing.T) {
-		err := group.DeleteNodes([]*v1.Node{
+		ng.mockCallUpdateNodePool(2, []string{"openstack:///instance-1"})
+
+		err := ng.DeleteNodes([]*v1.Node{
 			{
 				ObjectMeta: metav1.ObjectMeta{
 					Name: "node-1",
 					Labels: map[string]string{
-						"nodepool": group.Id(),
+						"nodepool": ng.Id(),
 					},
 				},
+				Spec: v1.NodeSpec{
+					ProviderID: "openstack:///instance-1",
+				},
 			},
 		})
 		assert.NoError(t, err)

-		size, err := group.TargetSize()
+		targetSize, err := ng.TargetSize()
 		assert.NoError(t, err)

-		assert.Equal(t, 2, size)
+		assert.Equal(t, 2, targetSize)
 	})

 	t.Run("check delete nodes empty nodes array", func(t *testing.T) {
-		err := group.DeleteNodes(nil)
+		ng.mockCallUpdateNodePool(2, []string{})

-		size, err := group.TargetSize()
+		err := ng.DeleteNodes(nil)
 		assert.NoError(t, err)

-		assert.Equal(t, 2, size)
+		targetSize, err := ng.TargetSize()
+		assert.NoError(t, err)
+
+		assert.Equal(t, 2, targetSize)
 	})

 	t.Run("check delete nodes below min size", func(t *testing.T) {
-		err := group.DeleteNodes([]*v1.Node{
+		err := ng.DeleteNodes([]*v1.Node{
 			{
 				ObjectMeta: metav1.ObjectMeta{
 					Name: "node-1",
 					Labels: map[string]string{
-						"nodepool": group.Id(),
+						"nodepool": ng.Id(),
 					},
 				},
 			},
@@ -229,7 +303,7 @@ func TestOVHCloudNodeGroup_DeleteNodes(t *testing.T) {
 				ObjectMeta: metav1.ObjectMeta{
 					Name: "node-2",
 					Labels: map[string]string{
-						"nodepool": group.Id(),
+						"nodepool": ng.Id(),
 					},
 				},
 			},
@@ -237,161 +311,181 @@ func TestOVHCloudNodeGroup_DeleteNodes(t *testing.T) {
 				ObjectMeta: metav1.ObjectMeta{
 					Name: "node-3",
 					Labels: map[string]string{
-						"nodepool": group.Id(),
+						"nodepool": ng.Id(),
 					},
 				},
 			},
 		})
 		assert.Error(t, err)
 	})
-
-	t.Run("check delete nodes with wrong label name", func(t *testing.T) {
-		err := group.DeleteNodes([]*v1.Node{
-			{
-				ObjectMeta: metav1.ObjectMeta{
-					Name: "node-1",
-					Labels: map[string]string{
-						"nodepool": "unknown",
-					},
-				},
-			},
-		})
-
-		assert.Error(t, err)
-	})
 }

 func TestOVHCloudNodeGroup_DecreaseTargetSize(t *testing.T) {
-	group := newTestNodeGroup(t, false)
-
-	t.Run("check decrease size above min size", func(t *testing.T) {
-		err := group.DecreaseTargetSize(-1)
-		assert.NoError(t, err)
-
-		size, err := group.TargetSize()
-		assert.NoError(t, err)
-
-		assert.Equal(t, 2, size)
-	})
-
-	t.Run("check decrease size below min size", func(t *testing.T) {
-		err := group.DecreaseTargetSize(-5)
-
-		assert.Error(t, err)
-	})
-
-	t.Run("check decrease size with positive delta", func(t *testing.T) {
-		err := group.DecreaseTargetSize(1)
-		assert.Error(t, err)
-	})
+	ng := newTestNodeGroup(t, "b2-7")
+	err := ng.DecreaseTargetSize(-1)
+	assert.Error(t, err)
 }

 func TestOVHCloudNodeGroup_Id(t *testing.T) {
-	group := newTestNodeGroup(t, false)
+	ng := newTestNodeGroup(t, "b2-7")

 	t.Run("check node group id fetch", func(t *testing.T) {
-		name := group.Id()
+		name := ng.Id()

 		assert.Equal(t, "pool-b2-7", name)
 	})
 }

 func TestOVHCloudNodeGroup_Debug(t *testing.T) {
-	group := newTestNodeGroup(t, false)
+	ng := newTestNodeGroup(t, "b2-7")

 	t.Run("check node group debug print", func(t *testing.T) {
-		debug := group.Debug()
+		debug := ng.Debug()

 		assert.Equal(t, "pool-b2-7 (3:1:5)", debug)
 	})
 }

 func TestOVHCloudNodeGroup_Nodes(t *testing.T) {
-	group := newTestNodeGroup(t, false)
+	ng := newTestNodeGroup(t, "b2-7")

 	t.Run("check nodes list in node group", func(t *testing.T) {
-		nodes, err := group.Nodes()
+		ng.mockCallListNodePoolNodes()
+
+		nodes, err := ng.Nodes()
 		assert.NoError(t, err)

 		assert.Equal(t, 3, len(nodes))

-		assert.Equal(t, "id-1/node-1", nodes[0].Id)
+		assert.Equal(t, "openstack:///instance-1", nodes[0].Id)
 		assert.Equal(t, cloudprovider.InstanceRunning, nodes[0].Status.State)

-		assert.Equal(t, "id-2/node-2", nodes[1].Id)
+		assert.Equal(t, "openstack:///", nodes[1].Id)
 		assert.Equal(t, cloudprovider.InstanceCreating, nodes[1].Status.State)

-		assert.Equal(t, "id-3/node-3", nodes[2].Id)
+		assert.Equal(t, "openstack:///", nodes[2].Id)
 		assert.Equal(t, cloudprovider.OtherErrorClass, nodes[2].Status.ErrorInfo.ErrorClass)
 		assert.Equal(t, "ERROR", nodes[2].Status.ErrorInfo.ErrorCode)
 	})
 }

 func TestOVHCloudNodeGroup_TemplateNodeInfo(t *testing.T) {
-	group := newTestNodeGroup(t, false)
+	t.Run("template for b2-7 flavor", func(t *testing.T) {
+		ng := newTestNodeGroup(t, "b2-7")
+		template, err := ng.TemplateNodeInfo()
+		assert.NoError(t, err)
+
+		node := template.Node()
+		assert.NotNil(t, node)
+
+		assert.Contains(t, node.ObjectMeta.Name, fmt.Sprintf("%s-node-", ng.Id()))
+		assert.Equal(t, ng.Id(), node.Labels["nodepool"])

-	t.Run("check node template filled with group id", func(t *testing.T) {
-		template, err := group.TemplateNodeInfo()
+		assert.Equal(t, *resource.NewQuantity(110, resource.DecimalSI), node.Status.Capacity[apiv1.ResourcePods])
+		assert.Equal(t, *resource.NewQuantity(2, resource.DecimalSI), node.Status.Capacity[apiv1.ResourceCPU])
+		assert.Equal(t, *resource.NewQuantity(0, resource.DecimalSI), node.Status.Capacity[gpu.ResourceNvidiaGPU])
+		assert.Equal(t, *resource.NewQuantity(7516192768, resource.DecimalSI), node.Status.Capacity[apiv1.ResourceMemory])
+	})
+
+	t.Run("template for t1-45 flavor", func(t *testing.T) {
+		ng := newTestNodeGroup(t, "t1-45")
+		template, err := ng.TemplateNodeInfo()
 		assert.NoError(t, err)

 		node := template.Node()
 		assert.NotNil(t, node)

-		assert.Contains(t, node.ObjectMeta.Name, fmt.Sprintf("%s-node-", group.Id()))
-		assert.Equal(t, group.Id(), node.Labels["nodepool"])
+		assert.Contains(t, node.ObjectMeta.Name, fmt.Sprintf("%s-node-", ng.Id()))
+		assert.Equal(t, ng.Id(), node.Labels["nodepool"])
+
+		assert.Equal(t, *resource.NewQuantity(110, resource.DecimalSI), node.Status.Capacity[apiv1.ResourcePods])
+		assert.Equal(t, *resource.NewQuantity(8, resource.DecimalSI), node.Status.Capacity[apiv1.ResourceCPU])
+		assert.Equal(t, *resource.NewQuantity(1, resource.DecimalSI), node.Status.Capacity[gpu.ResourceNvidiaGPU])
+		assert.Equal(t, *resource.NewQuantity(48318382080, resource.DecimalSI), node.Status.Capacity[apiv1.ResourceMemory])
 	})
 }

 func TestOVHCloudNodeGroup_Exist(t *testing.T) {
-	group := newTestNodeGroup(t, false)
+	ng := newTestNodeGroup(t, "b2-7")

 	t.Run("check exist is true", func(t *testing.T) {
-		exist := group.Exist()
+		exist := ng.Exist()

 		assert.True(t, exist)
 	})
 }

 func TestOVHCloudNodeGroup_Create(t *testing.T) {
-	group := newTestNodeGroup(t, false)
+	ng := newTestNodeGroup(t, "b2-7")

 	t.Run("check create node group", func(t *testing.T) {
-		newGroup, err := group.Create()
+		ng.mockCallCreateNodePool()
+
+		newGroup, err := ng.Create()
 		assert.NoError(t, err)

-		size, err := newGroup.TargetSize()
+		targetSize, err := newGroup.TargetSize()
 		assert.NoError(t, err)

 		assert.Equal(t, "pool-b2-7", newGroup.Id())
-		assert.Equal(t, 3, size)
+		assert.Equal(t, 3, targetSize)
 		assert.Equal(t, 1, newGroup.MinSize())
 		assert.Equal(t, 5, newGroup.MaxSize())
 	})
 }

 func TestOVHCloudNodeGroup_Delete(t *testing.T) {
-	group := newTestNodeGroup(t, false)
+	ng := newTestNodeGroup(t, "b2-7")

 	t.Run("check delete node group", func(t *testing.T) {
-		err := group.Delete()
+		ng.mockCallDeleteNodePool()
+		err := ng.Delete()
 		assert.NoError(t, err)
 	})
 }

 func TestOVHCloudNodeGroup_Autoprovisioned(t *testing.T) {
-	group := newTestNodeGroup(t, false)
+	ng := newTestNodeGroup(t, "b2-7")

 	t.Run("check auto-provisioned is false", func(t *testing.T) {
-		provisioned := group.Autoprovisioned()
-
+		provisioned := ng.Autoprovisioned()
 		assert.False(t, provisioned)
 	})
 }

+func TestOVHCloudNodeGroup_IsGpu(t *testing.T) {
+	t.Run("has GPU cores", func(t *testing.T) {
+		ng := newTestNodeGroup(t, "custom-t1-45")
+		ng.Manager.FlavorsCache = map[string]sdk.Flavor{
+			"custom-t1-45": {
+				GPUs: 1,
+			},
+		}
+		ng.Manager.FlavorsCacheExpirationTime = time.Now().Add(time.Minute)
+		assert.True(t, ng.isGpu())
+	})
+
+	t.Run("doesn't have GPU cores", func(t *testing.T) {
+		ng := newTestNodeGroup(t, "custom-b2-7")
+		ng.Manager.FlavorsCache = map[string]sdk.Flavor{
+			"custom-b2-7": {
+				GPUs: 0,
+			},
+		}
+		assert.False(t, ng.isGpu())
+	})
+
+	t.Run("not found but belongs to GPU category", func(t *testing.T) {
+		ng := newTestNodeGroup(t, GPUMachineCategory+"1-123")
+		assert.True(t, ng.isGpu())
+	})
+}
+
 func TestOVHCloudNodeGroup_GetOptions(t *testing.T) {
 	t.Run("check get autoscaling options", func(t *testing.T) {
-		group := newTestNodeGroup(t, false)
-		opts, err := group.GetOptions(config.NodeGroupAutoscalingOptions{})
+		ng := newTestNodeGroup(t, "b2-7")
+		opts, err := ng.GetOptions(config.NodeGroupAutoscalingOptions{})
 		assert.NoError(t, err)

 		assert.Equal(t, fmt.Sprintf("%.1f", 3.2), fmt.Sprintf("%.1f", opts.ScaleDownUtilizationThreshold))
@@ -401,8 +495,8 @@ func TestOVHCloudNodeGroup_GetOptions(t *testing.T) {
 	})

 	t.Run("check get autoscaling options on gpu machine", func(t *testing.T) {
-		group := newTestNodeGroup(t, true)
-		opts, err := group.GetOptions(config.NodeGroupAutoscalingOptions{})
+		ng := newTestNodeGroup(t, "t1-45")
+		opts, err := ng.GetOptions(config.NodeGroupAutoscalingOptions{})
 		assert.NoError(t, err)

 		assert.Equal(t, float64(0), opts.ScaleDownUtilizationThreshold)
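The provider change that follows keeps non-autoscaled pools visible but pins them to a fixed size instead of skipping them. A reduced sketch of that normalization, with the pool type simplified to the fields involved:

```go
package main

import "fmt"

type pool struct {
	Name                             string
	Autoscale                        bool
	MinNodes, MaxNodes, DesiredNodes uint32
}

// normalize pins pools without autoscaling to min = max = desired,
// so the autoscaler sees them but can never resize them.
func normalize(pools []pool) []pool {
	out := make([]pool, 0, len(pools))
	for _, p := range pools {
		if !p.Autoscale {
			p.MinNodes = p.DesiredNodes
			p.MaxNodes = p.DesiredNodes
		}
		out = append(out, p)
	}
	return out
}

func main() {
	pools := normalize([]pool{
		{Name: "static", Autoscale: false, MinNodes: 1, MaxNodes: 5, DesiredNodes: 3},
		{Name: "elastic", Autoscale: true, MinNodes: 1, MaxNodes: 5, DesiredNodes: 3},
	})
	fmt.Println(pools[0].MinNodes, pools[0].MaxNodes) // 3 3
	fmt.Println(pools[1].MinNodes, pools[1].MaxNodes) // 1 5
}
```
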
diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider.go
index a46a731f278a..075c6cb78a15 100644
--- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider.go
@@ -99,10 +99,11 @@ func (provider *OVHCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
 	groups := make([]cloudprovider.NodeGroup, 0)

 	// Cast API node pools into CA node groups
-	// Do not add in array node pool which does not support autoscaling
 	for _, pool := range provider.manager.NodePools {
+		// Treat node pools without autoscaling as fixed-size pools (min = max = desired)
 		if !pool.Autoscale {
-			continue
+			pool.MaxNodes = pool.DesiredNodes
+			pool.MinNodes = pool.DesiredNodes
 		}

 		ng := NodeGroup{
@@ -121,26 +122,75 @@ func (provider *OVHCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
 // should not be processed by cluster autoscaler, or non-nil error if such
 // occurred. Must be implemented.
 func (provider *OVHCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
-	// Fetch node pool label on nodes
+	// If the provider ID is empty (only the prefix), it means that we are processing an UnregisteredNode retrieved
+	// from OVHCloud APIs, which has just started being created, and the OpenStack instance ID is not yet set.
+	// We won't be able to determine the node group of the node with the information at hand.
+	if node.Spec.ProviderID == providerIDPrefix {
+		return nil, nil
+	}
+
+	// Try to retrieve the associated node group from an already built mapping in cache
+	if ng := provider.findNodeGroupFromCache(node.Spec.ProviderID); ng != nil {
+		return ng, nil
+	}
+
+	// Try to find the associated node group from the nodepool label on the node
+	if ng := provider.findNodeGroupFromLabel(node); ng != nil {
+		return ng, nil
+	}
+
+	klog.V(4).Infof("trying to find node group of node %s (provider ID %s) by listing all nodes under autoscaled node pools", node.Name, node.Spec.ProviderID)
+
+	// Call the OVHCloud APIs to list all nodes under autoscaled node pools and find the associated node group.
+	// This should also refresh the cache for next time
+	ng, err := provider.findNodeGroupByListingNodes(node)
+	if ng == nil {
+		klog.Warningf("unable to find which node group the node %s (provider ID %s) belongs to", node.Name, node.Spec.ProviderID)
+	}
+
+	return ng, err
+}
+
+// findNodeGroupFromCache tries to retrieve the associated node group from an already built mapping in cache
+func (provider *OVHCloudProvider) findNodeGroupFromCache(providerID string) cloudprovider.NodeGroup {
+	if ng, ok := provider.manager.NodeGroupPerProviderID[providerID]; ok {
+		return ng
+	}
+	return nil
+}
+
+// findNodeGroupFromLabel tries to find the associated node group from the nodepool label on the node
+func (provider *OVHCloudProvider) findNodeGroupFromLabel(node *apiv1.Node) cloudprovider.NodeGroup {
+	// Retrieve the label specifying the pool the node belongs to
 	labels := node.GetLabels()
 	label, exists := labels[NodePoolLabel]
 	if !exists {
-		return nil, nil
+		return nil
 	}

-	// Find in node groups stored in cache which one is linked to the node
-	for _, pool := range provider.manager.NodePools {
-		// Do not check node group which does not support auto-scaling
-		if !pool.Autoscale {
-			continue
+	// Find in the node groups stored in cache the one with the same name
+	for _, ng := range provider.NodeGroups() {
+		if ng.Id() == label {
+			return ng
+		}
+	}
+
+	return nil
+}
+
+// findNodeGroupByListingNodes finds the associated node group by listing all nodes under autoscaled node pools
+func (provider *OVHCloudProvider) findNodeGroupByListingNodes(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
+	for _, ng := range provider.NodeGroups() {
+		// This calls OVHCloud APIs and refreshes the cache
+		instances, err := ng.Nodes()
+		if err != nil {
+			return nil, fmt.Errorf("failed to list nodes in node group %s: %w", ng.Id(), err)
 		}

-		if pool.Name == label {
-			return &NodeGroup{
-				NodePool:    pool,
-				Manager:     provider.manager,
-				CurrentSize: -1,
-			}, nil
+		for _, instance := range instances {
+			if instance.Id == node.Spec.ProviderID {
+				return ng, nil
+			}
 		}
 	}

@@ -157,15 +207,16 @@ func (provider *OVHCloudProvider) Pricing() (cloudprovider.PricingModel, errors.
 // GetAvailableMachineTypes get all machine types that can be requested from
 // the cloud provider. Implementation optional.
 func (provider *OVHCloudProvider) GetAvailableMachineTypes() ([]string, error) {
-	// Fetch all flavors in API
-	flavors, err := provider.manager.Client.ListFlavors(context.Background(), provider.manager.ProjectID, provider.manager.ClusterID)
+	klog.V(4).Info("Getting available machine types")
+
+	flavorsByName, err := provider.manager.getFlavorsByName()
 	if err != nil {
-		return nil, fmt.Errorf("failed to list available flavors: %w", err)
+		return nil, fmt.Errorf("failed to get flavors: %w", err)
 	}

 	// Cast flavors into machine types string array
 	machineTypes := make([]string, 0)
-	for _, flavor := range flavors {
+	for _, flavor := range flavorsByName {
 		if flavor.State == MachineAvailableState {
 			machineTypes = append(machineTypes, flavor.Name)
 		}
@@ -206,16 +257,18 @@ func (provider *OVHCloudProvider) GPULabel() string {

 // GetAvailableGPUTypes return all available GPU types cloud provider supports.
 func (provider *OVHCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
-	// Fetch all flavors in API
-	flavors, err := provider.manager.Client.ListFlavors(context.Background(), provider.manager.ProjectID, provider.manager.ClusterID)
+	klog.V(4).Info("Getting available GPU types")
+
+	flavorsByName, err := provider.manager.getFlavorsByName()
 	if err != nil {
+		klog.Errorf("Failed to get flavors: %v", err)
 		return nil
 	}

 	// Cast flavors into gpu types string array
 	gpuTypes := make(map[string]struct{}, 0)
-	for _, flavor := range flavors {
-		if flavor.State == MachineAvailableState && flavor.Category == GPUMachineCategory {
+	for _, flavor := range flavorsByName {
+		if flavor.State == MachineAvailableState && flavor.GPUs > 0 {
 			gpuTypes[flavor.Name] = struct{}{}
 		}
 	}
@@ -233,7 +286,7 @@ func (provider *OVHCloudProvider) Cleanup() error {
 // update cloud provider state. In particular the list of node groups returned
 // by NodeGroups() can change as a result of CloudProvider.Refresh().
 func (provider *OVHCloudProvider) Refresh() error {
-	klog.V(4).Info("Refreshing NodeGroups")
+	klog.V(4).Info("Listing node pools to refresh NodeGroups")

 	// Check if OpenStack keystone token need to be revoke and re-create
 	err := provider.manager.ReAuthenticate()
@@ -247,15 +300,8 @@ func (provider *OVHCloudProvider) Refresh() error {
 		return fmt.Errorf("failed to refresh node pool list: %w", err)
 	}

-	// Do not append node pool which does not support autoscaling
-	provider.manager.NodePools = []sdk.NodePool{}
-	for _, pool := range pools {
-		if !pool.Autoscale {
-			continue
-		}
-
-		provider.manager.NodePools = append(provider.manager.NodePools, pool)
-	}
+	// Update the node pools cache
+	provider.manager.NodePools = pools

 	return nil
 }
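NodeGroupForNode above tries three sources in order: the provider-ID cache, the nodepool label, then a full listing that repopulates the cache. A minimal sketch of that fallback chain, with all types illustrative rather than taken from the patch:

```go
package main

import "fmt"

type group string

type resolver struct {
	cache  map[string]group        // providerID -> group, filled by list()
	byName map[string]group        // label -> group
	list   func() map[string]group // expensive API listing
}

func (r *resolver) groupFor(providerID, label string) (group, bool) {
	if g, ok := r.cache[providerID]; ok { // 1. cheap cache hit
		return g, true
	}
	if g, ok := r.byName[label]; ok { // 2. label on the node
		return g, true
	}
	for id, g := range r.list() { // 3. list everything, refresh cache
		r.cache[id] = g
	}
	g, ok := r.cache[providerID]
	return g, ok
}

func main() {
	r := &resolver{
		cache:  map[string]group{},
		byName: map[string]group{"pool-1": "pool-1"},
		list:   func() map[string]group { return map[string]group{"openstack:///0123": "pool-1"} },
	}
	g, ok := r.groupFor("openstack:///0123", "")
	fmt.Println(g, ok) // pool-1 true
}
```
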
diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider_test.go
index 7fdb68f76898..2bded3033907 100644
--- a/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider_test.go
+++ b/cluster-autoscaler/cloudprovider/ovhcloud/ovh_cloud_provider_test.go
@@ -21,10 +21,9 @@ import (
 	"context"
 	"testing"

+	"github.com/stretchr/testify/assert"
 	apiv1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-
-	"github.com/stretchr/testify/assert"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk"
 )
@@ -45,7 +44,6 @@ func newTestProvider(t *testing.T) *OVHCloudProvider {
 		assert.FailNow(t, "failed to create manager", err)
 	}

-	// fill the test provider with some example
 	client := &sdk.ClientMock{}
 	ctx := context.Background()

@@ -72,22 +70,31 @@ func newTestProvider(t *testing.T) *OVHCloudProvider {
 		}, nil,
 	)

-	client.On("ListFlavors", ctx, "projectID", "clusterID").Return(
+	client.On("ListClusterFlavors", ctx, "projectID", "clusterID").Return(
 		[]sdk.Flavor{
 			{
 				Name:     "b2-7",
 				Category: "b",
 				State:    "available",
+				VCPUs:    2,
+				GPUs:     0,
+				RAM:      7,
 			},
 			{
 				Name:     "t1-45",
 				Category: "t",
 				State:    "available",
+				VCPUs:    8,
+				GPUs:     1,
+				RAM:      45,
 			},
 			{
 				Name:     "unknown",
 				Category: "",
 				State:    "unavailable",
+				VCPUs:    2,
+				GPUs:     0,
+				RAM:      7,
 			},
 		}, nil,
 	)
@@ -130,7 +137,7 @@ func TestOVHCloudProvider_NodeGroups(t *testing.T) {
 	t.Run("check default node groups length", func(t *testing.T) {
 		groups := provider.NodeGroups()

-		assert.Equal(t, 1, len(groups))
+		assert.Equal(t, 2, len(groups))
 	})

 	t.Run("check empty node groups length after reset", func(t *testing.T) {
@@ -144,7 +151,45 @@
 func TestOVHCloudProvider_NodeGroupForNode(t *testing.T) {
 	provider := newTestProvider(t)

-	t.Run("check node group with correct label on node", func(t *testing.T) {
+	ListNodePoolNodesCall1 := provider.manager.Client.(*sdk.ClientMock).On(
+		"ListNodePoolNodes",
+		context.Background(),
+		provider.manager.ProjectID,
+		provider.manager.ClusterID,
+		"1",
+	)
+	ListNodePoolNodesCall2 := provider.manager.Client.(*sdk.ClientMock).On(
+		"ListNodePoolNodes",
+		context.Background(),
+		provider.manager.ProjectID,
+		provider.manager.ClusterID,
+		"2",
+	)
+
+	t.Run("find node group in node group associations cache", func(t *testing.T) {
+		node := &apiv1.Node{
+			Spec: apiv1.NodeSpec{
+				ProviderID: providerIDPrefix + "0123",
+			},
+		}
+
+		// Set up the node group association in cache
+		ng := newTestNodeGroup(t, "b2-7")
+		provider.manager.NodeGroupPerProviderID[node.Spec.ProviderID] = ng
+		defer func() {
+			provider.manager.NodeGroupPerProviderID = make(map[string]*NodeGroup)
+		}()
+
+		group, err := provider.NodeGroupForNode(node)
+		assert.NoError(t, err)
+		assert.NotNil(t, group)
+
+		assert.Equal(t, ng.Name, group.Id())
+		assert.Equal(t, ng.MinNodes, uint32(group.MinSize()))
+		assert.Equal(t, ng.MaxNodes, uint32(group.MaxSize()))
+	})
+
+	t.Run("find node group with label on node", func(t *testing.T) {
 		node := &apiv1.Node{
 			ObjectMeta: metav1.ObjectMeta{
 				Name: "node-1",
@@ -152,8 +197,53 @@ func TestOVHCloudProvider_NodeGroupForNode(t *testing.T) {
 					"nodepool": "pool-1",
 				},
 			},
+			Spec: apiv1.NodeSpec{
+				ProviderID: providerIDPrefix + "0123",
+			},
+		}
+
+		group, err := provider.NodeGroupForNode(node)
+		assert.NoError(t, err)
+		assert.NotNil(t, group)
+
+		assert.Equal(t, "pool-1", group.Id())
+		assert.Equal(t, 1, group.MinSize())
+		assert.Equal(t, 5, group.MaxSize())
+	})
+
+	t.Run("find node group by listing nodes", func(t *testing.T) {
+		node := &apiv1.Node{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: "node-1",
+			},
+			Spec: apiv1.NodeSpec{
+				ProviderID: providerIDPrefix + "0123",
+			},
 		}

+		// Mock the list nodes api call
+		ListNodePoolNodesCall1.Return(
+			[]sdk.Node{
+				{
+					Name:       "node-0",
+					InstanceID: "0",
+				},
+				{
+					Name:       "node-1",
+					InstanceID: "0123", // This corresponds to the node providerID we need
+				},
+				{
+					Name:       "node-2",
+					InstanceID: "2",
+				},
+			}, nil,
+		)
+
+		// Purge the node group associations cache afterwards for the following tests
+		defer func() {
+			provider.manager.NodeGroupPerProviderID = make(map[string]*NodeGroup)
+		}()
+
 		group, err := provider.NodeGroupForNode(node)
 		assert.NoError(t, err)
 		assert.NotNil(t, group)
@@ -163,7 +253,19 @@ func TestOVHCloudProvider_NodeGroupForNode(t *testing.T) {
 		assert.Equal(t, 5, group.MaxSize())
 	})

-	t.Run("check node group empty if label not exists", func(t *testing.T) {
+	t.Run("fail to find node group with empty providerID", func(t *testing.T) {
+		node := &apiv1.Node{
+			Spec: apiv1.NodeSpec{
+				ProviderID: providerIDPrefix,
+			},
+		}
+
+		group, err := provider.NodeGroupForNode(node)
+		assert.NoError(t, err)
+		assert.Nil(t, group)
+	})
+
+	t.Run("fail to find node group with incorrect label", func(t *testing.T) {
 		node := &apiv1.Node{
 			ObjectMeta: metav1.ObjectMeta{
 				Name: "node-1",
@@ -171,21 +273,43 @@ func TestOVHCloudProvider_NodeGroupForNode(t *testing.T) {
 				"nodepool": "pool-unknown",
 			},
 		},
+			Spec: apiv1.NodeSpec{
+				ProviderID: providerIDPrefix + "0123",
+			},
 		}

+		// Mock the list nodes api call
+		ListNodePoolNodesCall1.Return(
+			[]sdk.Node{}, nil,
+		)
+		ListNodePoolNodesCall2.Return(
+			[]sdk.Node{}, nil,
+		)
+
 		group, err := provider.NodeGroupForNode(node)
 		assert.NoError(t, err)
 		assert.Nil(t, group)
 	})

-	t.Run("check node group empty if label not present", func(t *testing.T) {
+	t.Run("fail to find node group with no cache, no label and no result in API call", func(t *testing.T) {
 		node := &apiv1.Node{
 			ObjectMeta: metav1.ObjectMeta{
 				Name:   "node-1",
 				Labels: map[string]string{},
 			},
+			Spec: apiv1.NodeSpec{
+				ProviderID: providerIDPrefix + "0123",
+			},
 		}

+		// Mock the list nodes api call
+		ListNodePoolNodesCall1.Return(
+			[]sdk.Node{}, nil,
+		).Once()
+		ListNodePoolNodesCall2.Return(
+			[]sdk.Node{}, nil,
+		).Once()
+
 		group, err := provider.NodeGroupForNode(node)
 		assert.NoError(t, err)
 		assert.Nil(t, group)
@@ -288,6 +412,6 @@ func TestOVHCloudProvider_Refresh(t *testing.T) {
 		assert.NoError(t, err)

 		groups = provider.NodeGroups()
-		assert.Equal(t, 1, len(groups))
+		assert.Equal(t, 2, len(groups))
 	})
 }
diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/consumer_key.go b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/consumer_key.go
index 9f2b47e7b1d3..e013047bc3c8 100644
--- a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/consumer_key.go
+++ b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/consumer_key.go
@@ -119,7 +119,7 @@ func (ck *CkRequest) AddRecursiveRules(methods []string, path string) {
 // and return the URL the user needs to visit to validate the key
 func (ck *CkRequest) Do() (*CkValidationState, error) {
 	state := CkValidationState{}
-	err := ck.client.PostUnAuth("/auth/credential", ck, &state)
+	err := ck.client.PostUnAuth("/auth/credential", ck, &state, nil)

 	if err == nil {
 		ck.client.ConsumerKey = state.ConsumerKey
diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/flavor.go b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/flavor.go
index 11b9a42c149d..a797a5aae3ab 100644
--- a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/flavor.go
+++ b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/flavor.go
@@ -26,10 +26,13 @@ type Flavor struct {
 	Name     string `json:"name"`
 	Category string `json:"category"`
 	State    string `json:"state"`
+	VCPUs    int    `json:"vCPUs"`
+	GPUs     int    `json:"gpus"`
+	RAM      int    `json:"ram"`
 }

-// ListFlavors allows to display flavors available for nodes templates
-func (c *Client) ListFlavors(ctx context.Context, projectID string, clusterID string) ([]Flavor, error) {
+// ListClusterFlavors lists the flavors available for node templates in a cluster
+func (c *Client) ListClusterFlavors(ctx context.Context, projectID string, clusterID string) ([]Flavor, error) {
 	flavors := make([]Flavor, 0)

 	return flavors, c.CallAPIWithContext(
@@ -39,6 +42,7 @@ func (c *Client) ListClusterFlavors(ctx context.Context, projectID string, clust
 		nil,
 		&flavors,
 		nil,
+		nil,
 		true,
 	)
 }
diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/mock.go b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/mock.go
index a21403fe2fb6..a59c36007c36 100644
--- a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/mock.go
+++ b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/mock.go
@@ -62,8 +62,8 @@ func (m *ClientMock) DeleteNodePool(ctx context.Context, projectID string, clust
 	return args.Get(0).(*NodePool), args.Error(1)
 }

-// ListFlavors mocks API call for listing available flavors in cluster
-func (m *ClientMock) ListFlavors(ctx context.Context, projectID string, clusterID string) ([]Flavor, error) {
+// ListClusterFlavors mocks the API call for listing available flavors in a cluster
+func (m *ClientMock) ListClusterFlavors(ctx context.Context, projectID string, clusterID string) ([]Flavor, error) {
 	args := m.Called(ctx, projectID, clusterID)
 	return args.Get(0).([]Flavor), args.Error(1)
diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/nodepool.go b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/nodepool.go
index 02e0b440c0fe..d9f935cb062e 100644
--- a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/nodepool.go
+++ b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/nodepool.go
@@ -74,6 +74,7 @@ func (c *Client) ListNodePools(ctx context.Context, projectID, clusterID string)
 		nil,
 		&nodepools,
 		nil,
+		nil,
 		true,
 	)
 }
@@ -89,6 +90,7 @@ func (c *Client) GetNodePool(ctx context.Context, projectID string, clusterID st
 		nil,
 		&nodepool,
 		nil,
+		nil,
 		true,
 	)
 }
@@ -104,6 +106,7 @@ func (c *Client) ListNodePoolNodes(ctx context.Context, projectID string, cluste
 		nil,
 		&nodes,
 		nil,
+		nil,
 		true,
 	)
 }
@@ -133,6 +136,7 @@ func (c *Client) CreateNodePool(ctx context.Context, projectID string, clusterID
 		opts,
 		&nodepool,
 		nil,
+		nil,
 		true,
 	)
 }
@@ -159,6 +163,7 @@ func (c *Client) UpdateNodePool(ctx context.Context, projectID string, clusterID
 		opts,
 		&nodepool,
 		nil,
+		nil,
 		true,
 	)
 }
@@ -174,6 +179,7 @@ func (c *Client) DeleteNodePool(ctx context.Context, projectID string, clusterID
 		nil,
 		&nodepool,
 		nil,
+		nil,
 		true,
 	)
 }
diff --git a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/ovh.go b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/ovh.go
index cccc8d2a2384..0ab335df1308 100644
--- a/cluster-autoscaler/cloudprovider/ovhcloud/sdk/ovh.go
+++ b/cluster-autoscaler/cloudprovider/ovhcloud/sdk/ovh.go
@@ -25,6 +25,7 @@ import (
 	"fmt"
 	"io/ioutil"
 	"net/http"
+	"net/url"
 	"strconv"
 	"strings"
 	"sync"
@@ -176,83 +177,83 @@ func (c *Client) Time() (*time.Time, error) {
 //

 // Get is a wrapper for the GET method
-func (c *Client) Get(url string, resType interface{}) error {
-	return c.CallAPI("GET", url, nil, resType, true)
+func (c *Client) Get(url string, result interface{}, queryParams url.Values) error {
+	return c.CallAPI("GET", url, nil, result, queryParams, true)
 }

 // GetUnAuth is a wrapper for the unauthenticated GET method
-func (c *Client) GetUnAuth(url string, resType interface{}) error {
-	return c.CallAPI("GET", url, nil, resType, false)
+func (c *Client) GetUnAuth(url string, result interface{}, queryParams url.Values) error {
+	return c.CallAPI("GET", url, nil, result, queryParams, false)
 }

 // Post is a wrapper for the POST method
-func (c *Client) Post(url string, reqBody, resType interface{}) error {
-	return c.CallAPI("POST", url, reqBody, resType, true)
+func (c *Client) Post(url string, reqBody, result interface{}, queryParams url.Values) error {
+	return c.CallAPI("POST", url, reqBody, result, queryParams, true)
 }

 // PostUnAuth is a wrapper for the unauthenticated POST method
-func (c *Client) PostUnAuth(url string, reqBody, resType interface{}) error {
-	return c.CallAPI("POST", url, reqBody, resType, false)
+func (c *Client) PostUnAuth(url string, reqBody, result interface{}, queryParams url.Values) error {
+	return c.CallAPI("POST", url, reqBody, result, queryParams, false)
 }

 // Put is a wrapper for the PUT method
-func (c *Client) Put(url string, reqBody, resType interface{}) error {
-	return c.CallAPI("PUT", url, reqBody, resType, true)
+func (c *Client) Put(url string, reqBody, result interface{}, queryParams url.Values) error {
+	return c.CallAPI("PUT", url, reqBody, result, queryParams, true)
 }

 // PutUnAuth is a wrapper for the unauthenticated PUT method
-func (c *Client) PutUnAuth(url string, reqBody, resType interface{}) error {
-	return c.CallAPI("PUT", url, reqBody, resType, false)
+func (c *Client) PutUnAuth(url string, reqBody, result interface{}, queryParams url.Values) error {
+	return c.CallAPI("PUT", url, reqBody, result, queryParams, false)
 }

 // Delete is a wrapper for the DELETE method
-func (c *Client) Delete(url string, resType interface{}) error {
-	return c.CallAPI("DELETE", url, nil, resType, true)
+func (c *Client) Delete(url string, result interface{}, queryParams url.Values) error {
+	return c.CallAPI("DELETE", url, nil, result, queryParams, true)
 }

 // DeleteUnAuth is a wrapper for the unauthenticated DELETE method
-func (c *Client) DeleteUnAuth(url string, resType interface{}) error {
-	return c.CallAPI("DELETE", url, nil, resType, false)
+func (c *Client) DeleteUnAuth(url string, result interface{}, queryParams url.Values) error {
+	return c.CallAPI("DELETE", url, nil, result, queryParams, false)
 }

 // GetWithContext is a wrapper for the GET method
-func (c *Client) GetWithContext(ctx context.Context, url string, resType interface{}) error {
-	return c.CallAPIWithContext(ctx, "GET", url, nil, resType, nil, true)
+func (c *Client) GetWithContext(ctx context.Context, url string, result interface{}, queryParams url.Values) error {
+	return c.CallAPIWithContext(ctx, "GET", url, nil, result, queryParams, nil, true)
 }

 // GetUnAuthWithContext is a wrapper for the unauthenticated GET method
-func (c *Client) GetUnAuthWithContext(ctx context.Context, url string, resType interface{}) error {
-	return c.CallAPIWithContext(ctx, "GET", url, nil, resType, nil, false)
+func (c *Client) GetUnAuthWithContext(ctx context.Context, url string, result interface{}, queryParams url.Values) error {
+	return c.CallAPIWithContext(ctx, "GET", url, nil, result, queryParams, nil, false)
 }

 // PostWithContext is a wrapper for the POST method
-func (c *Client) PostWithContext(ctx context.Context, url string, reqBody, resType interface{}) error {
-	return c.CallAPIWithContext(ctx, "POST", url, reqBody, resType, nil, true)
+func (c *Client) PostWithContext(ctx context.Context, url string, reqBody, result interface{}, queryParams url.Values) error {
+	return c.CallAPIWithContext(ctx, "POST", url, reqBody, result, queryParams, nil, true)
 }

 // PostUnAuthWithContext is a wrapper for the unauthenticated POST method
-func (c *Client) PostUnAuthWithContext(ctx context.Context, url string, reqBody, resType interface{}) error {
-	return c.CallAPIWithContext(ctx, "POST", url, reqBody, resType, nil, false)
+func (c *Client) PostUnAuthWithContext(ctx context.Context, url string, reqBody, result interface{}, queryParams url.Values) error {
+	return c.CallAPIWithContext(ctx, "POST", url, reqBody, result, queryParams, nil, false)
 }

 // PutWithContext is a wrapper for the PUT method
-func (c *Client) PutWithContext(ctx context.Context, url string, reqBody, resType interface{}) error {
-	return c.CallAPIWithContext(ctx, "PUT", url, reqBody, resType, nil, true)
+func (c *Client) PutWithContext(ctx context.Context, url string, reqBody, result interface{}, queryParams url.Values) error {
+	return c.CallAPIWithContext(ctx, "PUT", url, reqBody, result, queryParams, nil, true)
 }

 // PutUnAuthWithContext is a wrapper for the unauthenticated PUT method
-func (c *Client) PutUnAuthWithContext(ctx context.Context, url string, reqBody, resType interface{}) error {
-	return c.CallAPIWithContext(ctx, "PUT", url, reqBody, resType, nil, false)
+func (c *Client) PutUnAuthWithContext(ctx context.Context, url string, reqBody, result interface{}, queryParams url.Values) error {
+	return c.CallAPIWithContext(ctx, "PUT", url, reqBody, result, queryParams, nil, false)
 }

 // DeleteWithContext is a wrapper for the DELETE method
-func (c *Client) DeleteWithContext(ctx context.Context, url string, resType interface{}) error {
-	return c.CallAPIWithContext(ctx, "DELETE", url, nil, resType, nil, true)
+func (c *Client) DeleteWithContext(ctx context.Context, url string, result interface{}, queryParams url.Values) error {
+	return c.CallAPIWithContext(ctx, "DELETE", url, nil, result, queryParams, nil, true)
 }

 // DeleteUnAuthWithContext is a wrapper for the unauthenticated DELETE method
-func (c *Client) DeleteUnAuthWithContext(ctx context.Context, url string, resType interface{}) error {
-	return c.CallAPIWithContext(ctx, "DELETE", url, nil, resType, nil, false)
+func (c *Client) DeleteUnAuthWithContext(ctx context.Context, url string, result interface{}, queryParams url.Values) error {
+	return c.CallAPIWithContext(ctx, "DELETE", url, nil, result, queryParams, nil, false)
 }

 // timeDelta returns the time delta between the host and the remote API
@@ -284,7 +285,7 @@ func (c *Client) getTimeDelta() (time.Duration, error) {
 func (c *Client) getTime() (*time.Time, error) {
 	var timestamp int64

-	err := c.GetUnAuth("/auth/time", &timestamp)
+	err := c.GetUnAuth("/auth/time", &timestamp, nil)
 	if err != nil {
 		return nil, err
 	}
@@ -306,7 +307,7 @@ var getEndpointForSignature = func(c *Client) string {
 }

 // NewRequest returns a new HTTP request
-func (c *Client) NewRequest(method, path string, reqBody interface{}, headers map[string]interface{}, needAuth bool) (*http.Request, error) {
+func (c *Client) NewRequest(method, path string, reqBody interface{}, queryParams url.Values, headers map[string]interface{}, needAuth bool) (*http.Request, error) {
 	var body []byte
 	var err error

@@ -402,10 +403,10 @@ func (c *Client) Do(req *http.Request) (*http.Response, error) {
 // argument is not nil, it will also serialize it as json and inject
 // the required Content-Type header.
 //
-// If everything went fine, unmarshall response into resType and return nil
+// If everything went fine, unmarshal the response into result and return nil
 // otherwise, return the error
-func (c *Client) CallAPI(method, path string, reqBody, resType interface{}, needAuth bool) error {
-	return c.CallAPIWithContext(context.Background(), method, path, reqBody, resType, nil, needAuth)
+func (c *Client) CallAPI(method, path string, reqBody, result interface{}, queryParams url.Values, needAuth bool) error {
+	return c.CallAPIWithContext(context.Background(), method, path, reqBody, result, queryParams, nil, needAuth)
 }

 // CallAPIWithContext is the lowest level call helper. If needAuth is true,
@@ -426,10 +427,10 @@ func (c *Client) CallAPI(method, path string, reqBody, resType interface{}, need
 // argument is not nil, it will also serialize it as json and inject
 // the required Content-Type header.
 //
-// If everything went fine, unmarshall response into resType and return nil
+// If everything went fine, unmarshal the response into result and return nil
 // otherwise, return the error
-func (c *Client) CallAPIWithContext(ctx context.Context, method, path string, reqBody, resType interface{}, headers map[string]interface{}, needAuth bool) error {
-	req, err := c.NewRequest(method, path, reqBody, headers, needAuth)
+func (c *Client) CallAPIWithContext(ctx context.Context, method, path string, reqBody, result interface{}, queryParams url.Values, headers map[string]interface{}, needAuth bool) error {
+	req, err := c.NewRequest(method, path, reqBody, queryParams, headers, needAuth)
 	if err != nil {
 		return err
 	}
@@ -438,12 +439,12 @@ func (c *Client) CallAPIWithContext(ctx context.Context, method, path string, re
 	if err != nil {
 		return err
 	}
-	return c.UnmarshalResponse(response, resType)
+	return c.UnmarshalResponse(response, result)
 }

 // UnmarshalResponse checks the response and unmarshals it into the response
 // type if needed. Helper function, called from CallAPI
-func (c *Client) UnmarshalResponse(response *http.Response, resType interface{}) error {
+func (c *Client) UnmarshalResponse(response *http.Response, result interface{}) error {
 	// Read all the response body
 	defer response.Body.Close()
 	body, err := ioutil.ReadAll(response.Body)
@@ -463,9 +464,9 @@ func (c *Client) UnmarshalResponse(response *http.Response, resType interface{})
 	}

 	// Nothing to unmarshal
-	if len(body) == 0 || resType == nil {
+	if len(body) == 0 || result == nil {
 		return nil
 	}

-	return json.Unmarshal(body, &resType)
+	return json.Unmarshal(body, &result)
 }
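Every SDK wrapper above gained a url.Values parameter: existing call sites pass nil, and callers that need a query string can now supply one. A hedged usage sketch against the updated signatures; the endpoint path and the "status" filter key are illustrative, not taken from the patch:

```go
package example

import (
	"net/url"

	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud/sdk"
)

// listAvailableFlavors shows the new wrapper shape: a result pointer plus
// optional query parameters passed as url.Values.
func listAvailableFlavors(client *sdk.Client) ([]sdk.Flavor, error) {
	params := url.Values{}
	params.Set("status", "available") // hypothetical filter, for illustration only

	var flavors []sdk.Flavor
	if err := client.Get("/cloud/project/projectID/flavor", &flavors, params); err != nil {
		return nil, err
	}
	return flavors, nil
}
```

Call sites that need no query string simply pass nil, which is exactly what the updated flavor.go and nodepool.go calls do above.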