diff --git a/cluster-autoscaler/README.md b/cluster-autoscaler/README.md index ac996cd63659..3902cd36a743 100644 --- a/cluster-autoscaler/README.md +++ b/cluster-autoscaler/README.md @@ -20,12 +20,13 @@ You should also take a look at the notes and "gotchas" for your specific cloud p * [CloudStack](./cloudprovider/cloudstack/README.md) * [HuaweiCloud](./cloudprovider/huaweicloud/README.md) * [Hetzner](./cloudprovider/hetzner/README.md) -* [Packet](./cloudprovider/packet/README.md#notes) +* [Packet](./cloudprovider/packet/README.md#notes) * [IonosCloud](./cloudprovider/ionoscloud/README.md) * [OVHcloud](./cloudprovider/ovhcloud/README.md) * [Linode](./cloudprovider/linode/README.md) * [ClusterAPI](./cloudprovider/clusterapi/README.md) * [BizflyCloud](./cloudprovider/bizflycloud/README.md) +* [Rancher](./cloudprovider/rancher/README.md) # Releases @@ -159,3 +160,4 @@ Supported cloud providers: * Linode https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/linode/README.md * Hetzner https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/hetzner/README.md * Cluster API https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md +* Rancher https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/rancher/README.md diff --git a/cluster-autoscaler/cloudprovider/builder/builder_all.go b/cluster-autoscaler/cloudprovider/builder/builder_all.go index a10526145425..e7f9fc04395b 100644 --- a/cluster-autoscaler/cloudprovider/builder/builder_all.go +++ b/cluster-autoscaler/cloudprovider/builder/builder_all.go @@ -1,4 +1,4 @@ -// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud +// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud,!rancher /* Copyright 2018 The Kubernetes Authors. @@ -37,6 +37,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/magnum" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/packet" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/rancher" "k8s.io/autoscaler/cluster-autoscaler/config" ) @@ -58,6 +59,7 @@ var AvailableCloudProviders = []string{ cloudprovider.IonoscloudProviderName, cloudprovider.LinodeProviderName, cloudprovider.BizflyCloudProviderName, + cloudprovider.RancherProviderName, } // DefaultCloudProvider is GCE. @@ -99,6 +101,8 @@ func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGro return ionoscloud.BuildIonosCloud(opts, do, rl) case cloudprovider.LinodeProviderName: return linode.BuildLinode(opts, do, rl) + case cloudprovider.RancherProviderName: + return rancher.BuildRancher(opts, do, rl) } return nil } diff --git a/cluster-autoscaler/cloudprovider/builder/builder_rancher.go b/cluster-autoscaler/cloudprovider/builder/builder_rancher.go new file mode 100644 index 000000000000..91deb82d31af --- /dev/null +++ b/cluster-autoscaler/cloudprovider/builder/builder_rancher.go @@ -0,0 +1,42 @@ +// +build rancher + +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package builder + +import ( + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/rancher" + "k8s.io/autoscaler/cluster-autoscaler/config" +) + +// AvailableCloudProviders supported by the rancher provider builder. +var AvailableCloudProviders = []string{ + cloudprovider.RancherProviderName, +} + +// DefaultCloudProvider for do-only build is rancher. +const DefaultCloudProvider = cloudprovider.RancherProviderName + +func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { + switch opts.CloudProviderName { + case cloudprovider.RancherProviderName: + return rancher.BuildRancher(opts, do, rl) + } + + return nil +} diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go index c53937da2e78..3cd4364a78a0 100644 --- a/cluster-autoscaler/cloudprovider/cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/cloud_provider.go @@ -62,6 +62,8 @@ const ( OVHcloudProviderName = "ovhcloud" // LinodeProviderName gets the provider name of linode LinodeProviderName = "linode" + // RancherProviderName gets the provider name of rancher + RancherProviderName = "rancher" ) // CloudProvider contains configuration info and functions for interacting with diff --git a/cluster-autoscaler/cloudprovider/rancher/README.md b/cluster-autoscaler/cloudprovider/rancher/README.md new file mode 100644 index 000000000000..8066525f5302 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/README.md @@ -0,0 +1,54 @@ +# Cluster Autoscaler for Rancher + +The cluster autoscaler for Rancher scales nodes within any specified Rancher Kubernetes Engine cluster's node pool. + +# Requirements + +Rancher version >= 2.5.6 + +# Configuration + +The cluster-autoscaler for Rancher needs a configuration file to work by using --cloud-config parameter. + +Here an [example](examples/autoscaler-config-example.yaml). + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: cluster-autoscaler-cloud-config + namespace: kube-system +type: Opaque +stringData: + cloud-config: |- + [Global] + url=https://rancherapi.com/v3 + access=your-token + secret=your-secret + cluster-id=c-abcdef + autoscaler_node_arg: "2:6:c-abcdef:np-abcde" # Your NodePool ID +``` + +You have to create a new API Key from your Rancher Dashboard to get the `access` and `secret` values to use the Autoscaler. + +# Development + +Make sure you're inside the root path of the [autoscaler repository](https://github.com/kubernetes/autoscaler) + +1.) Build the `cluster-autoscaler` binary: + +``` +make build-in-docker +``` + +2.) Build the docker image: + +``` +docker build -t rancher/cluster-autoscaler:dev . +``` + +3.) 
Push the docker image to Docker hub: + +``` +docker push rancher/cluster-autoscaler:dev +``` diff --git a/cluster-autoscaler/cloudprovider/rancher/examples/autoscaler-config-example.yaml b/cluster-autoscaler/cloudprovider/rancher/examples/autoscaler-config-example.yaml new file mode 100644 index 000000000000..ebe257fed332 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/examples/autoscaler-config-example.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Secret +metadata: + name: cluster-autoscaler-cloud-config + namespace: kube-system +type: Opaque +stringData: + cloud-config: |- + [Global] + url=https://rancherapi.com/v3 + access=token-abcdef + secret=ksjdhfiusdhfkjsdfhisudhfnskjdfhskjdfhksdjfhksjdfhksdjf + cluster-id=c-abcdef + autoscaler_node_arg: "2:6:c-abcdef:np-abcde" \ No newline at end of file diff --git a/cluster-autoscaler/cloudprovider/rancher/examples/autoscaler-noconfig-template.txt b/cluster-autoscaler/cloudprovider/rancher/examples/autoscaler-noconfig-template.txt new file mode 100644 index 000000000000..b9f7f4516b35 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/examples/autoscaler-noconfig-template.txt @@ -0,0 +1,118 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-autoscaler-role +rules: + - apiGroups: [""] + resources: ["events", "endpoints"] + verbs: ["create", "patch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] + - apiGroups: [""] + resources: ["pods/status"] + verbs: ["update"] + - apiGroups: [""] + resources: ["endpoints"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["watch", "list", "get", "update"] + - apiGroups: [""] + resources: + - "pods" + - "services" + - "replicationcontrollers" + - "persistentvolumeclaims" + - "persistentvolumes" + verbs: ["watch", "list", "get"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["watch", "list", "get"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["watch", "list"] + - apiGroups: ["apps"] + resources: ["daemonsets", "replicasets", "statefulsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "csinodes"] + verbs: ["watch", "list", "get"] + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create","list","watch"] + - apiGroups: [""] + resources: ["configmaps"] + resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"] + verbs: ["delete", "get", "update"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["watch", "list", "get", "create", "update", "patch", "delete", "deletecollection"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cluster-autoscaler-rolebinding + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler-role +subjects: + - kind: ServiceAccount + name: cluster-autoscaler-account + namespace: kube-system + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cluster-autoscaler-account + namespace: kube-system + +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + app: cluster-autoscaler +spec: + replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + namespace: kube-system + labels: + app: cluster-autoscaler + spec: + serviceAccountName: cluster-autoscaler-account + containers: + - name: 
cluster-autoscaler + image: us.gcr.io/gns-hosting-dev/cluster-autoscaler-amd64:{{ env "TAG" }} + imagePullPolicy: Always + command: + - ./cluster-autoscaler + - --alsologtostderr + - --cloud-config=/config/cloud-config + - --cloud-provider=rancher + - --nodes=$(NODE_ARG) + volumeMounts: + - name: cloud-config + mountPath: /config + readOnly: true + env: + - name: NODE_ARG + valueFrom: + secretKeyRef: + name: cluster-autoscaler-cloud-config + key: autoscaler_node_arg + volumes: + - name: cloud-config + secret: + secretName: cluster-autoscaler-cloud-config diff --git a/cluster-autoscaler/cloudprovider/rancher/examples/autoscaler-noconfig.yaml b/cluster-autoscaler/cloudprovider/rancher/examples/autoscaler-noconfig.yaml new file mode 100644 index 000000000000..c0afb496e538 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/examples/autoscaler-noconfig.yaml @@ -0,0 +1,118 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-autoscaler-role +rules: + - apiGroups: [""] + resources: ["events", "endpoints"] + verbs: ["create", "patch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] + - apiGroups: [""] + resources: ["pods/status"] + verbs: ["update"] + - apiGroups: [""] + resources: ["endpoints"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["watch", "list", "get", "update"] + - apiGroups: [""] + resources: + - "pods" + - "services" + - "replicationcontrollers" + - "persistentvolumeclaims" + - "persistentvolumes" + verbs: ["watch", "list", "get"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["watch", "list", "get"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["watch", "list"] + - apiGroups: ["apps"] + resources: ["daemonsets", "replicasets", "statefulsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "csinodes"] + verbs: ["watch", "list", "get"] + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create","list","watch"] + - apiGroups: [""] + resources: ["configmaps"] + resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"] + verbs: ["delete", "get", "update"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["watch", "list", "get", "create", "update", "patch", "delete", "deletecollection"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cluster-autoscaler-rolebinding + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler-role +subjects: + - kind: ServiceAccount + name: cluster-autoscaler-account + namespace: kube-system + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cluster-autoscaler-account + namespace: kube-system + +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + app: cluster-autoscaler +spec: + replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + namespace: kube-system + labels: + app: cluster-autoscaler + spec: + serviceAccountName: cluster-autoscaler-account + containers: + - name: cluster-autoscaler + image: cluster-autoscaler:latest + imagePullPolicy: Always + command: + - ./cluster-autoscaler + - --alsologtostderr + - --cloud-config=/config/cloud-config + - --cloud-provider=rancher + - --nodes=$(NODE_ARG) + volumeMounts: + - name: cloud-config + mountPath: 
/config + readOnly: true + env: + - name: NODE_ARG + valueFrom: + secretKeyRef: + name: cluster-autoscaler-cloud-config + key: autoscaler_node_arg + volumes: + - name: cloud-config + secret: + secretName: cluster-autoscaler-cloud-config diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher/client.go b/cluster-autoscaler/cloudprovider/rancher/rancher/client.go new file mode 100644 index 000000000000..8b39f072e6f7 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/rancher/client.go @@ -0,0 +1,255 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rancher + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "os" +) + +// Cluster represents a Rancher cluster. +type Cluster struct { + ID string `json:"id"` + Name string `json:"name"` +} + +// NodePool represents a Rancher nodePool. +type NodePool struct { + ID string `json:"id"` + Name string `json:"name"` + ClusterID string `json:"clusterId"` + ControlPlane bool `json:"controlPlane"` + Etcd bool `json:"etcd"` + Worker bool `json:"worker"` + Quantity int `json:"quantity"` + State string `json:"state"` + DrainBeforeDelete bool `json:"drainBeforeDelete"` +} + +type nodePoolResponse struct { + Data []NodePool `json:"data"` +} + +// Node represents a Rancher node. +type Node struct { + ID string `json:"id"` + Name string `json:"nodeName"` + ProviderID string `json:"providerId"` + ClusterID string `json:"clusterId"` + NodePoolID string `json:"nodePoolId"` + State string `json:"state"` +} + +type nodeResponse struct { + Data []Node `json:"data"` +} + +// Client is an HTTP client for RancherAPI. +type Client struct { + url string + token string + cli *http.Client +} + +// New returns a new client for rancher cli. +func New(url, token string) *Client { + c := http.DefaultClient + if os.Getenv("AUTOSCALER_HTTP_DEBUG") != "" { + c.Transport = loggerTransport + } + + return &Client{ + url: url, + token: token, + cli: c, + } +} + +// ClusterByID returns the cluster by id +func (c *Client) ClusterByID(id string) (*Cluster, error) { + resp, err := c.doRequest(http.MethodGet, fmt.Sprintf("%s/clusters/%s", c.url, id), nil, nil) + if err != nil { + return nil, err + } + + var np Cluster + if err := json.Unmarshal(resp, &np); err != nil { + return nil, err + } + + return &np, err +} + +// ResizeNodePool resizes the selected nodePool +func (c *Client) ResizeNodePool(id string, size int) (*NodePool, error) { + url := fmt.Sprintf("%s/nodePools/%s", c.url, id) + reqBody, err := json.Marshal(map[string]interface{}{ + "quantity": size, + }) + if err != nil { + return nil, err + } + + resp, err := c.doRequest(http.MethodPut, url, bytes.NewBuffer(reqBody), nil) + if err != nil { + return nil, err + } + + var np NodePool + if err := json.Unmarshal(resp, &np); err != nil { + return nil, err + } + + return &np, nil +} + +// NodePoolsByCluster returns all node pools by cluster. 
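+// It calls GET <url>/clusters/<clusterID>/nodepools on the Rancher API and
+// treats an empty result as an error, since the autoscaler needs at least one pool to manage.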
+func (c *Client) NodePoolsByCluster(clusterID string) ([]NodePool, error) { + url := fmt.Sprintf("%s/clusters/%s/nodepools", c.url, clusterID) + resp, err := c.doRequest(http.MethodGet, url, nil, nil) + if err != nil { + return nil, err + } + + var body nodePoolResponse + if err := json.Unmarshal(resp, &body); err != nil { + return nil, err + } + + if len(body.Data) == 0 { + return nil, fmt.Errorf("clusterID: %q does not have nodePools", clusterID) + } + + return body.Data, nil +} + +// ScaleDownNode deletes the specific node and scale down the node pool. +func (c *Client) ScaleDownNode(nodeID string) error { + url := fmt.Sprintf("%s/nodes/%s?action=scaledown", c.url, nodeID) + _, err := c.doRequest(http.MethodPost, url, nil, nil) + return err +} + +// NodePoolByID returns a node pool by id. +func (c *Client) NodePoolByID(id string) (*NodePool, error) { + url := fmt.Sprintf("%s/nodePools/%s", c.url, id) + resp, err := c.doRequest(http.MethodGet, url, nil, nil) + if err != nil { + return nil, err + } + + var np NodePool + if err := json.Unmarshal(resp, &np); err != nil { + return nil, err + } + + return &np, nil +} + +// NodesByNodePool returns all the nodes in a node pool. +func (c *Client) NodesByNodePool(nodePoolID string) ([]Node, error) { + return c.nodesByFilters(map[string]string{"nodePoolId": nodePoolID}) +} + +// NodeByProviderID returns a node by providerID +func (c *Client) NodeByProviderID(providerID string) (*Node, error) { + nodes, err := c.nodesByFilters(map[string]string{"providerId": providerID}) + if err != nil { + return nil, err + } + + if len(nodes) == 0 { + return nil, fmt.Errorf("nodeID: %q does not exist", providerID) + } + + return &nodes[0], nil +} + +// NodeByNameAndCluster returns the node that match name and cluster +func (c *Client) NodeByNameAndCluster(name, cluster string) (*Node, error) { + nodes, err := c.nodesByFilters(map[string]string{"name": name, "clusterId": cluster}) + if err != nil { + return nil, err + } + + if len(nodes) == 0 { + return nil, fmt.Errorf("node: %q for cluster: %q does not exist", name, cluster) + } + + return &nodes[0], nil +} + +func (c *Client) nodesByFilters(filters map[string]string) ([]Node, error) { + url := fmt.Sprintf("%s/nodes", c.url) + resp, err := c.doRequest(http.MethodGet, url, nil, filters) + if err != nil { + return nil, err + } + + var body nodeResponse + if err := json.Unmarshal(resp, &body); err != nil { + return nil, err + } + + return body.Data, nil +} + +func (c *Client) doRequest(verb, url string, body io.Reader, params map[string]string) ([]byte, error) { + req, err := http.NewRequest(verb, url, body) + if err != nil { + return nil, err + } + + req.Header.Add("Authorization", fmt.Sprintf("bearer %s", c.token)) + req.Header.Add("Content-Type", "application/json") + if len(params) > 0 { + q := req.URL.Query() + for k, v := range params { + q.Add(k, v) + } + req.URL.RawQuery = q.Encode() + } + + resp, err := c.cli.Do(req) + if err != nil { + return nil, err + } + + defer resp.Body.Close() + var b bytes.Buffer + if _, err := io.Copy(&b, resp.Body); err != nil { + return nil, err + } + + switch resp.StatusCode { + case http.StatusOK, http.StatusCreated, http.StatusAccepted, http.StatusNoContent: + case http.StatusUnauthorized: + return nil, fmt.Errorf("rancher: authentication failed with credentials provided") + case http.StatusForbidden: + return nil, fmt.Errorf("rancher: %s is forbidden", url) + case http.StatusNotFound: + return nil, fmt.Errorf("rancher: %s resource not found", url) + default: + return nil, 
fmt.Errorf("rancher: %s invalid status code %d", url, resp.StatusCode) + } + + return b.Bytes(), nil +} diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher/client_test.go b/cluster-autoscaler/cloudprovider/rancher/rancher/client_test.go new file mode 100644 index 000000000000..64e993ff1202 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/rancher/client_test.go @@ -0,0 +1,226 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rancher + +import ( + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestClusterByID(t *testing.T) { + clusterID := "c-lkf2d" + t.Run("success", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + response := ` + { + "id": "c-lkf2d", + "name": "c-lkf2d" + } + ` + fmt.Fprintln(w, response) + })) + + defer ts.Close() + cli := New(ts.URL, "") + cluster, err := cli.ClusterByID(clusterID) + assert.NoError(t, err, "unexpected error") + assert.Equal(t, cluster.ID, clusterID) + }) + + t.Run("failed", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + })) + + defer ts.Close() + cli := New(ts.URL, "") + _, err := cli.ClusterByID(clusterID) + assert.Error(t, err) + }) +} + +func TestResizeNodePool(t *testing.T) { + t.Run("success", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + response := ` + { + "id": "c-wxas29:master", + "quantity": 2 + } + ` + fmt.Fprintln(w, response) + })) + + defer ts.Close() + cli := New(ts.URL, "") + qty := 2 + np, err := cli.ResizeNodePool("asd", qty) + assert.NoError(t, err) + assert.Equal(t, np.Quantity, qty) + }) + + t.Run("failed", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + })) + + defer ts.Close() + cli := New(ts.URL, "") + _, err := cli.ResizeNodePool("asd", 2) + assert.Error(t, err) + }) +} + +func TestNodePoolsByCluster(t *testing.T) { + t.Run("success", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + response := ` + { + "data": [{"id":"nodepool1"},{"id":"nodepool2"}] + } + ` + fmt.Fprintln(w, response) + })) + + defer ts.Close() + cli := New(ts.URL, "") + nps, err := cli.NodePoolsByCluster("cluster1") + assert.NoError(t, err) + assert.Equal(t, len(nps), 2) + }) + + t.Run("failed", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + response := ` + { + "data": [] + } + ` + fmt.Fprintln(w, response) + })) + + defer ts.Close() + cli := New(ts.URL, "") + nps, err := cli.NodePoolsByCluster("cluster1") + assert.Error(t, err) + assert.Equal(t, len(nps), 0) + }) +} + +func TestNodePoolByID(t *testing.T) { + t.Run("success", func(t *testing.T) { + ts := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + response := ` + { + "id": "nodepool1" + } + ` + fmt.Fprintln(w, response) + })) + + defer ts.Close() + cli := New(ts.URL, "") + np, err := cli.NodePoolByID("nodepool1") + assert.NoError(t, err) + assert.Equal(t, np.ID, "nodepool1") + }) + + t.Run("failed", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + })) + + defer ts.Close() + cli := New(ts.URL, "") + _, err := cli.NodePoolByID("nodepool1") + assert.Error(t, err) + }) +} + +func TestNodeByProviderID(t *testing.T) { + t.Run("success", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + response := ` + { + "data": [{"id": "node1"}] + } + ` + fmt.Fprintln(w, response) + })) + + defer ts.Close() + cli := New(ts.URL, "") + n, err := cli.NodeByProviderID("gce_uuid") + assert.NoError(t, err) + assert.Equal(t, n.ID, "node1") + }) + + t.Run("failed", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + response := ` + { + "data": [] + } + ` + fmt.Fprintln(w, response) + })) + + defer ts.Close() + cli := New(ts.URL, "") + _, err := cli.NodeByProviderID("gce_uuid") + assert.Error(t, err) + }) +} + +func Test_doRequest(t *testing.T) { + t.Run("failed with 401", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + })) + + defer ts.Close() + cli := New(ts.URL, "") + _, err := cli.doRequest("GET", ts.URL, nil, nil) + assert.Error(t, err) + }) + + t.Run("failed with 403", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusForbidden) + })) + + defer ts.Close() + cli := New(ts.URL, "") + _, err := cli.doRequest("GET", ts.URL, nil, nil) + assert.Error(t, err) + }) + + t.Run("failed with 404", func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + })) + + defer ts.Close() + cli := New(ts.URL, "") + _, err := cli.doRequest("GET", ts.URL, nil, nil) + assert.Error(t, err) + }) +} diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher/roundtrip_logger.go b/cluster-autoscaler/cloudprovider/rancher/rancher/roundtrip_logger.go new file mode 100644 index 000000000000..ef216712f170 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/rancher/roundtrip_logger.go @@ -0,0 +1,81 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package rancher + +import ( + "log" + "net/http" + "net/http/httputil" +) + +type transport struct { + Transport http.RoundTripper + LogRequest func(req *http.Request) + LogResponse func(resp *http.Response) +} + +var loggerTransport = &transport{ + Transport: http.DefaultTransport, +} + +// DefaultLogRequest is used if transport.LogRequest is not set. +var DefaultLogRequest = func(req *http.Request) { + a, _ := httputil.DumpRequestOut(req, true) + log.Printf("-->\n%s", a) +} + +// DefaultLogResponse is used if transport.LogResponse is not set. +var DefaultLogResponse = func(resp *http.Response) { + a, _ := httputil.DumpResponse(resp, true) + log.Printf("<--\n%s", a) +} + +func (t *transport) RoundTrip(req *http.Request) (*http.Response, error) { + t.logRequest(req) + + resp, err := t.transport().RoundTrip(req) + if err != nil { + return resp, err + } + + t.logResponse(resp) + return resp, err +} + +func (t *transport) logRequest(req *http.Request) { + if t.LogRequest != nil { + t.LogRequest(req) + } else { + DefaultLogRequest(req) + } +} + +func (t *transport) logResponse(resp *http.Response) { + if t.LogResponse != nil { + t.LogResponse(resp) + } else { + DefaultLogResponse(resp) + } +} + +func (t *transport) transport() http.RoundTripper { + if t.Transport != nil { + return t.Transport + } + + return http.DefaultTransport +} diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher_cloud_provider.go b/cluster-autoscaler/cloudprovider/rancher/rancher_cloud_provider.go new file mode 100644 index 000000000000..86b72276bec1 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/rancher_cloud_provider.go @@ -0,0 +1,218 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rancher + +import ( + "errors" + "fmt" + "os" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" + caerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/klog/v2" +) + +var ( + availableGPUTypes map[string]struct{} +) + +var ( + errAutodiscoveryNotSupported = errors.New("only support static discovery scaling group for now") + errNodeSpecsCannotEmpty = errors.New("node group specs must be specified") +) + +// rancherProvider implements CloudProvider interface. 
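+// It holds the manager used to talk to the Rancher API, the statically configured
+// node pools, and the resource limiter passed in by the autoscaler core.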
+type rancherProvider struct { + manager *manager + nodePools []*NodePool + resourceLimiter *cloudprovider.ResourceLimiter +} + +func newRancherProvider(manager *manager, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) (*rancherProvider, error) { + klog.V(5).Info("Build rancher autoscaler CloudProvider") + if do.AutoDiscoverySpecified() { + return nil, errAutodiscoveryNotSupported + } + + if len(do.NodeGroupSpecs) == 0 { + return nil, errNodeSpecsCannotEmpty + } + + clusterNPS, err := manager.getNodePools() + if err != nil { + return nil, fmt.Errorf("error trying to get nodePools from rancher: %w", err) + } + + nps := make([]*NodePool, len(do.NodeGroupSpecs)) + for i, spec := range do.NodeGroupSpecs { + ns, err := dynamic.SpecFromString(spec, false) + if err != nil { + return nil, fmt.Errorf("failed to parse node group spec: %v", err) + } + + np, ok := clusterNPS[ns.Name] + if !ok { + return nil, fmt.Errorf("nodePool: %s does not exist", ns.Name) + } + + if !np.DrainBeforeDelete { + return nil, fmt.Errorf("nodePool: %s must have DrainBeforeDelete enabled", ns.Name) + } + + nps[i] = &NodePool{ + manager: manager, + id: ns.Name, + minSize: ns.MinSize, + maxSize: ns.MaxSize, + rancherNP: np, + } + klog.Infof("Inserting NodePool: %#v", nps[i]) + } + + return &rancherProvider{ + manager: manager, + nodePools: nps, + resourceLimiter: rl, + }, nil +} + +// Name returns name of the cloud provider. +func (u rancherProvider) Name() string { + return cloudprovider.RancherProviderName +} + +// NodeGroups returns all node pools configured for this cloud provider. +func (u rancherProvider) NodeGroups() []cloudprovider.NodeGroup { + nodeGroups := make([]cloudprovider.NodeGroup, len(u.nodePools)) + for i, ng := range u.nodePools { + nodeGroups[i] = ng + } + return nodeGroups +} + +// NodeGroupForNode returns the node group for the given node, nil if the node +// should not be processed by cluster autoscaler, or non-nil error if such +// occurred. Must be implemented. +func (u rancherProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) { + np, err := u.manager.getNode(node) + if err != nil { + return nil, err + } + + klog.V(4).Infof("checking node pool for node: %s - %q", node.Name, np.ID) + for _, pool := range u.nodePools { + if np.NodePoolID == pool.id { + klog.Infof("Node %q belongs to NodePool %q", np.ID, pool.id) + return pool, nil + } + } + + return nil, nil +} + +// Pricing returns pricing model for this cloud provider or error if not available. +// Implementation optional. +func (u rancherProvider) Pricing() (cloudprovider.PricingModel, caerrors.AutoscalerError) { + return nil, cloudprovider.ErrNotImplemented +} + +// GetAvailableMachineTypes get all machine types that can be requested from the cloud provider. +// Implementation optional. +func (u rancherProvider) GetAvailableMachineTypes() ([]string, error) { + return []string{}, nil +} + +// NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically +// created on the cloud provider side. The node group is not returned by NodeGroups() until it is created. +// Implementation optional. 
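+// The rancher provider only scales node pools that already exist and are listed
+// via --nodes, so this always returns cloudprovider.ErrNotImplemented.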
+func (u rancherProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, taints []apiv1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// GetResourceLimiter returns struct containing limits (max, min) for resources (cores, memory etc.). +func (u rancherProvider) GetResourceLimiter() (*cloudprovider.ResourceLimiter, error) { + return u.resourceLimiter, nil +} + +// GPULabel returns the label added to nodes with GPU resource. +func (u rancherProvider) GPULabel() string { + return "gpu-image" +} + +// GetAvailableGPUTypes return all available GPU types cloud provider supports. +func (u rancherProvider) GetAvailableGPUTypes() map[string]struct{} { + return availableGPUTypes +} + +// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc. +func (u rancherProvider) Cleanup() error { + return nil +} + +// Refresh is called before every main loop and can be used to dynamically update cloud provider state. +// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh(). +func (u rancherProvider) Refresh() error { + klog.V(4).Info("Refreshing node group") + clusterNPS, err := u.manager.getNodePools() + if err != nil { + return err + } + + for _, np := range u.nodePools { + clusterNode, ok := clusterNPS[np.id] + if !ok { + return fmt.Errorf("nodePool: %q does not exist", np.id) + } + + if !clusterNode.DrainBeforeDelete { + return fmt.Errorf("nodePool: %q must have DrainBeforeDelete enabled", clusterNode.Name) + } + + np.rancherNP = clusterNode + } + + return nil +} + +// BuildRancher builds the Rancher cloud provider. +func BuildRancher(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { + configFile, err := os.Open(opts.CloudConfig) + if err != nil { + klog.Fatalf("Couldn't open cloud provider configuration %s: %#v", opts.CloudConfig, err) + } + defer configFile.Close() + + manager, err := newManager(configFile) + if err != nil { + klog.Fatalf("Failed to create rancher manager: %v", err) + } + + if _, err := manager.client.ClusterByID(manager.clusterID); err != nil { + klog.Fatalf("Failed to create rancher provider: error retrieving cluster %v", err) + } + + provider, err := newRancherProvider(manager, do, rl) + if err != nil { + klog.Fatalf("Failed to create rancher provider: %v", err) + } + + return provider +} diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/rancher/rancher_cloud_provider_test.go new file mode 100644 index 000000000000..338d42f8897f --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/rancher_cloud_provider_test.go @@ -0,0 +1,81 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package rancher + +import ( + "fmt" + "testing" + + apiv1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/rancher/rancher" +) + +func TestRancherProvider_NodeGroupForNode(t *testing.T) { + t.Run("success", func(t *testing.T) { + var cli clientMock + nodePool := NodePool{id: "pool1"} + cli.nodeByProviderIDFn = func(providerID string) (*rancher.Node, error) { + return &rancher.Node{Name: "worker1", NodePoolID: nodePool.id}, nil + } + + manager := manager{client: &cli} + u := rancherProvider{manager: &manager, nodePools: []*NodePool{&nodePool}} + np, err := u.NodeGroupForNode(&apiv1.Node{ObjectMeta: v1.ObjectMeta{Name: "worker1"}}) + if err != nil { + t.Errorf("unexpected error %v", err) + } + + if np.Id() != nodePool.id { + t.Errorf("got %s expected %s", np.Id(), nodePool.id) + } + }) + + t.Run("node does not exist - failed", func(t *testing.T) { + var cli clientMock + cli.nodeByProviderIDFn = func(providerID string) (*rancher.Node, error) { + return nil, fmt.Errorf("node %q does not exist", providerID) + } + + manager := manager{client: &cli} + u := rancherProvider{manager: &manager} + _, err := u.NodeGroupForNode(&apiv1.Node{ObjectMeta: v1.ObjectMeta{Name: "worker1"}}) + if err == nil { + t.Error("expected error") + } + }) + + t.Run("node belongs to nodePool without auto-scale", func(t *testing.T) { + var cli clientMock + nodePool := NodePool{id: "pool1"} + cli.nodeByProviderIDFn = func(providerID string) (*rancher.Node, error) { + return &rancher.Node{Name: "worker1", NodePoolID: "pool2"}, nil + } + + manager := manager{client: &cli} + u := rancherProvider{manager: &manager, nodePools: []*NodePool{&nodePool}} + np, err := u.NodeGroupForNode(&apiv1.Node{ObjectMeta: v1.ObjectMeta{Name: "worker1"}}) + if err != nil { + t.Errorf("unexpected error %v", err) + } + + if np != nil { + t.Errorf("unexpected value, %v", np) + } + }) + +} diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher_manager.go b/cluster-autoscaler/cloudprovider/rancher/rancher_manager.go new file mode 100644 index 000000000000..80089c79e57d --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/rancher_manager.go @@ -0,0 +1,132 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package rancher + +import ( + "errors" + "fmt" + "io" + "strings" + + "gopkg.in/gcfg.v1" + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/rancher/rancher" + "k8s.io/klog/v2" +) + +type service interface { + ResizeNodePool(id string, size int) (*rancher.NodePool, error) + NodePoolsByCluster(clusterID string) ([]rancher.NodePool, error) + NodePoolByID(id string) (*rancher.NodePool, error) + NodesByNodePool(nodePoolID string) ([]rancher.Node, error) + NodeByProviderID(providerID string) (*rancher.Node, error) + NodeByNameAndCluster(name, cluster string) (*rancher.Node, error) + ClusterByID(id string) (*rancher.Cluster, error) + ScaleDownNode(nodeID string) error +} + +var ( + errSecretRequired = errors.New("secret is required") + errAccessRequired = errors.New("access is required") + errClusterIDRequired = errors.New("clusterID is required") + errURLRequired = errors.New("url is required") + errClusterIDInvalidFormat = errors.New("clusterID invalid format") +) + +type manager struct { + client service + clusterID string +} + +// rancherConfig is the configuration of the rancher provider +type rancherConfig struct { + Global struct { + // ClusterID is the id associated with the Cluster where rancher + // Cluster Autoscaler is running. + ClusterID string `gcfg:"cluster-id"` + + // Secret is the User's Secret associated with the cluster where + // rancher Cluster Autoscaler is running. + Secret string `gcfg:"secret"` + + // Access is the User's Access associated with the cluster where + // rancher Cluster Autoscaler is running. + Access string `gcfg:"access"` + + // URL points to Rancher API. + URL string `gcfg:"url"` + } +} + +func (c *rancherConfig) validate() error { + if c.Global.Access == "" { + return errAccessRequired + } + + if c.Global.Secret == "" { + return errSecretRequired + } + + if c.Global.ClusterID == "" { + return errClusterIDRequired + } + + if !strings.HasPrefix(c.Global.ClusterID, "c-") || len(c.Global.ClusterID) != 7 { + return errClusterIDInvalidFormat + } + + if c.Global.URL == "" { + return errURLRequired + } + + return nil +} + +func newManager(configReader io.Reader) (*manager, error) { + var cfg rancherConfig + if err := gcfg.ReadInto(&cfg, configReader); err != nil { + return nil, fmt.Errorf("couldn't read rancher cloud config: %w", err) + } + + if err := cfg.validate(); err != nil { + return nil, fmt.Errorf("rancher autoscaler cloudconfig error: %w", err) + } + + return &manager{ + client: rancher.New(cfg.Global.URL, fmt.Sprintf("%s:%s", cfg.Global.Access, cfg.Global.Secret)), + clusterID: cfg.Global.ClusterID, + }, nil +} + +func (m *manager) getNodePools() (map[string]rancher.NodePool, error) { + nps, err := m.client.NodePoolsByCluster(m.clusterID) + if err != nil { + return nil, err + } + + out := make(map[string]rancher.NodePool, len(nps)) + for _, np := range nps { + out[np.ID] = np + } + + return out, nil +} + +func (m *manager) getNode(node *apiv1.Node) (*rancher.Node, error) { + klog.Infof("Trying to get Node by ProviderID %q", node.Spec.ProviderID) + return m.client.NodeByProviderID(node.Spec.ProviderID) +} diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher_manager_test.go b/cluster-autoscaler/cloudprovider/rancher/rancher_manager_test.go new file mode 100644 index 000000000000..c95b7ccfaf39 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/rancher_manager_test.go @@ -0,0 +1,216 @@ +/* +Copyright 2021 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rancher + +import ( + "errors" + "fmt" + "strings" + "testing" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/rancher/rancher" +) + +func TestNewManager(t *testing.T) { + t.Run("success", func(t *testing.T) { + cfg := ` + [Global] + cluster-id=c-jha58 + secret=my-token + access=my-access + url=https://auks/v3 + ` + _, err := newManager(strings.NewReader(cfg)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + }) + + t.Run("failed: missing url", func(t *testing.T) { + cfg := ` + [Global] + cluster-id=c-jha58 + secret=my-secret + access=my-access + ` + _, err := newManager(strings.NewReader(cfg)) + if !errors.Is(err, errURLRequired) { + t.Errorf("expected error: %v got %v", errURLRequired, err) + } + }) + + t.Run("failed: missing secret", func(t *testing.T) { + cfg := ` + [Global] + cluster-id=c-jha58 + url=https://auks/v3 + access=my-access + ` + _, err := newManager(strings.NewReader(cfg)) + if !errors.Is(err, errSecretRequired) { + t.Errorf("expected error: %v got %v", errSecretRequired, err) + } + }) + + t.Run("failed: missing access", func(t *testing.T) { + cfg := ` + [Global] + cluster-id=c-jha58 + url=https://auks/v3 + secret=my-secret + ` + _, err := newManager(strings.NewReader(cfg)) + if !errors.Is(err, errAccessRequired) { + t.Errorf("expected error: %v got %v", errAccessRequired, err) + } + }) + + t.Run("failed: missing clusterID", func(t *testing.T) { + cfg := ` + [Global] + secret=my-secret + access=my-access + url=https://auks/v3 + ` + _, err := newManager(strings.NewReader(cfg)) + if !errors.Is(err, errClusterIDRequired) { + t.Errorf("expected error: %v got %v", errClusterIDRequired, err) + } + }) +} + +func TestManager_getNodePools(t *testing.T) { + t.Run("success", func(t *testing.T) { + var cli clientMock + nodePools := []rancher.NodePool{ + {ID: "1", Name: "worker-1"}, + {ID: "2", Name: "worker-2"}, + {ID: "3", Name: "worker-3"}, + } + + cli.nodePoolsByClusterFn = func(clusterID string) ([]rancher.NodePool, error) { + return nodePools, nil + } + + manager := manager{client: &cli} + nps, err := manager.getNodePools() + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + if len(nps) != len(nodePools) { + t.Errorf("got %d expected %d", len(nps), len(nodePools)) + } + }) + + t.Run("failed", func(t *testing.T) { + var cli clientMock + cli.nodePoolsByClusterFn = func(clusterID string) ([]rancher.NodePool, error) { + return nil, fmt.Errorf("cluster does not have nodePools") + } + + manager := manager{client: &cli} + _, err := manager.getNodePools() + if err == nil { + t.Errorf("expected error, got %v", err) + } + }) +} + +func TestManager_getNode(t *testing.T) { + t.Run("get node by providerID - success", func(t *testing.T) { + var cli clientMock + cli.nodeByProviderIDFn = func(providerID string) (*rancher.Node, error) { + if providerID == "openstack:///gerg-fergerg-ergreg" { + return &rancher.Node{Name: providerID}, nil + } + return nil, fmt.Errorf("node %q does not exist", 
providerID) + } + + manager := manager{client: &cli} + node, err := manager.getNode(&apiv1.Node{Spec: apiv1.NodeSpec{ProviderID: "openstack:///gerg-fergerg-ergreg"}}) + if err != nil { + t.Errorf("unexpected error, got %v", err) + } + + if node == nil { + t.Errorf("expected a node, got %v", node) + } + }) + + t.Run("get node by providerID - failed ", func(t *testing.T) { + var cli clientMock + cli.nodeByProviderIDFn = func(providerID string) (*rancher.Node, error) { + return nil, fmt.Errorf("node %q does not exist", providerID) + } + + manager := manager{client: &cli} + _, err := manager.getNode(&apiv1.Node{Spec: apiv1.NodeSpec{ProviderID: "openstack:///gerg-fergerg-ergreg"}}) + if err == nil { + t.Error("expected error") + } + }) +} + +type clientMock struct { + clusterByIDFn func(id string) (*rancher.Cluster, error) + resizeNodePoolFn func(id string, size int) (*rancher.NodePool, error) + nodePoolsByClusterFn func(clusterID string) ([]rancher.NodePool, error) + nodePoolByIDFN func(id string) (*rancher.NodePool, error) + nodesByNodePoolFn func(nodePoolID string) ([]rancher.Node, error) + nodeByProviderIDFn func(providerID string) (*rancher.Node, error) + deleteNodeFn func(id string) error + nodeByNameAndClusterFn func(name, cluster string) (*rancher.Node, error) + ScaleDownNodeFn func(nodeID string) error +} + +func (s *clientMock) ResizeNodePool(id string, size int) (*rancher.NodePool, error) { + return s.resizeNodePoolFn(id, size) +} + +func (s *clientMock) NodePoolsByCluster(clusterID string) ([]rancher.NodePool, error) { + return s.nodePoolsByClusterFn(clusterID) +} + +func (s *clientMock) NodePoolByID(id string) (*rancher.NodePool, error) { + return s.nodePoolByIDFN(id) +} + +func (s *clientMock) NodesByNodePool(nodePoolID string) ([]rancher.Node, error) { + return s.nodesByNodePoolFn(nodePoolID) +} + +func (s clientMock) NodeByProviderID(providerID string) (*rancher.Node, error) { + return s.nodeByProviderIDFn(providerID) +} + +func (s clientMock) DeleteNode(id string) error { + return s.deleteNodeFn(id) +} + +func (s clientMock) NodeByNameAndCluster(name, cluster string) (*rancher.Node, error) { + return s.nodeByNameAndClusterFn(name, cluster) +} + +func (s clientMock) ClusterByID(id string) (*rancher.Cluster, error) { + return s.clusterByIDFn(id) +} + +func (s clientMock) ScaleDownNode(nodeID string) error { + return s.ScaleDownNodeFn(nodeID) +} diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher_node_pool.go b/cluster-autoscaler/cloudprovider/rancher/rancher_node_pool.go new file mode 100644 index 000000000000..fa4b6a4f2449 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/rancher_node_pool.go @@ -0,0 +1,233 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package rancher + +import ( + "errors" + "fmt" + "k8s.io/autoscaler/cluster-autoscaler/config" + "math" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/rancher/rancher" + "k8s.io/klog/v2" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +// NodePool implements cloudprovider.NodeGroup interface. NodePool contains +// configuration info and functions to control a set of nodes that have the +// same capacity and set of labels. +type NodePool struct { + manager *manager + rancherNP rancher.NodePool + id string + minSize int + maxSize int +} + +// MaxSize returns maximum size of the node group. +func (n *NodePool) MaxSize() int { + return n.maxSize +} + +// MinSize returns minimum size of the node group. +func (n *NodePool) MinSize() int { + return n.minSize +} + +// TargetSize returns the current target size of the node group. It is possible that the +// number of nodes in Kubernetes is different at the moment but should be equal +// to Size() once everything stabilizes (new nodes finish startup and registration or +// removed nodes are deleted completely). Implementation required. +func (n *NodePool) TargetSize() (int, error) { + return n.rancherNP.Quantity, nil +} + +// IncreaseSize increases the size of the node group. To delete a node you need +// to explicitly name it and use DeleteNode. This function should wait until +// node group size is updated. Implementation required. +func (n *NodePool) IncreaseSize(delta int) error { + klog.Infof("increasing NodePoolID: %q by %d", n.id, delta) + if delta <= 0 { + return fmt.Errorf("delta must be positive, have: %d", delta) + } + + targetSize := n.rancherNP.Quantity + delta + if targetSize > n.MaxSize() { + return fmt.Errorf("size increase is too large - desired:%d max:%d", targetSize, n.maxSize) + } + + updatedNodePool, err := n.manager.client.ResizeNodePool(n.id, targetSize) + if err != nil { + return err + } + + if updatedNodePool.Quantity != targetSize { + return fmt.Errorf("couldn't increase size to %d (delta: %d). Current size is: %d", + targetSize, delta, n.rancherNP.Quantity) + } + + n.rancherNP.Quantity = targetSize + return nil +} + +// DeleteNodes deletes nodes from this node group. Error is returned either on +// failure or if the given node doesn't belong to this node group. This function +// should wait until node group size is updated. Implementation required. +func (n *NodePool) DeleteNodes(nodes []*apiv1.Node) error { + klog.Infof("Deleting %d nodes from %q\n", len(nodes), n.Id()) + + size, err := n.TargetSize() + if err != nil { + klog.Errorf("failed to get node pool size: %s", err.Error()) + return err + } + + if size-len(nodes) < n.MinSize() { + return fmt.Errorf("unable to delete nodes, size decrease is too small. current: %d desired: %d min: %d", + size, size-len(nodes), n.minSize) + } + + for _, node := range nodes { + rn, err := n.manager.getNode(node) + if err != nil { + return err + } + + klog.Infof("Deleting node %q - %q", rn.Name, rn.ID) + if rn.NodePoolID != n.id { + return fmt.Errorf("node: %s doesn't belong to the nodePool: %s", node.Name, n.id) + } + + if err := n.manager.client.ScaleDownNode(rn.ID); err != nil { + return err + } + + n.rancherNP.Quantity-- + } + + return nil +} + +// DecreaseTargetSize decreases the target size of the node group. 
This function +// doesn't permit to delete any existing node and can be used only to reduce the +// request for new nodes that have not been yet fulfilled. Delta should be negative. +// It is assumed that cloud provider will not delete the existing nodes when there +// is an option to just decrease the target. Implementation required. +func (n *NodePool) DecreaseTargetSize(delta int) error { + delta = int(-math.Abs(float64(delta))) + if delta == 0 { + return errors.New("delta cannot be 0") + } + + klog.Infof("Decreasing NodePoolID: %q by %d", n.id, delta) + targetSize := n.rancherNP.Quantity + delta + if targetSize < n.MinSize() { + return fmt.Errorf("size decrease is too small. current: %d desired: %d min: %d", + n.rancherNP.Quantity, targetSize, n.minSize) + } + + updatedNodePool, err := n.manager.client.ResizeNodePool(n.id, targetSize) + if err != nil { + return err + } + + if updatedNodePool.Quantity != targetSize { + return fmt.Errorf("couldn't decrease size to %d (delta: %d). Current size is: %d", + targetSize, delta, n.rancherNP.Quantity) + } + + n.rancherNP.Quantity = targetSize + return nil +} + +// Id returns an unique identifier of the node group. +func (n *NodePool) Id() string { + return n.id +} + +// Debug returns a string containing all information regarding this node group. +func (n *NodePool) Debug() string { + return fmt.Sprintf("id: %s (min:%d max:%d)", n.id, n.minSize, n.maxSize) +} + +// Nodes returns a list of all nodes that belong to this node group. +// It is required that Instance objects returned by this method have Id field set. +// Other fields are optional. +// This list should include also instances that might have not become a kubernetes node yet. +func (n *NodePool) Nodes() ([]cloudprovider.Instance, error) { + klog.V(4).Infof("Getting nodes from NodePool: %q", n.id) + nodes, err := n.manager.client.NodesByNodePool(n.id) + if err != nil { + return nil, err + } + + out := make([]cloudprovider.Instance, len(nodes)) + for i, n := range nodes { + if n.State != "active" { + continue + } + out[i] = cloudprovider.Instance{Id: n.ProviderID} + } + return out, nil +} + +// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty +// (as if just started) node. This will be used in scale-up simulations to +// predict what would a new node look like if a node group was expanded. The returned +// NodeInfo is expected to have a fully populated Node object, with all of the labels, +// capacity and allocatable information as well as all pods that are started on +// the node by default, using manifest (most likely only kube-proxy). Implementation optional. +func (n *NodePool) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Exist checks if the node group really exists on the cloud provider side. Allows to tell the +// theoretical node group from the real one. Implementation required. +func (n *NodePool) Exist() bool { + if _, err := n.manager.client.NodePoolByID(n.id); err != nil { + return false + } + + return true +} + +// Create creates the node group on the cloud provider side. Implementation optional. +func (n *NodePool) Create() (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Delete deletes the node group on the cloud provider side. +// This will be executed only for autoprovisioned node groups, once their size drops to 0. +// Implementation optional. 
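+// The rancher provider never autoprovisions node pools (Autoprovisioned always
+// returns false), so this is not implemented.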
+func (n *NodePool) Delete() error { + return cloudprovider.ErrNotImplemented +} + +// Autoprovisioned returns true if the node group is autoprovisioned. An autoprovisioned group +// was created by CA and can be deleted when scaled to 0. +func (n *NodePool) Autoprovisioned() bool { + return false +} + +// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular +// NodeGroup. Returning a nil will result in using default options. +// Implementation optional. +func (n *NodePool) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { + return nil, cloudprovider.ErrNotImplemented +} diff --git a/cluster-autoscaler/cloudprovider/rancher/rancher_node_pool_test.go b/cluster-autoscaler/cloudprovider/rancher/rancher_node_pool_test.go new file mode 100644 index 000000000000..79fd1c1c6385 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/rancher/rancher_node_pool_test.go @@ -0,0 +1,144 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rancher + +import ( + "fmt" + "testing" + + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/rancher/rancher" +) + +func TestNodePool_MaxSize(t *testing.T) { + maxSize := 5 + np := NodePool{maxSize: maxSize} + if np.MaxSize() != maxSize { + t.Errorf("got %d expected %d", np.MaxSize(), maxSize) + } +} + +func TestNodePool_MinSize(t *testing.T) { + minSize := 2 + np := NodePool{minSize: minSize} + if np.MinSize() != minSize { + t.Errorf("got %d expected %d", np.MaxSize(), minSize) + } +} + +func TestNodePool_Nodes(t *testing.T) { + t.Run("success", func(t *testing.T) { + var cli clientMock + rancherNodes := []rancher.Node{ + {Name: "worker1"}, + {Name: "worker2"}, + {Name: "worker3"}, + } + + cli.nodesByNodePoolFn = func(nodePoolID string) ([]rancher.Node, error) { + return rancherNodes, nil + } + + np := NodePool{manager: &manager{client: &cli}} + nodes, err := np.Nodes() + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + if len(nodes) != len(rancherNodes) { + t.Errorf("got %d expected %d", len(nodes), len(rancherNodes)) + } + }) + + t.Run("failed", func(t *testing.T) { + var cli clientMock + cli.nodesByNodePoolFn = func(nodePoolID string) ([]rancher.Node, error) { + return nil, fmt.Errorf("client error") + } + + np := NodePool{manager: &manager{client: &cli}} + nodes, err := np.Nodes() + if err == nil { + t.Errorf("expected error") + } + + if len(nodes) != 0 { + t.Errorf("got %d expected %d", len(nodes), 0) + } + }) +} + +func TestNodePool_IncreaseSize(t *testing.T) { + t.Run("increase from 0 to 5 - success", func(t *testing.T) { + var cli clientMock + cli.resizeNodePoolFn = func(id string, size int) (*rancher.NodePool, error) { + return &rancher.NodePool{Quantity: size}, nil + } + + targetSize := 5 + manager := manager{client: &cli} + np := NodePool{manager: &manager, minSize: 0, maxSize: 5} + if err := np.IncreaseSize(targetSize); err != nil { + t.Errorf("unexpected error: %v", err) + } + + if np.rancherNP.Quantity != targetSize { + 
t.Errorf("got %d expected %d", np.rancherNP.Quantity, targetSize) + } + }) + + t.Run("negative increase - failed", func(t *testing.T) { + np := NodePool{minSize: 0, maxSize: 5} + if err := np.IncreaseSize(-3); err == nil { + t.Error("expected error") + } + }) + + t.Run("increase above maximum - failed", func(t *testing.T) { + np := NodePool{minSize: 0, maxSize: 5} + if err := np.IncreaseSize(10); err == nil { + t.Error("expected error") + } + }) +} + +func TestNodePool_DecreaseTargetSize(t *testing.T) { + t.Run("decrease from 6 to 2 - success", func(t *testing.T) { + var cli clientMock + cli.resizeNodePoolFn = func(id string, size int) (*rancher.NodePool, error) { + return &rancher.NodePool{Quantity: size}, nil + } + + currentQty := 6 + decreaseBy := 4 + manager := manager{client: &cli} + np := NodePool{manager: &manager, minSize: 2, maxSize: 10, rancherNP: rancher.NodePool{Quantity: currentQty}} + if err := np.DecreaseTargetSize(decreaseBy); err != nil { + t.Errorf("unexpected error: %v", err) + } + + if np.rancherNP.Quantity != currentQty-decreaseBy { + t.Errorf("got %d expected %d", np.rancherNP.Quantity, currentQty-decreaseBy) + } + }) + + t.Run("decrease over minimum - failed", func(t *testing.T) { + np := NodePool{minSize: 2, maxSize: 5, rancherNP: rancher.NodePool{Quantity: 4}} + if err := np.DecreaseTargetSize(3); err == nil { + t.Error("expected error") + } + }) +}