From f8a54b97c402f0306ecb237a71389865bf3ceae4 Mon Sep 17 00:00:00 2001 From: enxebre Date: Wed, 13 Jul 2022 16:09:24 +0200 Subject: [PATCH] Get capi targetsize from cache This ensured that access to replicas during scale down operations were never stale by accessing the API server https://github.com/kubernetes/autoscaler/issues/3104. This honoured that behaviour while moving to unstructured client https://github.com/kubernetes/autoscaler/pull/3312. This regressed that behaviour while trying to reduce the API server load https://github.com/kubernetes/autoscaler/pull/4443. This put back the never stale replicas behaviour at the cost of loading back the API server https://github.com/kubernetes/autoscaler/pull/4634. Currently on e.g a 48 minutes cluster it does 1.4k get request to the scale subresource. This PR tries to satisfy both non stale replicas during scale down and prevent the API server from being overloaded. To achieve that it lets targetSize which is called on every autoscaling cluster state loop from come from cache. Also note that the scale down implementation has changed https://github.com/kubernetes/autoscaler/commits/master/cluster-autoscaler/core/scaledown. --- .../cloudprovider/clusterapi/clusterapi_nodegroup.go | 12 ++++++++++-- .../clusterapi/clusterapi_nodegroup_test.go | 2 +- .../clusterapi/clusterapi_unstructured_test.go | 11 +++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go index 63f8299570fb..87e7803571f6 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go @@ -18,6 +18,7 @@ package clusterapi import ( "fmt" + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -52,7 +53,14 @@ func (ng *nodegroup) MaxSize() int { // (new nodes finish startup and registration or removed nodes are // deleted completely). Implementation required. func (ng *nodegroup) TargetSize() (int, error) { - return ng.scalableResource.Replicas() + replicas, found, err := unstructured.NestedInt64(ng.scalableResource.unstructured.Object, "spec", "replicas") + if err != nil { + return 0, errors.Wrap(err, "error getting replica count") + } + if !found { + return 0, fmt.Errorf("unable to find replicas") + } + return int(replicas), nil } // IncreaseSize increases the size of the node group. To delete a node @@ -163,7 +171,7 @@ func (ng *nodegroup) DecreaseTargetSize(delta int) error { return fmt.Errorf("size decrease must be negative") } - size, err := ng.TargetSize() + size, err := ng.scalableResource.Replicas() if err != nil { return err } diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go index 37059450fc52..42eef0957261 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go @@ -1072,7 +1072,7 @@ func TestNodeGroupDeleteNodesSequential(t *testing.T) { ng = nodegroups[0] // Check the nodegroup is at the expected size - actualSize, err := ng.TargetSize() + actualSize, err := ng.scalableResource.Replicas() if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go index be51c9350f77..7252b49f677f 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go @@ -60,6 +60,17 @@ func TestSetSize(t *testing.T) { if s.Spec.Replicas != int32(updatedReplicas) { t.Errorf("expected %v, got: %v", updatedReplicas, s.Spec.Replicas) } + + replicas, found, err := unstructured.NestedInt64(sr.unstructured.Object, "spec", "replicas") + if err != nil { + t.Fatal(err) + } + if !found { + t.Fatal("replicas = 0") + } + if replicas != int64(updatedReplicas) { + t.Errorf("expected %v, got: %v", updatedReplicas, replicas) + } } t.Run("MachineSet", func(t *testing.T) {