diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go index bcb19ddddc13..cbbccf00a74f 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go @@ -32,7 +32,8 @@ import ( ) const ( - maxAddTaintRetries = 5 + maxAddTaintRetries = 5 + maxForceRefreshRetries = 3 ) var ( @@ -249,11 +250,22 @@ func (m *ociManagerImpl) TaintToPreventFurtherSchedulingOnRestart(nodes []*apiv1 } func (m *ociManagerImpl) forceRefresh() error { - err := m.nodePoolCache.rebuild(m.staticNodePools) - if err != nil { - return err + for i := 1; i <= maxForceRefreshRetries; i++ { + err := m.nodePoolCache.rebuild(m.staticNodePools) + if err != nil { + if strings.Contains(err.Error(), "404") && i == maxForceRefreshRetries { + m.lastRefresh = time.Now() + klog.Errorf("Failed to fetch the nodepools. Retrying after %v", m.lastRefresh.Add(m.cfg.Global.RefreshInterval)) + return err + } else if i == maxForceRefreshRetries { + klog.Error("Failed to fetch the nodepools.") + return err + } + klog.Errorf("Failed to fetch the nodepools. Retries available : %v", maxForceRefreshRetries-i) + } else { + break + } } - m.lastRefresh = time.Now() klog.Infof("Refreshed NodePool list, next refresh after %v", m.lastRefresh.Add(m.cfg.Global.RefreshInterval)) return nil @@ -441,7 +453,7 @@ func (m *ociManagerImpl) GetNodePoolForInstance(instance ocicommon.OciRef) (Node np, found := m.staticNodePools[instance.NodePoolID] if !found { - klog.Infof("did not find node pool for reference: %+v", instance) + klog.V(4).Infof("did not find node pool for reference: %+v", instance) return nil, errInstanceNodePoolNotFound }