From 8d2ba8dd02d94300ba21dc4f0e662850677323ea Mon Sep 17 00:00:00 2001 From: Jayant Jain Date: Wed, 2 Feb 2022 06:03:06 +0000 Subject: [PATCH] mig_info_provider.go:fillMigInstances will now use locking when calling the gce api. This is to avoid multiple gce calls for the same mig during scale down (which is done in parallel). --- .../cloudprovider/gce/mig_info_provider.go | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go b/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go index 2e393240eae5..de6120802908 100644 --- a/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go +++ b/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go @@ -49,12 +49,13 @@ type MigInfoProvider interface { } type cachingMigInfoProvider struct { - mutex sync.Mutex + migInfoMutex sync.Mutex cache *GceCache migLister MigLister gceClient AutoscalingGceClient projectId string concurrentGceRefreshes int + migInstanceMutex sync.Mutex } // NewCachingMigInfoProvider creates an instance of caching MigInfoProvider @@ -127,7 +128,7 @@ func (c *cachingMigInfoProvider) RegenerateMigInstancesCache() error { migs := c.migLister.GetMigs() errors := make([]error, len(migs)) workqueue.ParallelizeUntil(context.Background(), c.concurrentGceRefreshes, len(migs), func(piece int) { - errors[piece] = c.fillMigInstances(migs[piece].GceRef()) + errors[piece] = c.fillMigInstancesNoLock(migs[piece].GceRef()) }, workqueue.WithChunkSize(c.concurrentGceRefreshes)) for _, err := range errors { @@ -149,7 +150,7 @@ func (c *cachingMigInfoProvider) findMigWithMatchingBasename(instanceRef GceRef) return nil } -func (c *cachingMigInfoProvider) fillMigInstances(migRef GceRef) error { +func (c *cachingMigInfoProvider) fillMigInstancesNoLock(migRef GceRef) error { klog.V(4).Infof("Regenerating MIG instances cache for %s", migRef.String()) instances, err := c.gceClient.FetchMigInstances(migRef) if err != nil { @@ -159,9 +160,22 @@ func (c *cachingMigInfoProvider) fillMigInstances(migRef GceRef) error { return c.cache.SetMigInstances(migRef, instances) } +// fillMigInstances is now using locks due to multiple parallel calls to the gce api +// for the same MIG during large scale down. This should only affect the unique number of +// migs and all other instances would pick up from cache, thereby saving networking +func (c *cachingMigInfoProvider) fillMigInstances(migRef GceRef) error { + c.migInstanceMutex.Lock() + defer c.migInstanceMutex.Unlock() + _, ok := c.cache.GetMigInstances(migRef) + if ok { + return nil + } + return c.fillMigInstancesNoLock(migRef) +} + func (c *cachingMigInfoProvider) GetMigTargetSize(migRef GceRef) (int64, error) { - c.mutex.Lock() - defer c.mutex.Unlock() + c.migInfoMutex.Lock() + defer c.migInfoMutex.Unlock() targetSize, found := c.cache.GetMigTargetSize(migRef) if found { @@ -185,8 +199,8 @@ func (c *cachingMigInfoProvider) GetMigTargetSize(migRef GceRef) (int64, error) } func (c *cachingMigInfoProvider) GetMigBasename(migRef GceRef) (string, error) { - c.mutex.Lock() - defer c.mutex.Unlock() + c.migInfoMutex.Lock() + defer c.migInfoMutex.Unlock() basename, found := c.cache.GetMigBasename(migRef) if found { @@ -210,8 +224,8 @@ func (c *cachingMigInfoProvider) GetMigBasename(migRef GceRef) (string, error) { } func (c *cachingMigInfoProvider) GetMigInstanceTemplateName(migRef GceRef) (string, error) { - c.mutex.Lock() - defer c.mutex.Unlock() + c.migInfoMutex.Lock() + defer c.migInfoMutex.Unlock() templateName, found := c.cache.GetMigInstanceTemplateName(migRef) if found { @@ -240,8 +254,8 @@ func (c *cachingMigInfoProvider) GetMigInstanceTemplate(migRef GceRef) (*gce.Ins return nil, err } - c.mutex.Lock() - defer c.mutex.Unlock() + c.migInfoMutex.Lock() + defer c.migInfoMutex.Unlock() template, found := c.cache.GetMigInstanceTemplate(migRef) if found && template.Name == templateName { @@ -257,7 +271,7 @@ func (c *cachingMigInfoProvider) GetMigInstanceTemplate(migRef GceRef) (*gce.Ins return template, nil } -// filMigInfoCache needs to be called with mutex locked +// filMigInfoCache needs to be called with migInfoMutex locked func (c *cachingMigInfoProvider) fillMigInfoCache() error { var zones []string for zone := range c.listAllZonesWithMigs() {