From de92e7e1a04a8c505ae5b935cf4ec9ef566caa14 Mon Sep 17 00:00:00 2001 From: andyzhangx Date: Wed, 10 Apr 2024 15:07:03 +0000 Subject: [PATCH] fix: cache GetVolumeStats on Windows node fix --- pkg/azuredisk/azure_common_darwin.go | 2 +- pkg/azuredisk/azure_common_linux.go | 2 +- pkg/azuredisk/azure_common_windows.go | 17 ++++++++++++++++- pkg/azuredisk/azuredisk.go | 11 +++++++++++ pkg/azuredisk/azuredisk_option.go | 2 ++ pkg/azuredisk/nodeserver.go | 2 +- pkg/azuredisk/nodeserver_v2.go | 2 +- 7 files changed, 33 insertions(+), 5 deletions(-) diff --git a/pkg/azuredisk/azure_common_darwin.go b/pkg/azuredisk/azure_common_darwin.go index df34dff9f1..09be9dead4 100644 --- a/pkg/azuredisk/azure_common_darwin.go +++ b/pkg/azuredisk/azure_common_darwin.go @@ -119,6 +119,6 @@ func rescanAllVolumes(io azureutils.IOHandler) error { return nil } -func GetVolumeStats(ctx context.Context, m *mount.SafeFormatAndMount, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) { +func (d *DriverCore) GetVolumeStats(ctx context.Context, m *mount.SafeFormatAndMount, volumeID, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) { return []*csi.VolumeUsage{}, nil } diff --git a/pkg/azuredisk/azure_common_linux.go b/pkg/azuredisk/azure_common_linux.go index 537a163a0c..e1711bc644 100644 --- a/pkg/azuredisk/azure_common_linux.go +++ b/pkg/azuredisk/azure_common_linux.go @@ -269,7 +269,7 @@ func rescanAllVolumes(io azureutils.IOHandler) error { return nil } -func GetVolumeStats(_ context.Context, m *mount.SafeFormatAndMount, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) { +func (d *DriverCore) GetVolumeStats(_ context.Context, m *mount.SafeFormatAndMount, _, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) { var volUsages []*csi.VolumeUsage _, err := os.Stat(target) if err != nil { diff --git a/pkg/azuredisk/azure_common_windows.go b/pkg/azuredisk/azure_common_windows.go index 390912181b..ac5089bb6e 100644 --- a/pkg/azuredisk/azure_common_windows.go +++ b/pkg/azuredisk/azure_common_windows.go @@ -25,12 +25,14 @@ import ( "strconv" "github.com/container-storage-interface/spec/lib/go/csi" + "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "k8s.io/klog/v2" "k8s.io/mount-utils" "sigs.k8s.io/azuredisk-csi-driver/pkg/azureutils" "sigs.k8s.io/azuredisk-csi-driver/pkg/mounter" + azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache" ) func formatAndMount(source, target, fstype string, options []string, m *mount.SafeFormatAndMount) error { @@ -162,9 +164,22 @@ func rescanAllVolumes(io azureutils.IOHandler) error { return nil } -func GetVolumeStats(ctx context.Context, m *mount.SafeFormatAndMount, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) { +func (d *DriverCore) GetVolumeStats(ctx context.Context, m *mount.SafeFormatAndMount, volumeID, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) { + // check if the volume stats is cached + cache, err := d.volStatsCache.Get(volumeID, azcache.CacheReadTypeDefault) + if err != nil { + return nil, status.Errorf(codes.Internal, err.Error()) + } + if cache != nil { + volUsage := cache.(csi.VolumeUsage) + klog.V(6).Infof("NodeGetVolumeStats: volume stats for volume %s path %s is cached", volumeID, target) + return []*csi.VolumeUsage{&volUsage}, nil + } + if proxy, ok := m.Interface.(mounter.CSIProxyMounter); ok { volUsage, err := proxy.GetVolumeStats(ctx, target) + // cache the volume stats per volume + d.volStatsCache.Set(volumeID, *volUsage) return []*csi.VolumeUsage{volUsage}, err } return []*csi.VolumeUsage{}, fmt.Errorf("could not cast to csi proxy class") diff --git a/pkg/azuredisk/azuredisk.go b/pkg/azuredisk/azuredisk.go index d138ab12d5..bd45d14928 100644 --- a/pkg/azuredisk/azuredisk.go +++ b/pkg/azuredisk/azuredisk.go @@ -95,6 +95,7 @@ type DriverCore struct { enableTrafficManager bool trafficManagerPort int64 vmssCacheTTLInSeconds int64 + volStatsCacheExpireInMinutes int64 attachDetachInitialDelayInMs int64 vmType string enableWindowsHostProcess bool @@ -106,6 +107,8 @@ type DriverCore struct { endpoint string disableAVSetNodes bool kubeClient kubernetes.Interface + // a timed cache storing volume stats + volStatsCache azcache.Resource } // Driver is the v1 implementation of the Azure Disk CSI Driver. @@ -144,6 +147,7 @@ func newDriverV1(options *DriverOptions) *Driver { driver.enableTrafficManager = options.EnableTrafficManager driver.trafficManagerPort = options.TrafficManagerPort driver.vmssCacheTTLInSeconds = options.VMSSCacheTTLInSeconds + driver.volStatsCacheExpireInMinutes = options.VolStatsCacheExpireInMinutes driver.vmType = options.VMType driver.enableWindowsHostProcess = options.EnableWindowsHostProcess driver.getNodeIDFromIMDS = options.GetNodeIDFromIMDS @@ -171,6 +175,13 @@ func newDriverV1(options *DriverOptions) *Driver { klog.Fatalf("%v", err) } + if options.VolStatsCacheExpireInMinutes <= 0 { + options.VolStatsCacheExpireInMinutes = 10 // default expire in 10 minutes + } + if driver.volStatsCache, err = azcache.NewTimedCache(time.Duration(options.VolStatsCacheExpireInMinutes)*time.Minute, getter, false); err != nil { + klog.Fatalf("%v", err) + } + userAgent := GetUserAgent(driver.Name, driver.customUserAgent, driver.userAgentSuffix) klog.V(2).Infof("driver userAgent: %s", userAgent) diff --git a/pkg/azuredisk/azuredisk_option.go b/pkg/azuredisk/azuredisk_option.go index 8b8463c6d4..ef262db238 100644 --- a/pkg/azuredisk/azuredisk_option.go +++ b/pkg/azuredisk/azuredisk_option.go @@ -50,6 +50,7 @@ type DriverOptions struct { TrafficManagerPort int64 AttachDetachInitialDelayInMs int64 VMSSCacheTTLInSeconds int64 + VolStatsCacheExpireInMinutes int64 VMType string EnableWindowsHostProcess bool GetNodeIDFromIMDS bool @@ -90,6 +91,7 @@ func (o *DriverOptions) AddFlags() *flag.FlagSet { fs.Int64Var(&o.TrafficManagerPort, "traffic-manager-port", 7788, "default traffic manager port") fs.Int64Var(&o.AttachDetachInitialDelayInMs, "attach-detach-initial-delay-ms", 1000, "initial delay in milliseconds for batch disk attach/detach") fs.Int64Var(&o.VMSSCacheTTLInSeconds, "vmss-cache-ttl-seconds", -1, "vmss cache TTL in seconds (600 by default)") + fs.Int64Var(&o.VolStatsCacheExpireInMinutes, "vol-stats-cache-expire-in-minutes", 10, "The cache expire time in minutes for volume stats cache") fs.StringVar(&o.VMType, "vm-type", "", "type of agent node. available values: vmss, standard") fs.BoolVar(&o.EnableWindowsHostProcess, "enable-windows-host-process", false, "enable windows host process") fs.BoolVar(&o.GetNodeIDFromIMDS, "get-nodeid-from-imds", false, "boolean flag to get NodeID from IMDS") diff --git a/pkg/azuredisk/nodeserver.go b/pkg/azuredisk/nodeserver.go index ae5d399b55..48c4c1c878 100644 --- a/pkg/azuredisk/nodeserver.go +++ b/pkg/azuredisk/nodeserver.go @@ -444,7 +444,7 @@ func (d *Driver) NodeGetVolumeStats(ctx context.Context, req *csi.NodeGetVolumeS return nil, status.Error(codes.InvalidArgument, "NodeGetVolumeStats volume path was empty") } - volUsage, err := GetVolumeStats(ctx, d.mounter, req.VolumePath, d.hostUtil) + volUsage, err := d.GetVolumeStats(ctx, d.mounter, req.VolumeId, req.VolumePath, d.hostUtil) return &csi.NodeGetVolumeStatsResponse{ Usage: volUsage, }, err diff --git a/pkg/azuredisk/nodeserver_v2.go b/pkg/azuredisk/nodeserver_v2.go index 2bce3dc6d6..d21f205247 100644 --- a/pkg/azuredisk/nodeserver_v2.go +++ b/pkg/azuredisk/nodeserver_v2.go @@ -405,7 +405,7 @@ func (d *DriverV2) NodeGetVolumeStats(ctx context.Context, req *csi.NodeGetVolum return nil, status.Error(codes.InvalidArgument, "NodeGetVolumeStats volume path was empty") } - volUsage, err := GetVolumeStats(ctx, d.mounter, req.VolumePath, d.hostUtil) + volUsage, err := d.GetVolumeStats(ctx, d.mounter, req.VolumeId, req.VolumePath, d.hostUtil) return &csi.NodeGetVolumeStatsResponse{ Usage: volUsage, }, err