From 2d9fe32601ff9d7308e12133f8fe5655ff421c1f Mon Sep 17 00:00:00 2001 From: Rafael Franzke Date: Tue, 23 Jun 2020 07:24:15 +0200 Subject: [PATCH] Allow volume attach limit configuration via command line parameter --- cmd/main.go | 1 + cmd/options/node_options.go | 16 +++++++- cmd/options/node_options_test.go | 5 +++ cmd/options_test.go | 17 ++++++++ deploy/kubernetes/base/node.yaml | 1 + docs/design.md | 12 ++++++ pkg/driver/driver.go | 17 +++++--- pkg/driver/driver_test.go | 58 +++++++++++++++++++++++++++ pkg/driver/node.go | 19 +++++---- pkg/driver/node_test.go | 67 ++++++++++++++++++++++---------- pkg/driver/sanity_test.go | 5 ++- 11 files changed, 182 insertions(+), 36 deletions(-) create mode 100644 pkg/driver/driver_test.go diff --git a/cmd/main.go b/cmd/main.go index 6bf98fba9c..855bbb63d4 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -32,6 +32,7 @@ func main() { driver.WithEndpoint(options.ServerOptions.Endpoint), driver.WithExtraVolumeTags(options.ControllerOptions.ExtraVolumeTags), driver.WithMode(options.DriverMode), + driver.WithVolumeAttachLimit(options.NodeOptions.VolumeAttachLimit), ) if err != nil { klog.Fatalln(err) diff --git a/cmd/options/node_options.go b/cmd/options/node_options.go index 0f696e0386..91190f1619 100644 --- a/cmd/options/node_options.go +++ b/cmd/options/node_options.go @@ -21,6 +21,18 @@ import ( ) // NodeOptions contains options and configuration settings for the node service. -type NodeOptions struct{} +type NodeOptions struct { + // VolumeAttachLimit specifies the value that shall be reported as "maximum number of attachable volumes" + // in CSINode objects. It is similar to https://kubernetes.io/docs/concepts/storage/storage-limits/#custom-limits + // which allowed administrators to specify custom volume limits by configuring the kube-scheduler. Also, each AWS + // machine type has different volume limits. By default, the EBS CSI driver parses the machine type name and then + // decides the volume limit. However, this is only a rough approximation and not good enough in most cases. + // Specifying the volume attach limit via command line is the alternative until a more sophisticated solution presents + // itself (dynamically discovering the maximum number of attachable volume per EC2 machine type, see also + // https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/347). + VolumeAttachLimit int64 +} -func (s *NodeOptions) AddFlags(fs *flag.FlagSet) {} +func (o *NodeOptions) AddFlags(fs *flag.FlagSet) { + fs.Int64Var(&o.VolumeAttachLimit, "volume-attach-limit", -1, "Value value for the maximum number of volumes attachable for all nodes. If the flag is not specified then the value is approximated from the instance type.") +} diff --git a/cmd/options/node_options_test.go b/cmd/options/node_options_test.go index 483d7cac00..dc4191a55c 100644 --- a/cmd/options/node_options_test.go +++ b/cmd/options/node_options_test.go @@ -27,6 +27,11 @@ func TestNodeOptions(t *testing.T) { flag string found bool }{ + { + name: "lookup desired flag", + flag: "volume-attach-limit", + found: true, + }, { name: "fail for non-desired flag", flag: "some-flag", diff --git a/cmd/options_test.go b/cmd/options_test.go index cd58e19def..ef4bffe199 100644 --- a/cmd/options_test.go +++ b/cmd/options_test.go @@ -20,6 +20,7 @@ import ( "flag" "os" "reflect" + "strconv" "testing" "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/driver" @@ -45,6 +46,9 @@ func TestGetOptions(t *testing.T) { extraVolumeTagKey: extraVolumeTagValue, } + VolumeAttachLimitFlagName := "volume-attach-limit" + var VolumeAttachLimit int64 = 42 + args := append([]string{ "aws-ebs-csi-driver", }, additionalArgs...) @@ -55,6 +59,9 @@ func TestGetOptions(t *testing.T) { if withControllerOptions { args = append(args, "-"+extraVolumeTagsFlagName+"="+extraVolumeTagKey+"="+extraVolumeTagValue) } + if withNodeOptions { + args = append(args, "-"+VolumeAttachLimitFlagName+"="+strconv.FormatInt(VolumeAttachLimit, 10)) + } oldArgs := os.Args defer func() { os.Args = oldArgs }() @@ -82,6 +89,16 @@ func TestGetOptions(t *testing.T) { } } + if withNodeOptions { + VolumeAttachLimitFlag := flagSet.Lookup(VolumeAttachLimitFlagName) + if VolumeAttachLimitFlag == nil { + t.Fatalf("expected %q flag to be added but it is not", VolumeAttachLimitFlagName) + } + if options.NodeOptions.VolumeAttachLimit != VolumeAttachLimit { + t.Fatalf("expected VolumeAttachLimit to be %d but it is %d", VolumeAttachLimit, options.NodeOptions.VolumeAttachLimit) + } + } + return options } diff --git a/deploy/kubernetes/base/node.yaml b/deploy/kubernetes/base/node.yaml index 7a84b8b72d..6c28a04bec 100644 --- a/deploy/kubernetes/base/node.yaml +++ b/deploy/kubernetes/base/node.yaml @@ -43,6 +43,7 @@ spec: args: - node - --endpoint=$(CSI_ENDPOINT) + # - --volume-attach-limit=42 - --logtostderr - --v=5 env: diff --git a/docs/design.md b/docs/design.md index d34909420c..96c3ffae29 100644 --- a/docs/design.md +++ b/docs/design.md @@ -235,3 +235,15 @@ Example 2: `AWS_REGION=us-west-1 /bin/aws-ebs-csi-driver controller --extra-volu - `node`: This will only start the node service of the CSI driver.\ Example: `/bin/aws-ebs-csi-driver node --endpoint=unix://...` + +## Custom volume limits + +For the Kubernetes in-tree volume provisioners (including the `kubernetes.io/aws-ebs` provisioner) it was possible for administrators to provide a custom volume limit overwrite (see https://kubernetes.io/docs/concepts/storage/storage-limits/#custom-limits). +This solution is not working with CSI any longer. +As part of [#347](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/347) we discuss how we can implement a sophisticated computation of the volume attach limit per node (e.g., based on the used machine types and already attached network interfaces). +However, it turns out that such optimal implementation is not easily achievable. +Each AWS machine type has different volume limits. +Today, the EBS CSI driver parses the machine type name and then decides the volume limit. +Unfortunately, this is only a rough approximation and not good enough in most cases. +In order to allow existing clusters that are leveraging/relying on this feature to migrate to CSI, the EBS CSI driver is supporting the `--volume-attach-limit` flag. +Specifying the volume attach limit via command line is the alternative until a more sophisticated solution presents itself (dynamically discovering the maximum number of attachable volume per EC2 machine type). diff --git a/pkg/driver/driver.go b/pkg/driver/driver.go index e54df9b994..60305d5b2f 100644 --- a/pkg/driver/driver.go +++ b/pkg/driver/driver.go @@ -53,9 +53,10 @@ type Driver struct { } type DriverOptions struct { - endpoint string - extraVolumeTags map[string]string - mode Mode + endpoint string + extraVolumeTags map[string]string + mode Mode + volumeAttachLimit int64 } func NewDriver(options ...func(*DriverOptions)) (*Driver, error) { @@ -81,10 +82,10 @@ func NewDriver(options ...func(*DriverOptions)) (*Driver, error) { case ControllerMode: driver.controllerService = newControllerService(&driverOptions) case NodeMode: - driver.nodeService = newNodeService() + driver.nodeService = newNodeService(&driverOptions) case AllMode: driver.controllerService = newControllerService(&driverOptions) - driver.nodeService = newNodeService() + driver.nodeService = newNodeService(&driverOptions) default: return nil, fmt.Errorf("unknown mode: %s", driverOptions.mode) } @@ -155,3 +156,9 @@ func WithMode(mode Mode) func(*DriverOptions) { o.mode = mode } } + +func WithVolumeAttachLimit(volumeAttachLimit int64) func(*DriverOptions) { + return func(o *DriverOptions) { + o.volumeAttachLimit = volumeAttachLimit + } +} diff --git a/pkg/driver/driver_test.go b/pkg/driver/driver_test.go new file mode 100644 index 0000000000..70f86002c5 --- /dev/null +++ b/pkg/driver/driver_test.go @@ -0,0 +1,58 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package driver + +import ( + "reflect" + "testing" +) + +func TestWithEndpoint(t *testing.T) { + value := "endpoint" + options := &DriverOptions{} + WithEndpoint(value)(options) + if options.endpoint != value { + t.Fatalf("expected endpoint option got set to %q but is set to %q", value, options.endpoint) + } +} + +func TestWithExtraVolumeTags(t *testing.T) { + value := map[string]string{"foo": "bar"} + options := &DriverOptions{} + WithExtraVolumeTags(value)(options) + if !reflect.DeepEqual(options.extraVolumeTags, value) { + t.Fatalf("expected extraVolumeTags option got set to %+v but is set to %+v", value, options.extraVolumeTags) + } +} + +func TestWithMode(t *testing.T) { + value := Mode("mode") + options := &DriverOptions{} + WithMode(value)(options) + if options.mode != value { + t.Fatalf("expected mode option got set to %q but is set to %q", value, options.mode) + } +} + +func TestWithVolumeAttachLimit(t *testing.T) { + var value int64 = 42 + options := &DriverOptions{} + WithVolumeAttachLimit(value)(options) + if options.volumeAttachLimit != value { + t.Fatalf("expected volumeAttachLimit option got set to %d but is set to %d", value, options.volumeAttachLimit) + } +} diff --git a/pkg/driver/node.go b/pkg/driver/node.go index 649e7c94c4..cea55ad753 100644 --- a/pkg/driver/node.go +++ b/pkg/driver/node.go @@ -70,23 +70,25 @@ var ( // nodeService represents the node service of CSI driver type nodeService struct { - metadata cloud.MetadataService - mounter Mounter - inFlight *internal.InFlight + metadata cloud.MetadataService + mounter Mounter + inFlight *internal.InFlight + driverOptions *DriverOptions } // newNodeService creates a new node service // it panics if failed to create the service -func newNodeService() nodeService { +func newNodeService(driverOptions *DriverOptions) nodeService { metadata, err := cloud.NewMetadata() if err != nil { panic(err) } return nodeService{ - metadata: metadata, - mounter: newNodeMounter(), - inFlight: internal.NewInFlight(), + metadata: metadata, + mounter: newNodeMounter(), + inFlight: internal.NewInFlight(), + driverOptions: driverOptions, } } @@ -514,6 +516,9 @@ func findNvmeVolume(findName string) (device string, err error) { // getVolumesLimit returns the limit of volumes that the node supports func (d *nodeService) getVolumesLimit() int64 { + if d.driverOptions.volumeAttachLimit >= 0 { + return d.driverOptions.volumeAttachLimit + } ebsNitroInstanceTypeRegex := "^[cmr]5.*|t3|z1d" instanceType := d.metadata.GetInstanceType() if ok, _ := regexp.MatchString(ebsNitroInstanceTypeRegex, instanceType); ok { diff --git a/pkg/driver/node_test.go b/pkg/driver/node_test.go index 36714e4edf..8592b2dbec 100644 --- a/pkg/driver/node_test.go +++ b/pkg/driver/node_test.go @@ -1243,25 +1243,44 @@ func TestNodeGetCapabilities(t *testing.T) { func TestNodeGetInfo(t *testing.T) { testCases := []struct { - name string - instanceID string - instanceType string - availabilityZone string - expMaxVolumes int64 + name string + instanceID string + instanceType string + availabilityZone string + volumeAttachLimit int64 + expMaxVolumes int64 }{ { - name: "success normal", - instanceID: "i-123456789abcdef01", - instanceType: "t2.medium", - availabilityZone: "us-west-2b", - expMaxVolumes: 39, + name: "success normal", + instanceID: "i-123456789abcdef01", + instanceType: "t2.medium", + availabilityZone: "us-west-2b", + volumeAttachLimit: -1, + expMaxVolumes: 39, }, { - name: "success normal with NVMe", - instanceID: "i-123456789abcdef01", - instanceType: "m5d.large", - availabilityZone: "us-west-2b", - expMaxVolumes: 25, + name: "success normal with overwrite", + instanceID: "i-123456789abcdef01", + instanceType: "t2.medium", + availabilityZone: "us-west-2b", + volumeAttachLimit: 42, + expMaxVolumes: 42, + }, + { + name: "success normal with NVMe", + instanceID: "i-123456789abcdef01", + instanceType: "m5d.large", + availabilityZone: "us-west-2b", + volumeAttachLimit: -1, + expMaxVolumes: 25, + }, + { + name: "success normal with NVMe and overwrite", + instanceID: "i-123456789abcdef01", + instanceType: "m5d.large", + availabilityZone: "us-west-2b", + volumeAttachLimit: 30, + expMaxVolumes: 30, }, } for _, tc := range testCases { @@ -1269,17 +1288,25 @@ func TestNodeGetInfo(t *testing.T) { mockCtl := gomock.NewController(t) defer mockCtl.Finish() + driverOptions := &DriverOptions{ + volumeAttachLimit: tc.volumeAttachLimit, + } + + mockMounter := mocks.NewMockMounter(mockCtl) + mockMetadata := mocks.NewMockMetadataService(mockCtl) mockMetadata.EXPECT().GetInstanceID().Return(tc.instanceID) - mockMetadata.EXPECT().GetInstanceType().Return(tc.instanceType) mockMetadata.EXPECT().GetAvailabilityZone().Return(tc.availabilityZone) - mockMounter := mocks.NewMockMounter(mockCtl) + if tc.volumeAttachLimit < 0 { + mockMetadata.EXPECT().GetInstanceType().Return(tc.instanceType) + } awsDriver := &nodeService{ - metadata: mockMetadata, - mounter: mockMounter, - inFlight: internal.NewInFlight(), + metadata: mockMetadata, + mounter: mockMounter, + inFlight: internal.NewInFlight(), + driverOptions: driverOptions, } resp, err := awsDriver.NodeGetInfo(context.TODO(), &csi.NodeGetInfoRequest{}) diff --git a/pkg/driver/sanity_test.go b/pkg/driver/sanity_test.go index 799eafdb4d..8afb81dc63 100644 --- a/pkg/driver/sanity_test.go +++ b/pkg/driver/sanity_test.go @@ -53,8 +53,9 @@ func TestSanity(t *testing.T) { Region: "region", AvailabilityZone: "az", }, - mounter: newFakeMounter(), - inFlight: internal.NewInFlight(), + mounter: newFakeMounter(), + inFlight: internal.NewInFlight(), + driverOptions: &DriverOptions{}, }, } defer func() {