diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 8be302e176..d150226f6e 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -25,6 +25,9 @@ import ( "strings" "time" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "github.com/container-storage-interface/spec/lib/go/csi" "github.com/kubernetes-csi/csi-lib-utils/connection" "github.com/kubernetes-csi/external-provisioner/pkg/features" @@ -179,6 +182,7 @@ type csiProvisioner struct { var _ controller.Provisioner = &csiProvisioner{} var _ controller.BlockProvisioner = &csiProvisioner{} +var _ controller.ProvisionerExt = &csiProvisioner{} var ( // Each provisioner have a identify string to distinguish with others. This @@ -353,8 +357,14 @@ func getVolumeCapability( } func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.PersistentVolume, error) { + // The controller should call ProvisionExt() instead, but just in case... + pv, _, err := p.ProvisionExt(options) + return pv, err +} + +func (p *csiProvisioner) ProvisionExt(options controller.ProvisionOptions) (*v1.PersistentVolume, controller.ProvisioningState, error) { if options.StorageClass == nil { - return nil, errors.New("storage class was nil") + return nil, controller.ProvisioningFinished, errors.New("storage class was nil") } migratedVolume := false @@ -367,7 +377,7 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per klog.V(2).Infof("translating storage class for in-tree plugin %s to CSI", options.StorageClass.Provisioner) storageClass, err := csitranslationlib.TranslateInTreeStorageClassToCSI(p.supportsMigrationFromInTreePluginName, options.StorageClass) if err != nil { - return nil, fmt.Errorf("failed to translate storage class: %v", err) + return nil, controller.ProvisioningFinished, fmt.Errorf("failed to translate storage class: %v", err) } options.StorageClass = storageClass migratedVolume = true @@ -381,13 +391,13 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per if options.PVC.Spec.DataSource != nil { // PVC.Spec.DataSource.Name is the name of the VolumeSnapshot API object if options.PVC.Spec.DataSource.Name == "" { - return nil, fmt.Errorf("the PVC source not found for PVC %s", options.PVC.Name) + return nil, controller.ProvisioningFinished, fmt.Errorf("the PVC source not found for PVC %s", options.PVC.Name) } switch options.PVC.Spec.DataSource.Kind { case snapshotKind: if *(options.PVC.Spec.DataSource.APIGroup) != snapshotAPIGroup { - return nil, fmt.Errorf("the PVC source does not belong to the right APIGroup. Expected %s, Got %s", snapshotAPIGroup, *(options.PVC.Spec.DataSource.APIGroup)) + return nil, controller.ProvisioningFinished, fmt.Errorf("the PVC source does not belong to the right APIGroup. Expected %s, Got %s", snapshotAPIGroup, *(options.PVC.Spec.DataSource.APIGroup)) } rc.snapshot = true case pvcKind: @@ -397,16 +407,16 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per } } if err := p.checkDriverCapabilities(rc); err != nil { - return nil, err + return nil, controller.ProvisioningFinished, err } if options.PVC.Spec.Selector != nil { - return nil, fmt.Errorf("claim Selector is not supported") + return nil, controller.ProvisioningFinished, fmt.Errorf("claim Selector is not supported") } pvName, err := makeVolumeName(p.volumeNamePrefix, fmt.Sprintf("%s", options.PVC.ObjectMeta.UID), p.volumeNameUUIDLength) if err != nil { - return nil, err + return nil, controller.ProvisioningFinished, err } fsTypesFound := 0 @@ -421,7 +431,7 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per } } if fsTypesFound > 1 { - return nil, fmt.Errorf("fstype specified in parameters with both \"fstype\" and \"%s\" keys", prefixedFsTypeKey) + return nil, controller.ProvisioningFinished, fmt.Errorf("fstype specified in parameters with both \"fstype\" and \"%s\" keys", prefixedFsTypeKey) } if len(fsType) == 0 { fsType = defaultFSType @@ -449,7 +459,7 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per if options.PVC.Spec.DataSource != nil && (rc.clone || rc.snapshot) { volumeContentSource, err := p.getVolumeContentSource(options) if err != nil { - return nil, fmt.Errorf("error getting handle for DataSource Type %s by Name %s: %v", options.PVC.Spec.DataSource.Kind, options.PVC.Spec.DataSource.Name, err) + return nil, controller.ProvisioningNoChange, fmt.Errorf("error getting handle for DataSource Type %s by Name %s: %v", options.PVC.Spec.DataSource.Kind, options.PVC.Spec.DataSource.Name, err) } req.VolumeContentSource = volumeContentSource } @@ -463,7 +473,7 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per options.SelectedNode, p.strictTopology) if err != nil { - return nil, fmt.Errorf("error generating accessibility requirements: %v", err) + return nil, controller.ProvisioningNoChange, fmt.Errorf("error generating accessibility requirements: %v", err) } req.AccessibilityRequirements = requirements } @@ -480,35 +490,35 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per }, }) if err != nil { - return nil, err + return nil, controller.ProvisioningNoChange, err } provisionerCredentials, err := getCredentials(p.client, provisionerSecretRef) if err != nil { - return nil, err + return nil, controller.ProvisioningNoChange, err } req.Secrets = provisionerCredentials // Resolve controller publish, node stage, node publish secret references controllerPublishSecretRef, err := getSecretReference(controllerPublishSecretParams, options.StorageClass.Parameters, pvName, options.PVC) if err != nil { - return nil, err + return nil, controller.ProvisioningNoChange, err } nodeStageSecretRef, err := getSecretReference(nodeStageSecretParams, options.StorageClass.Parameters, pvName, options.PVC) if err != nil { - return nil, err + return nil, controller.ProvisioningNoChange, err } nodePublishSecretRef, err := getSecretReference(nodePublishSecretParams, options.StorageClass.Parameters, pvName, options.PVC) if err != nil { - return nil, err + return nil, controller.ProvisioningNoChange, err } controllerExpandSecretRef, err := getSecretReference(controllerExpandSecretParams, options.StorageClass.Parameters, pvName, options.PVC) if err != nil { - return nil, err + return nil, controller.ProvisioningNoChange, err } req.Parameters, err = removePrefixedParameters(options.StorageClass.Parameters) if err != nil { - return nil, fmt.Errorf("failed to strip CSI Parameters of prefixed keys: %v", err) + return nil, controller.ProvisioningFinished, fmt.Errorf("failed to strip CSI Parameters of prefixed keys: %v", err) } ctx, cancel := context.WithTimeout(context.Background(), p.timeout) @@ -516,7 +526,10 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per rep, err = p.csiClient.CreateVolume(ctx, &req) if err != nil { - return nil, err + if isFinalError(err) { + return nil, controller.ProvisioningFinished, err + } + return nil, controller.ProvisioningInBackground, err } if rep.Volume != nil { @@ -539,7 +552,8 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per if err != nil { capErr = fmt.Errorf("%v. Cleanup of volume %s failed, volume is orphaned: %v", capErr, pvName, err) } - return nil, capErr + // use InBackground to retry the call, hoping the volume is deleted correctly next time. + return nil, controller.ProvisioningInBackground, capErr } pv := &v1.PersistentVolume{ @@ -589,13 +603,13 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per if err != nil { klog.Warningf("failed to translate CSI PV to in-tree due to: %v. Deleting provisioned PV", err) p.Delete(pv) - return nil, err + return nil, controller.ProvisioningFinished, err } } klog.Infof("successfully created PV %+v", pv.Spec.PersistentVolumeSource) - return pv, nil + return pv, controller.ProvisioningFinished, nil } func (p *csiProvisioner) supportsTopology() bool { @@ -1004,3 +1018,26 @@ func deprecationWarning(deprecatedParam, newParam, removalVersion string) string } return fmt.Sprintf("\"%s\" is deprecated and will be removed in %s%s", deprecatedParam, removalVersion, newParamPhrase) } + +func isFinalError(err error) bool { + // Sources: + // https://github.com/grpc/grpc/blob/master/doc/statuscodes.md + // https://github.com/container-storage-interface/spec/blob/master/spec.md + st, ok := status.FromError(err) + if !ok { + // This is not gRPC error. The operation must have failed before gRPC + // method was called, otherwise we would get gRPC error. + // We don't know if any previous CreateVolume is in progress, be on the safe side. + return false + } + switch st.Code() { + case codes.Canceled, // gRPC: Client Application cancelled the request + codes.DeadlineExceeded, // gRPC: Timeout + codes.Unavailable, // gRPC: Server shutting down, TCP connection broken - previous CreateVolume() may be still in progress. + codes.ResourceExhausted: // gRPC: Server temporarily out of resources - previous Provision() may be still in progress. + return false + } + // All other errors mean that provisioning either did not + // even start or failed. It is for sure not in progress. + return true +} diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index ba13a7f148..ea1ce764ad 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -27,6 +27,9 @@ import ( "testing" "time" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "github.com/container-storage-interface/spec/lib/go/csi" "github.com/golang/mock/gomock" "github.com/kubernetes-csi/csi-lib-utils/connection" @@ -766,7 +769,9 @@ type provisioningTestcase struct { volWithLessCap bool expectedPVSpec *pvSpec withSecretRefs bool + createVolumeError error expectErr bool + expectState controller.ProvisioningState expectCreateVolDo interface{} } @@ -809,6 +814,7 @@ func TestProvision(t *testing.T) { }, }, }, + expectState: controller.ProvisioningFinished, }, "multiple fsType provision": { volOpts: controller.ProvisionOptions{ @@ -822,7 +828,8 @@ func TestProvision(t *testing.T) { PVName: "test-name", PVC: createFakePVC(requestedBytes), }, - expectErr: true, + expectErr: true, + expectState: controller.ProvisioningFinished, }, "provision with prefixed FS Type key": { volOpts: controller.ProvisionOptions{ @@ -855,6 +862,7 @@ func TestProvision(t *testing.T) { t.Errorf("Parameters should have been stripped") } }, + expectState: controller.ProvisioningFinished, }, "provision with access mode multi node multi writer": { volOpts: controller.ProvisionOptions{ @@ -905,6 +913,7 @@ func TestProvision(t *testing.T) { t.Errorf("Expected multi_node_multi_writer") } }, + expectState: controller.ProvisioningFinished, }, "provision with access mode multi node multi readonly": { volOpts: controller.ProvisionOptions{ @@ -955,6 +964,7 @@ func TestProvision(t *testing.T) { t.Errorf("Expected multi_node_reader_only") } }, + expectState: controller.ProvisioningFinished, }, "provision with access mode single writer": { volOpts: controller.ProvisionOptions{ @@ -1005,6 +1015,7 @@ func TestProvision(t *testing.T) { t.Errorf("Expected single_node_writer") } }, + expectState: controller.ProvisioningFinished, }, "provision with multiple access modes": { volOpts: controller.ProvisionOptions{ @@ -1061,6 +1072,7 @@ func TestProvision(t *testing.T) { t.Errorf("Expected single_node_writer") } }, + expectState: controller.ProvisioningFinished, }, "provision with secrets": { volOpts: controller.ProvisionOptions{ @@ -1103,6 +1115,7 @@ func TestProvision(t *testing.T) { }, }, }, + expectState: controller.ProvisioningFinished, }, "provision with volume mode(Filesystem)": { volOpts: controller.ProvisionOptions{ @@ -1129,6 +1142,7 @@ func TestProvision(t *testing.T) { }, }, }, + expectState: controller.ProvisioningFinished, }, "provision with volume mode(Block)": { volOpts: controller.ProvisionOptions{ @@ -1154,6 +1168,7 @@ func TestProvision(t *testing.T) { }, }, }, + expectState: controller.ProvisioningFinished, }, "fail to get secret reference": { volOpts: controller.ProvisionOptions{ @@ -1165,6 +1180,7 @@ func TestProvision(t *testing.T) { }, getSecretRefErr: true, expectErr: true, + expectState: controller.ProvisioningNoChange, }, "fail not nil selector": { volOpts: controller.ProvisionOptions{ @@ -1173,6 +1189,7 @@ func TestProvision(t *testing.T) { }, notNilSelector: true, expectErr: true, + expectState: controller.ProvisioningFinished, }, "fail to make volume name": { volOpts: controller.ProvisionOptions{ @@ -1181,6 +1198,7 @@ func TestProvision(t *testing.T) { }, makeVolumeNameErr: true, expectErr: true, + expectState: controller.ProvisioningFinished, }, "fail to get credentials": { volOpts: controller.ProvisionOptions{ @@ -1192,6 +1210,7 @@ func TestProvision(t *testing.T) { }, getCredentialsErr: true, expectErr: true, + expectState: controller.ProvisioningNoChange, }, "fail vol with less capacity": { volOpts: controller.ProvisionOptions{ @@ -1203,6 +1222,7 @@ func TestProvision(t *testing.T) { }, volWithLessCap: true, expectErr: true, + expectState: controller.ProvisioningInBackground, }, "provision with mount options": { volOpts: controller.ProvisionOptions{ @@ -1261,6 +1281,65 @@ func TestProvision(t *testing.T) { t.Errorf("Expected 2 mount options") } }, + expectState: controller.ProvisioningFinished, + }, + "provision with final error": { + volOpts: controller.ProvisionOptions{ + StorageClass: &storagev1.StorageClass{ + Parameters: map[string]string{}, + ReclaimPolicy: &deletePolicy, + }, + PVName: "test-name", + PVC: &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + UID: "testid", + }, + Spec: v1.PersistentVolumeClaimSpec{ + Selector: nil, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceName(v1.ResourceStorage): resource.MustParse(strconv.FormatInt(requestedBytes, 10)), + }, + }, + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}, + }, + }, + }, + createVolumeError: status.Error(codes.Unauthenticated, "Mock final error"), + expectCreateVolDo: func(ctx context.Context, req *csi.CreateVolumeRequest) { + // intentionally empty + }, + expectErr: true, + expectState: controller.ProvisioningFinished, + }, + "provision with transient error": { + volOpts: controller.ProvisionOptions{ + StorageClass: &storagev1.StorageClass{ + Parameters: map[string]string{}, + ReclaimPolicy: &deletePolicy, + }, + PVName: "test-name", + PVC: &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + UID: "testid", + }, + Spec: v1.PersistentVolumeClaimSpec{ + Selector: nil, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceName(v1.ResourceStorage): resource.MustParse(strconv.FormatInt(requestedBytes, 10)), + }, + }, + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}, + }, + }, + }, + createVolumeError: status.Error(codes.DeadlineExceeded, "Mock timeout"), + expectCreateVolDo: func(ctx context.Context, req *csi.CreateVolumeRequest) { + // intentionally empty + }, + expectErr: true, + expectState: controller.ProvisioningInBackground, }, } @@ -1370,18 +1449,18 @@ func runProvisionTest(t *testing.T, k string, tc provisioningTestcase, requested tc.volOpts.StorageClass.Parameters[provisionerSecretNamespaceKey] = "default" } else if tc.volWithLessCap { out.Volume.CapacityBytes = int64(80) - controllerServer.EXPECT().CreateVolume(gomock.Any(), gomock.Any()).Return(out, nil).Times(1) - controllerServer.EXPECT().DeleteVolume(gomock.Any(), gomock.Any()).Return(&csi.DeleteVolumeResponse{}, nil).Times(1) + controllerServer.EXPECT().CreateVolume(gomock.Any(), gomock.Any()).Return(out, tc.createVolumeError).Times(1) + controllerServer.EXPECT().DeleteVolume(gomock.Any(), gomock.Any()).Return(&csi.DeleteVolumeResponse{}, tc.createVolumeError).Times(1) } else if tc.expectCreateVolDo != nil { - controllerServer.EXPECT().CreateVolume(gomock.Any(), gomock.Any()).Do(tc.expectCreateVolDo).Return(out, nil).Times(1) + controllerServer.EXPECT().CreateVolume(gomock.Any(), gomock.Any()).Do(tc.expectCreateVolDo).Return(out, tc.createVolumeError).Times(1) } else { // Setup regular mock call expectations. if !tc.expectErr { - controllerServer.EXPECT().CreateVolume(gomock.Any(), gomock.Any()).Return(out, nil).Times(1) + controllerServer.EXPECT().CreateVolume(gomock.Any(), gomock.Any()).Return(out, tc.createVolumeError).Times(1) } } - pv, err := csiProvisioner.Provision(tc.volOpts) + pv, state, err := csiProvisioner.(controller.ProvisionerExt).ProvisionExt(tc.volOpts) if tc.expectErr && err == nil { t.Errorf("test %q: Expected error, got none", k) } @@ -1389,6 +1468,13 @@ func runProvisionTest(t *testing.T, k string, tc provisioningTestcase, requested t.Errorf("test %q: got error: %v", k, err) } + if tc.expectState == "" { + tc.expectState = controller.ProvisioningFinished + } + if tc.expectState != state { + t.Errorf("test %q: expected ProvisioningState %s, got %s", k, tc.expectState, state) + } + if tc.expectedPVSpec != nil { if pv.Name != tc.expectedPVSpec.Name { t.Errorf("test %q: expected PV name: %q, got: %q", k, tc.expectedPVSpec.Name, pv.Name)