Skip to content

Commit

Permalink
Merge pull request #319 from jsafrane/1.2-retry
Browse files Browse the repository at this point in the history
[1.2] Cherry pick: Retry provisioning of volumes after transient error
  • Loading branch information
k8s-ci-robot authored Jul 17, 2019
2 parents 5a561f1 + c2fbcff commit f887747
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 28 deletions.
2 changes: 2 additions & 0 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

87 changes: 65 additions & 22 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ import (
"strings"
"time"

"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

"github.com/container-storage-interface/spec/lib/go/csi"
"github.com/kubernetes-csi/csi-lib-utils/connection"
"github.com/kubernetes-csi/external-provisioner/pkg/features"
Expand Down Expand Up @@ -166,6 +169,7 @@ type csiProvisioner struct {

var _ controller.Provisioner = &csiProvisioner{}
var _ controller.BlockProvisioner = &csiProvisioner{}
var _ controller.ProvisionerExt = &csiProvisioner{}

var (
// Each provisioner have a identify string to distinguish with others. This
Expand Down Expand Up @@ -333,8 +337,14 @@ func getVolumeCapability(
}

func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.PersistentVolume, error) {
// The controller should call ProvisionExt() instead, but just in case...
pv, _, err := p.ProvisionExt(options)
return pv, err
}

func (p *csiProvisioner) ProvisionExt(options controller.ProvisionOptions) (*v1.PersistentVolume, controller.ProvisioningState, error) {
if options.StorageClass == nil {
return nil, errors.New("storage class was nil")
return nil, controller.ProvisioningFinished, errors.New("storage class was nil")
}

migratedVolume := false
Expand All @@ -347,7 +357,7 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per
klog.V(2).Infof("translating storage class for in-tree plugin %s to CSI", options.StorageClass.Provisioner)
storageClass, err := csitranslationlib.TranslateInTreeStorageClassToCSI(p.supportsMigrationFromInTreePluginName, options.StorageClass)
if err != nil {
return nil, fmt.Errorf("failed to translate storage class: %v", err)
return nil, controller.ProvisioningFinished, fmt.Errorf("failed to translate storage class: %v", err)
}
options.StorageClass = storageClass
migratedVolume = true
Expand All @@ -360,28 +370,28 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per
if options.PVC.Spec.DataSource != nil {
// PVC.Spec.DataSource.Name is the name of the VolumeSnapshot API object
if options.PVC.Spec.DataSource.Name == "" {
return nil, fmt.Errorf("the PVC source not found for PVC %s", options.PVC.Name)
return nil, controller.ProvisioningFinished, fmt.Errorf("the PVC source not found for PVC %s", options.PVC.Name)
}
if options.PVC.Spec.DataSource.Kind != snapshotKind {
return nil, fmt.Errorf("the PVC source is not the right type. Expected %s, Got %s", snapshotKind, options.PVC.Spec.DataSource.Kind)
return nil, controller.ProvisioningFinished, fmt.Errorf("the PVC source is not the right type. Expected %s, Got %s", snapshotKind, options.PVC.Spec.DataSource.Kind)
}
if *(options.PVC.Spec.DataSource.APIGroup) != snapshotAPIGroup {
return nil, fmt.Errorf("the PVC source does not belong to the right APIGroup. Expected %s, Got %s", snapshotAPIGroup, *(options.PVC.Spec.DataSource.APIGroup))
return nil, controller.ProvisioningFinished, fmt.Errorf("the PVC source does not belong to the right APIGroup. Expected %s, Got %s", snapshotAPIGroup, *(options.PVC.Spec.DataSource.APIGroup))
}
needSnapshotSupport = true
}

if err := p.checkDriverCapabilities(needSnapshotSupport); err != nil {
return nil, err
return nil, controller.ProvisioningFinished, err
}

if options.PVC.Spec.Selector != nil {
return nil, fmt.Errorf("claim Selector is not supported")
return nil, controller.ProvisioningFinished, fmt.Errorf("claim Selector is not supported")
}

pvName, err := makeVolumeName(p.volumeNamePrefix, fmt.Sprintf("%s", options.PVC.ObjectMeta.UID), p.volumeNameUUIDLength)
if err != nil {
return nil, err
return nil, controller.ProvisioningFinished, err
}

fsTypesFound := 0
Expand All @@ -396,7 +406,7 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per
}
}
if fsTypesFound > 1 {
return nil, fmt.Errorf("fstype specified in parameters with both \"fstype\" and \"%s\" keys", prefixedFsTypeKey)
return nil, controller.ProvisioningFinished, fmt.Errorf("fstype specified in parameters with both \"fstype\" and \"%s\" keys", prefixedFsTypeKey)
}
if len(fsType) == 0 {
fsType = defaultFSType
Expand Down Expand Up @@ -424,7 +434,7 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per
if needSnapshotSupport {
volumeContentSource, err := p.getVolumeContentSource(options)
if err != nil {
return nil, fmt.Errorf("error getting snapshot handle for snapshot %s: %v", options.PVC.Spec.DataSource.Name, err)
return nil, controller.ProvisioningNoChange, fmt.Errorf("error getting snapshot handle for snapshot %s: %v", options.PVC.Spec.DataSource.Name, err)
}
req.VolumeContentSource = volumeContentSource
}
Expand All @@ -438,7 +448,7 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per
options.SelectedNode,
p.strictTopology)
if err != nil {
return nil, fmt.Errorf("error generating accessibility requirements: %v", err)
return nil, controller.ProvisioningNoChange, fmt.Errorf("error generating accessibility requirements: %v", err)
}
req.AccessibilityRequirements = requirements
}
Expand All @@ -455,39 +465,42 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per
},
})
if err != nil {
return nil, err
return nil, controller.ProvisioningNoChange, err
}
provisionerCredentials, err := getCredentials(p.client, provisionerSecretRef)
if err != nil {
return nil, err
return nil, controller.ProvisioningNoChange, err
}
req.Secrets = provisionerCredentials

// Resolve controller publish, node stage, node publish secret references
controllerPublishSecretRef, err := getSecretReference(controllerPublishSecretParams, options.StorageClass.Parameters, pvName, options.PVC)
if err != nil {
return nil, err
return nil, controller.ProvisioningNoChange, err
}
nodeStageSecretRef, err := getSecretReference(nodeStageSecretParams, options.StorageClass.Parameters, pvName, options.PVC)
if err != nil {
return nil, err
return nil, controller.ProvisioningNoChange, err
}
nodePublishSecretRef, err := getSecretReference(nodePublishSecretParams, options.StorageClass.Parameters, pvName, options.PVC)
if err != nil {
return nil, err
return nil, controller.ProvisioningNoChange, err
}

req.Parameters, err = removePrefixedParameters(options.StorageClass.Parameters)
if err != nil {
return nil, fmt.Errorf("failed to strip CSI Parameters of prefixed keys: %v", err)
return nil, controller.ProvisioningFinished, fmt.Errorf("failed to strip CSI Parameters of prefixed keys: %v", err)
}

ctx, cancel := context.WithTimeout(context.Background(), p.timeout)
defer cancel()
rep, err = p.csiClient.CreateVolume(ctx, &req)

if err != nil {
return nil, err
if isFinalError(err) {
return nil, controller.ProvisioningFinished, err
}
return nil, controller.ProvisioningInBackground, err
}

if rep.Volume != nil {
Expand All @@ -510,7 +523,8 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per
if err != nil {
capErr = fmt.Errorf("%v. Cleanup of volume %s failed, volume is orphaned: %v", capErr, pvName, err)
}
return nil, capErr
// use InBackground to retry the call, hoping the volume is deleted correctly next time.
return nil, controller.ProvisioningInBackground, capErr
}

pv := &v1.PersistentVolume{
Expand Down Expand Up @@ -558,14 +572,19 @@ func (p *csiProvisioner) Provision(options controller.ProvisionOptions) (*v1.Per
pv, err = csitranslationlib.TranslateCSIPVToInTree(pv)
if err != nil {
klog.Warningf("failed to translate CSI PV to in-tree due to: %v. Deleting provisioned PV", err)
p.Delete(pv)
return nil, err
deleteErr := p.Delete(pv)
if deleteErr != nil {
klog.Warningf("failed to delete partly provisioned PV: %v", deleteErr)
// Retry the call again to clean up the orphan
return nil, controller.ProvisioningInBackground, err
}
return nil, controller.ProvisioningFinished, err
}
}

klog.Infof("successfully created PV %+v", pv.Spec.PersistentVolumeSource)

return pv, nil
return pv, controller.ProvisioningFinished, nil
}

func (p *csiProvisioner) supportsTopology() bool {
Expand Down Expand Up @@ -919,3 +938,27 @@ func deprecationWarning(deprecatedParam, newParam, removalVersion string) string
}
return fmt.Sprintf("\"%s\" is deprecated and will be removed in %s%s", deprecatedParam, removalVersion, newParamPhrase)
}

func isFinalError(err error) bool {
// Sources:
// https://github.com/grpc/grpc/blob/master/doc/statuscodes.md
// https://github.com/container-storage-interface/spec/blob/master/spec.md
st, ok := status.FromError(err)
if !ok {
// This is not gRPC error. The operation must have failed before gRPC
// method was called, otherwise we would get gRPC error.
// We don't know if any previous CreateVolume is in progress, be on the safe side.
return false
}
switch st.Code() {
case codes.Canceled, // gRPC: Client Application cancelled the request
codes.DeadlineExceeded, // gRPC: Timeout
codes.Unavailable, // gRPC: Server shutting down, TCP connection broken - previous CreateVolume() may be still in progress.
codes.ResourceExhausted, // gRPC: Server temporarily out of resources - previous CreateVolume() may be still in progress.
codes.Aborted: // CSI: Operation pending for volume
return false
}
// All other errors mean that provisioning either did not
// even start or failed. It is for sure not in progress.
return true
}
Loading

0 comments on commit f887747

Please sign in to comment.