-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CSI: failed allocation should not block its own controller unpublish #14484
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
```release-note:bug | ||
csi: Fixed a bug where the server would not send controller unpublish for a failed allocation. | ||
``` | ||
|
||
```release-note:bug | ||
csi: Fixed a data race in the volume unpublish endpoint that could result in claims being incorrectly marked as freed before being persisted to raft. | ||
``` | ||
|
||
```release-note:bug | ||
api: Fixed a bug where the List Volume API did not include the `ControllerRequired` and `ResourceExhausted` fields. | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -659,12 +659,14 @@ func (v *CSIVolume) Unpublish(args *structs.CSIVolumeUnpublishRequest, reply *st | |
case structs.CSIVolumeClaimStateReadyToFree: | ||
goto RELEASE_CLAIM | ||
} | ||
vol = vol.Copy() | ||
err = v.nodeUnpublishVolume(vol, claim) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
NODE_DETACHED: | ||
vol = vol.Copy() | ||
err = v.controllerUnpublishVolume(vol, claim) | ||
if err != nil { | ||
return err | ||
|
@@ -684,6 +686,10 @@ RELEASE_CLAIM: | |
return nil | ||
} | ||
|
||
// nodeUnpublishVolume handles sending RPCs to the Node plugin to unmount | |
// it. Typically this task is already completed on the client, but we need to | ||
// have this here so that GC can re-send it in case of client-side | ||
// problems. This function should only be called on a copy of the volume. | ||
func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error { | ||
v.logger.Trace("node unpublish", "vol", vol.ID) | ||
|
||
|
@@ -776,8 +782,12 @@ func (v *CSIVolume) nodeUnpublishVolumeImpl(vol *structs.CSIVolume, claim *struc | |
return nil | ||
} | ||
|
||
// controllerUnpublishVolume handles sending RPCs to the Controller plugin | |
// to unpublish the volume (detach it from its host). This function should only | ||
// be called on a copy of the volume. | ||
func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error { | ||
v.logger.Trace("controller unpublish", "vol", vol.ID) | ||
|
||
if !vol.ControllerRequired { | ||
claim.State = structs.CSIVolumeClaimStateReadyToFree | ||
return nil | ||
|
@@ -792,26 +802,39 @@ func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *str | |
} else if plugin == nil { | ||
return fmt.Errorf("no such plugin: %q", vol.PluginID) | ||
} | ||
|
||
if !plugin.HasControllerCapability(structs.CSIControllerSupportsAttachDetach) { | ||
claim.State = structs.CSIVolumeClaimStateReadyToFree | ||
return nil | ||
Comment on lines
+807
to
808
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we skip the call to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This assignment is really just for helping out testing. If we return (Same applies to the one below) |
||
} | ||
|
||
// we only send a controller detach if a Nomad client no longer has | ||
// any claim to the volume, so we need to check the status of claimed | ||
// allocations | ||
vol, err = state.CSIVolumeDenormalize(ws, vol) | ||
if err != nil { | ||
return err | ||
} | ||
for _, alloc := range vol.ReadAllocs { | ||
if alloc != nil && alloc.NodeID == claim.NodeID && !alloc.TerminalStatus() { | ||
|
||
// we only send a controller detach if a Nomad client no longer has any | ||
// claim to the volume, so we need to check the status of any other claimed | ||
// allocations | ||
shouldCancel := func(alloc *structs.Allocation) bool { | ||
if alloc != nil && alloc.ID != claim.AllocationID && | ||
alloc.NodeID == claim.NodeID && !alloc.TerminalStatus() { | ||
claim.State = structs.CSIVolumeClaimStateReadyToFree | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm probably missing something, so just to understand this better, is the claim state updated here because it always needs to be set to |
||
v.logger.Debug( | ||
"controller unpublish canceled: another non-terminal alloc is on this node", | ||
"vol", vol.ID, "alloc", alloc.ID) | ||
return true | ||
} | ||
return false | ||
} | ||
|
||
for _, alloc := range vol.ReadAllocs { | ||
if shouldCancel(alloc) { | ||
return nil | ||
} | ||
} | ||
for _, alloc := range vol.WriteAllocs { | ||
if alloc != nil && alloc.NodeID == claim.NodeID && !alloc.TerminalStatus() { | ||
claim.State = structs.CSIVolumeClaimStateReadyToFree | ||
if shouldCancel(alloc) { | ||
return nil | ||
} | ||
} | ||
|
@@ -837,6 +860,8 @@ func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *str | |
if err != nil { | ||
return fmt.Errorf("could not detach from controller: %v", err) | ||
} | ||
|
||
v.logger.Trace("controller detach complete", "vol", vol.ID) | ||
claim.State = structs.CSIVolumeClaimStateReadyToFree | ||
return v.checkpointClaim(vol, claim) | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just checking my understanding: are the two `Copy` calls required (instead of, for example, copying it once before the `switch` statement) because `nodeUnpublishVolume` will eventually call `CSIVolumeDenormalize`, which will read the volume from the state store again?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right. That isn't guaranteed because `nodeUnpublishVolume` might return before that point if the node has been GC'd, so we can end up copying one extra time uselessly. That's unfortunate but doesn't feel like a big deal as it's a bit of a corner case. The other option would be to try to be really precise about when we need to copy, but I think we've found that to be really error-prone. (And maybe something we could solve for in the state store itself at some point.)