Skip to content

Commit

Permalink
CSI: skip node unpublish on GC'd or down nodes (#13301)
Browse files Browse the repository at this point in the history
If the node has been GC'd or is down, we can't send it a node
unpublish. The CSI spec requires that we don't send the controller
unpublish before the node unpublish, but in the case where a node is
gone we can't know the final fate of the node unpublish step.

The `csi_hook` on the client will unpublish if the allocation has
stopped and if the host is terminated there's no mount for the volume
anyways. So we'll now assume that the node has unpublished at its
end. If it hasn't, any controller unpublish will potentially hang or
error and need to be retried.
  • Loading branch information
tgross authored Jun 9, 2022
1 parent f877436 commit dd1bbbe
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .changelog/13301.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
csi: Fixed a bug where volume claims on lost or garbage collected nodes could not be freed
```
22 changes: 20 additions & 2 deletions nomad/csi_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,25 @@ RELEASE_CLAIM:

func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
v.logger.Trace("node unpublish", "vol", vol.ID)

store := v.srv.fsm.State()

// If the node has been GC'd or is down, we can't send it a node
// unpublish. We need to assume the node has unpublished at its
// end. If it hasn't, any controller unpublish will potentially
// hang or error and need to be retried.
if claim.NodeID != "" {
node, err := store.NodeByID(memdb.NewWatchSet(), claim.NodeID)
if err != nil {
return err
}
if node == nil || node.Status == structs.NodeStatusDown {
v.logger.Debug("skipping node unpublish for down or GC'd node")
claim.State = structs.CSIVolumeClaimStateNodeDetached
return v.checkpointClaim(vol, claim)
}
}

if claim.AllocationID != "" {
err := v.nodeUnpublishVolumeImpl(vol, claim)
if err != nil {
Expand All @@ -698,8 +717,7 @@ func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.C
// The RPC sent from the 'nomad node detach' command or GC won't have an
// allocation ID set so we try to unpublish every terminal or invalid
// alloc on the node, all of which will be in PastClaims after denormalizing
state := v.srv.fsm.State()
vol, err := state.CSIVolumeDenormalize(memdb.NewWatchSet(), vol)
vol, err := store.CSIVolumeDenormalize(memdb.NewWatchSet(), vol)
if err != nil {
return err
}
Expand Down
14 changes: 12 additions & 2 deletions nomad/csi_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -504,22 +504,32 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
type tc struct {
name string
startingState structs.CSIVolumeClaimState
nodeID string
expectedErrMsg string
}
testCases := []tc{
{
name: "success",
startingState: structs.CSIVolumeClaimStateControllerDetached,
nodeID: node.ID,
},
{
name: "unpublish previously detached node",
startingState: structs.CSIVolumeClaimStateNodeDetached,
expectedErrMsg: "could not detach from controller: controller detach volume: No path to node",
nodeID: node.ID,
},
{
name: "unpublish claim on garbage collected node",
startingState: structs.CSIVolumeClaimStateTaken,
expectedErrMsg: "could not detach from controller: controller detach volume: No path to node",
nodeID: uuid.Generate(),
},
{
name: "first unpublish",
startingState: structs.CSIVolumeClaimStateTaken,
expectedErrMsg: "could not detach from controller: controller detach volume: No path to node",
nodeID: node.ID,
},
}

Expand All @@ -545,7 +555,7 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {

// setup: create an alloc that will claim our volume
alloc := mock.BatchAlloc()
alloc.NodeID = node.ID
alloc.NodeID = tc.nodeID
alloc.ClientStatus = structs.AllocClientStatusFailed

index++
Expand All @@ -554,7 +564,7 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
// setup: claim the volume for our alloc
claim := &structs.CSIVolumeClaim{
AllocationID: alloc.ID,
NodeID: node.ID,
NodeID: tc.nodeID,
ExternalNodeID: "i-example",
Mode: structs.CSIVolumeClaimRead,
}
Expand Down

0 comments on commit dd1bbbe

Please sign in to comment.