From 16cfd8e467e604e27db14739f5bcda40d82c146f Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Wed, 7 Sep 2022 16:33:39 -0400
Subject: [PATCH 1/4] csi: fix missing copies in volume unpublish workflow

Entities we get from the state store should always be copied before
they are altered. Ensure that we copy the volume in the top-level
unpublish workflow before handing off to the individual steps.
---
 nomad/csi_endpoint.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/nomad/csi_endpoint.go b/nomad/csi_endpoint.go
index 7286450d19c..51087198103 100644
--- a/nomad/csi_endpoint.go
+++ b/nomad/csi_endpoint.go
@@ -659,12 +659,14 @@ func (v *CSIVolume) Unpublish(args *structs.CSIVolumeUnpublishRequest, reply *st
 	case structs.CSIVolumeClaimStateReadyToFree:
 		goto RELEASE_CLAIM
 	}
+	vol = vol.Copy()
 	err = v.nodeUnpublishVolume(vol, claim)
 	if err != nil {
 		return err
 	}
 
 NODE_DETACHED:
+	vol = vol.Copy()
 	err = v.controllerUnpublishVolume(vol, claim)
 	if err != nil {
 		return err
@@ -684,6 +686,10 @@ RELEASE_CLAIM:
 	return nil
 }
 
+// nodeUnpublishVolume handles sending the RPCs to the Node plugin to unmount
+// the volume. Typically this task is already completed on the client, but we
+// need it here so that GC can re-send the RPCs in case of client-side
+// problems. This function should only be called on a copy of the volume.
 func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
 	v.logger.Trace("node unpublish", "vol", vol.ID)
 
@@ -776,6 +782,9 @@ func (v *CSIVolume) nodeUnpublishVolumeImpl(vol *structs.CSIVolume, claim *struc
 	return nil
 }
 
+// controllerUnpublishVolume handles sending the RPCs to the Controller plugin
+// to unpublish the volume (detach it from its host). This function should only
+// be called on a copy of the volume.
 func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
 	v.logger.Trace("controller unpublish", "vol", vol.ID)
 	if !vol.ControllerRequired {
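Patch 1's rule, copy anything read from the state store before mutating it, is worth seeing in isolation. Below is a minimal, self-contained sketch of the pattern; the `Volume` type and the `store` map are illustrative stand-ins for Nomad's memdb-backed state store, not its actual API:

```go
package main

import "fmt"

// Volume stands in for a state store object such as structs.CSIVolume.
// The store hands out shared pointers, so any writer must copy first.
type Volume struct {
	ID     string
	Claims map[string]string
}

// Copy returns a deep copy that is safe to mutate.
func (v *Volume) Copy() *Volume {
	out := &Volume{ID: v.ID, Claims: make(map[string]string, len(v.Claims))}
	for k, val := range v.Claims {
		out.Claims[k] = val
	}
	return out
}

// store simulates the state store: every reader gets the same pointer.
var store = map[string]*Volume{
	"vol-1": {ID: "vol-1", Claims: map[string]string{"alloc-1": "read"}},
}

func main() {
	shared := store["vol-1"]

	// Wrong: mutating the shared object directly races with concurrent
	// readers and alters state that was never committed through raft.
	// shared.Claims["alloc-2"] = "write"

	// Right: copy first, mutate the private copy, then submit the copy
	// through the normal commit path.
	mine := shared.Copy()
	mine.Claims["alloc-2"] = "write"

	fmt.Println(len(shared.Claims), len(mine.Claims)) // 1 2
}
```

This is also why the second changelog entry below describes the fix as closing a data race: before the copies were added, the unpublish steps mutated claim state on an object other readers could observe before the change was persisted.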
From 25e230247c3b7f57c1134823a1f09d338ba06289 Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Wed, 7 Sep 2022 16:34:00 -0400
Subject: [PATCH 2/4] csi: fix unpopulated fields in List Volumes API

The list stub object for volumes in `nomad/structs` did not match the
stub object in `api`. The `api` package also did not include the
current readers/writers fields that are expected by the UI. True up
the two objects and add the previously undocumented fields to the
docs.
---
 api/csi.go                           |  2 ++
 nomad/structs/csi.go                 | 13 ++++++++-----
 website/content/api-docs/volumes.mdx |  2 ++
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/api/csi.go b/api/csi.go
index c9d355d625e..dc31db8f2d4 100644
--- a/api/csi.go
+++ b/api/csi.go
@@ -384,6 +384,8 @@ type CSIVolumeListStub struct {
 	Topologies     []*CSITopology
 	AccessMode     CSIVolumeAccessMode
 	AttachmentMode CSIVolumeAttachmentMode
+	CurrentReaders int
+	CurrentWriters int
 	Schedulable    bool
 	PluginID       string
 	Provider       string
diff --git a/nomad/structs/csi.go b/nomad/structs/csi.go
index 75601706b31..447ee523e36 100644
--- a/nomad/structs/csi.go
+++ b/nomad/structs/csi.go
@@ -367,12 +367,15 @@ type CSIVolListStub struct {
 	Schedulable         bool
 	PluginID            string
 	Provider            string
+	ControllerRequired  bool
 	ControllersHealthy  int
 	ControllersExpected int
 	NodesHealthy        int
 	NodesExpected       int
-	CreateIndex         uint64
-	ModifyIndex         uint64
+	ResourceExhausted   time.Time
+
+	CreateIndex uint64
+	ModifyIndex uint64
 }
 
 // NewCSIVolume creates the volume struct. No side-effects
@@ -409,7 +412,7 @@ func (v *CSIVolume) RemoteID() string {
 }
 
 func (v *CSIVolume) Stub() *CSIVolListStub {
-	stub := CSIVolListStub{
+	return &CSIVolListStub{
 		ID:        v.ID,
 		Namespace: v.Namespace,
 		Name:      v.Name,
@@ -422,15 +425,15 @@ func (v *CSIVolume) Stub() *CSIVolListStub {
 		Schedulable:         v.Schedulable,
 		PluginID:            v.PluginID,
 		Provider:            v.Provider,
+		ControllerRequired:  v.ControllerRequired,
 		ControllersHealthy:  v.ControllersHealthy,
 		ControllersExpected: v.ControllersExpected,
 		NodesHealthy:        v.NodesHealthy,
 		NodesExpected:       v.NodesExpected,
+		ResourceExhausted:   v.ResourceExhausted,
 		CreateIndex:         v.CreateIndex,
 		ModifyIndex:         v.ModifyIndex,
 	}
-
-	return &stub
 }
 
 // ReadSchedulable determines if the volume is potentially schedulable
diff --git a/website/content/api-docs/volumes.mdx b/website/content/api-docs/volumes.mdx
index d49a8e94a0f..0606b8975b7 100644
--- a/website/content/api-docs/volumes.mdx
+++ b/website/content/api-docs/volumes.mdx
@@ -78,6 +78,8 @@ $ curl \
     ],
     "AccessMode": "multi-node-single-writer",
     "AttachmentMode": "file-system",
+    "CurrentReaders": 2,
+    "CurrentWriters": 1,
     "Schedulable": true,
     "PluginID": "plugin-id1",
     "Provider": "ebs",
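The root cause in patch 2 is a classic one: two stub structs that must stay in sync drift apart. A reflection-based parity check like the sketch below can catch this drift in a unit test. The two struct types here are trimmed-down, hypothetical stand-ins for `api.CSIVolumeListStub` and `structs.CSIVolListStub`, not the real definitions, and the check itself is an illustration rather than a test Nomad actually ships:

```go
package main

import (
	"fmt"
	"reflect"
)

// apiStub and structsStub are trimmed stand-ins for the two list stubs
// that drifted apart; only a few fields are shown for illustration.
type apiStub struct {
	ID             string
	CurrentReaders int
	CurrentWriters int
}

type structsStub struct {
	ID                 string
	ControllerRequired bool
	CurrentReaders     int
	CurrentWriters     int
}

// fieldNames collects the exported field names of a struct value.
func fieldNames(v any) map[string]bool {
	t := reflect.TypeOf(v)
	names := make(map[string]bool, t.NumField())
	for i := 0; i < t.NumField(); i++ {
		names[t.Field(i).Name] = true
	}
	return names
}

func main() {
	a, s := fieldNames(apiStub{}), fieldNames(structsStub{})
	for name := range s {
		if !a[name] {
			fmt.Println("missing from api stub:", name) // ControllerRequired
		}
	}
	for name := range a {
		if !s[name] {
			fmt.Println("missing from structs stub:", name)
		}
	}
}
```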
From 2dc870cd219c884b5b6a890eff79104aee28b777 Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Wed, 7 Sep 2022 16:54:34 -0400
Subject: [PATCH 3/4] CSI: allocation should not block its own controller unpublish

A Nomad user reported problems with CSI volumes associated with failed
allocations, where the Nomad server did not send a controller unpublish
RPC.

The controller unpublish is skipped if other non-terminal allocations
on the same node claim the volume. This check had a bug: the allocation
belonging to the claim being freed was itself included in the check.
During a normal allocation stop (a job stop or a new version of the
job), the allocation is already terminal. But allocations that fail are
not yet marked terminal at the point in time when the client sends the
unpublish RPC to the server.

For CSI plugins that support controller attach/detach, this means the
controller cannot detach the volume from the allocation's host, and the
replacement claim fails until a GC is run. This changeset fixes the
conditional so that the claim's own allocation is excluded, and makes
the logic easier to read. It also includes a test case covering this
path.
---
 nomad/csi_endpoint.go      | 30 +++++++++++++----
 nomad/csi_endpoint_test.go | 69 +++++++++++++++++++++++++++++++-------
 2 files changed, 79 insertions(+), 20 deletions(-)

diff --git a/nomad/csi_endpoint.go b/nomad/csi_endpoint.go
index 51087198103..a8695982be2 100644
--- a/nomad/csi_endpoint.go
+++ b/nomad/csi_endpoint.go
@@ -787,6 +787,7 @@ func (v *CSIVolume) nodeUnpublishVolumeImpl(vol *structs.CSIVolume, claim *struc
 // be called on a copy of the volume.
 func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
 	v.logger.Trace("controller unpublish", "vol", vol.ID)
+
 	if !vol.ControllerRequired {
 		claim.State = structs.CSIVolumeClaimStateReadyToFree
 		return nil
@@ -801,26 +802,39 @@ func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *str
 	} else if plugin == nil {
 		return fmt.Errorf("no such plugin: %q", vol.PluginID)
 	}
+
 	if !plugin.HasControllerCapability(structs.CSIControllerSupportsAttachDetach) {
+		claim.State = structs.CSIVolumeClaimStateReadyToFree
 		return nil
 	}
 
-	// we only send a controller detach if a Nomad client no longer has
-	// any claim to the volume, so we need to check the status of claimed
-	// allocations
 	vol, err = state.CSIVolumeDenormalize(ws, vol)
 	if err != nil {
 		return err
 	}
-	for _, alloc := range vol.ReadAllocs {
-		if alloc != nil && alloc.NodeID == claim.NodeID && !alloc.TerminalStatus() {
+
+	// we only send a controller detach if a Nomad client no longer has any
+	// claim to the volume, so we need to check the status of any other claimed
+	// allocations
+	shouldCancel := func(alloc *structs.Allocation) bool {
+		if alloc != nil && alloc.ID != claim.AllocationID &&
+			alloc.NodeID == claim.NodeID && !alloc.TerminalStatus() {
 			claim.State = structs.CSIVolumeClaimStateReadyToFree
+			v.logger.Debug(
+				"controller unpublish canceled: another non-terminal alloc is on this node",
+				"vol", vol.ID, "alloc", alloc.ID)
+			return true
+		}
+		return false
+	}
+
+	for _, alloc := range vol.ReadAllocs {
+		if shouldCancel(alloc) {
 			return nil
 		}
 	}
 	for _, alloc := range vol.WriteAllocs {
-		if alloc != nil && alloc.NodeID == claim.NodeID && !alloc.TerminalStatus() {
-			claim.State = structs.CSIVolumeClaimStateReadyToFree
+		if shouldCancel(alloc) {
 			return nil
 		}
 	}
@@ -846,6 +860,8 @@ func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *str
 	if err != nil {
 		return fmt.Errorf("could not detach from controller: %v", err)
 	}
+
+	v.logger.Trace("controller detach complete", "vol", vol.ID)
 	claim.State = structs.CSIVolumeClaimStateReadyToFree
 	return v.checkpointClaim(vol, claim)
 }
diff --git a/nomad/csi_endpoint_test.go b/nomad/csi_endpoint_test.go
index 65e661d35e2..9f24fc12c3b 100644
--- a/nomad/csi_endpoint_test.go
+++ b/nomad/csi_endpoint_test.go
@@ -7,6 +7,11 @@ import (
 	"time"
 
 	msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
+	"github.com/shoenig/test"
+	"github.com/shoenig/test/must"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
 	"github.com/hashicorp/nomad/acl"
 	"github.com/hashicorp/nomad/ci"
 	"github.com/hashicorp/nomad/client"
@@ -17,7 +22,6 @@ import (
 	"github.com/hashicorp/nomad/nomad/state"
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/hashicorp/nomad/testutil"
-	"github.com/stretchr/testify/require"
 )
 
 func TestCSIVolumeEndpoint_Get(t *testing.T) {
@@ -499,36 +503,51 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
 		},
 	}
 	index++
-	require.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, index, node))
+	must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, index, node))
 
 	type tc struct {
 		name           string
 		startingState  structs.CSIVolumeClaimState
+		endState       structs.CSIVolumeClaimState
 		nodeID         string
+		otherNodeID    string
 		expectedErrMsg string
 	}
 	testCases := []tc{
 		{
 			name:          "success",
 			startingState: structs.CSIVolumeClaimStateControllerDetached,
 			nodeID:        node.ID,
+			otherNodeID:   uuid.Generate(),
+		},
+		{
+			name:          "non-terminal allocation on same node",
+			startingState: structs.CSIVolumeClaimStateNodeDetached,
+			nodeID:        node.ID,
+			otherNodeID:   node.ID,
 		},
 		{
 			name:           "unpublish previously detached node",
 			startingState:  structs.CSIVolumeClaimStateNodeDetached,
+			endState:       structs.CSIVolumeClaimStateNodeDetached,
 			expectedErrMsg: "could not detach from controller: controller detach volume: No path to node",
 			nodeID:         node.ID,
+			otherNodeID:    uuid.Generate(),
 		},
 		{
 			name:           "unpublish claim on garbage collected node",
 			startingState:  structs.CSIVolumeClaimStateTaken,
+			endState:       structs.CSIVolumeClaimStateNodeDetached,
 			expectedErrMsg: "could not detach from controller: controller detach volume: No path to node",
 			nodeID:         uuid.Generate(),
+			otherNodeID:    uuid.Generate(),
 		},
 		{
 			name:           "first unpublish",
 			startingState:  structs.CSIVolumeClaimStateTaken,
+			endState:       structs.CSIVolumeClaimStateNodeDetached,
 			expectedErrMsg: "could not detach from controller: controller detach volume: No path to node",
 			nodeID:         node.ID,
+			otherNodeID:    uuid.Generate(),
 		},
 	}
@@ -551,15 +570,20 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
 			}
 			index++
 			err = state.UpsertCSIVolume(index, []*structs.CSIVolume{vol})
-			require.NoError(t, err)
+			must.NoError(t, err)
 
 			// setup: create an alloc that will claim our volume
 			alloc := mock.BatchAlloc()
 			alloc.NodeID = tc.nodeID
 			alloc.ClientStatus = structs.AllocClientStatusFailed
+
+			otherAlloc := mock.BatchAlloc()
+			otherAlloc.NodeID = tc.otherNodeID
+			otherAlloc.ClientStatus = structs.AllocClientStatusRunning
 
 			index++
-			require.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc}))
+			must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, index,
+				[]*structs.Allocation{alloc, otherAlloc}))
 
 			// setup: claim the volume for our alloc
 			claim := &structs.CSIVolumeClaim{
@@ -572,7 +596,20 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
 			index++
 			claim.State = structs.CSIVolumeClaimStateTaken
 			err = state.CSIVolumeClaim(index, ns, volID, claim)
-			require.NoError(t, err)
+			must.NoError(t, err)
+
+			// setup: claim the volume for our other alloc
+			otherClaim := &structs.CSIVolumeClaim{
+				AllocationID:   otherAlloc.ID,
+				NodeID:         tc.otherNodeID,
+				ExternalNodeID: "i-example",
+				Mode:           structs.CSIVolumeClaimRead,
+			}
+
+			index++
+			otherClaim.State = structs.CSIVolumeClaimStateTaken
+			err = state.CSIVolumeClaim(index, ns, volID, otherClaim)
+			must.NoError(t, err)
 
 			// test: unpublish and check the results
 			claim.State = tc.startingState
@@ -589,17 +626,23 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
 			}
 
 			err = msgpackrpc.CallWithCodec(codec, "CSIVolume.Unpublish", req, &structs.CSIVolumeUnpublishResponse{})
 
+			vol, volErr := state.CSIVolumeByID(nil, ns, volID)
+			must.NoError(t, volErr)
+			must.NotNil(t, vol)
+
 			if tc.expectedErrMsg == "" {
-				require.NoError(t, err)
-				vol, err = state.CSIVolumeByID(nil, ns, volID)
-				require.NoError(t, err)
-				require.NotNil(t, vol)
-				require.Len(t, vol.ReadAllocs, 0)
+				must.NoError(t, err)
+				assert.Len(t, vol.ReadAllocs, 1)
 			} else {
-				require.Error(t, err)
-				require.True(t, strings.Contains(err.Error(), tc.expectedErrMsg),
-					"error message %q did not contain %q", err.Error(), tc.expectedErrMsg)
+				must.Error(t, err)
+				assert.Len(t, vol.ReadAllocs, 2)
+				test.True(t, strings.Contains(err.Error(), tc.expectedErrMsg),
+					test.Sprintf("error %v did not contain %q", err, tc.expectedErrMsg))
+				claim = vol.PastClaims[alloc.ID]
+				must.NotNil(t, claim)
+				test.Eq(t, tc.endState, claim.State)
 			}
+
 		})
 	}
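The heart of patch 3 is a one-clause change to the cancellation predicate: the claim's own allocation must be excluded before asking whether anything else on the node still uses the volume. Here is a self-contained sketch of the fixed logic, with `Alloc` as a simplified, hypothetical stand-in for `structs.Allocation`:

```go
package main

import "fmt"

// Alloc is a simplified stand-in for structs.Allocation.
type Alloc struct {
	ID       string
	NodeID   string
	Terminal bool
}

// otherActiveClaimOnNode reports whether any allocation *other than* the
// one releasing its claim still actively uses the volume on the same
// node. Without the a.ID != claimAllocID clause, a failed (not yet
// terminal) alloc would match its own claim and cancel its own
// controller unpublish -- the bug this patch fixes.
func otherActiveClaimOnNode(allocs []*Alloc, claimAllocID, claimNodeID string) bool {
	for _, a := range allocs {
		if a != nil && a.ID != claimAllocID &&
			a.NodeID == claimNodeID && !a.Terminal {
			return true
		}
	}
	return false
}

func main() {
	failed := &Alloc{ID: "alloc-1", NodeID: "node-1", Terminal: false}
	other := &Alloc{ID: "alloc-2", NodeID: "node-1", Terminal: false}

	// The failed alloc alone no longer blocks its own unpublish.
	fmt.Println(otherActiveClaimOnNode([]*Alloc{failed}, "alloc-1", "node-1")) // false

	// A different live alloc on the same node still defers the detach.
	fmt.Println(otherActiveClaimOnNode([]*Alloc{failed, other}, "alloc-1", "node-1")) // true
}
```

The "non-terminal allocation on same node" test case added above exercises exactly the second situation: the other claim sits on the same node, so the unpublish succeeds without a controller detach.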