From 7cff2de942a00bf02cc55e916effe8bf90b458d5 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 7 Oct 2020 10:43:23 -0400 Subject: [PATCH] csi: allow more than 1 writer claim for multi-writer mode (#9040) Fixes a bug where CSI volumes with the `MULTI_NODE_MULTI_WRITER` access mode were using the same logic as `MULTI_NODE_SINGLE_WRITER` to determine whether the volume had writer claims available for scheduling. Extends CSI claim endpoint test to exercise multi-reader and make sure `WriteFreeClaims` is exercised for multi-writer in feasibility test. --- CHANGELOG.md | 1 + nomad/csi_endpoint_test.go | 19 +++++++++++++++++++ nomad/structs/csi.go | 7 ++++++- nomad/structs/csi_test.go | 5 +++++ scheduler/feasible_test.go | 2 +- 5 files changed, 32 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 823b2b729b9..01bf9f07a61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ BUG FIXES: * core: Fixed a bug where blocking queries would not include the query's maximum wait time when calculating whether it was safe to retry. [[GH-8921](https://github.com/hashicorp/nomad/issues/8921)] * consul: Fixed a bug to correctly validate task when using script-checks in group-level services [[GH-8952](https://github.com/hashicorp/nomad/issues/8952)] + * csi: Fixed a bug where multi-writer volumes were allowed only 1 write claim. [[GH-9040](https://github.com/hashicorp/nomad/issues/9040)] ## 0.12.5 (September 17, 2020) diff --git a/nomad/csi_endpoint_test.go b/nomad/csi_endpoint_test.go index 0763f7418c8..03194c3914d 100644 --- a/nomad/csi_endpoint_test.go +++ b/nomad/csi_endpoint_test.go @@ -325,6 +325,25 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) { require.Equal(t, id0, volGetResp.Volume.ID) require.Len(t, volGetResp.Volume.ReadAllocs, 1) require.Len(t, volGetResp.Volume.WriteAllocs, 1) + + // Make a second reader claim + alloc3 := mock.Alloc() + alloc3.JobID = uuid.Generate() + summary = mock.JobSummary(alloc3.JobID) + index++ + require.NoError(t, state.UpsertJobSummary(index, summary)) + index++ + require.NoError(t, state.UpsertAllocs(index, []*structs.Allocation{alloc3})) + claimReq.AllocationID = alloc3.ID + err = msgpackrpc.CallWithCodec(codec, "CSIVolume.Claim", claimReq, claimResp) + require.NoError(t, err) + + // Verify the new claim was set + err = msgpackrpc.CallWithCodec(codec, "CSIVolume.Get", volGetReq, volGetResp) + require.NoError(t, err) + require.Equal(t, id0, volGetResp.Volume.ID) + require.Len(t, volGetResp.Volume.ReadAllocs, 2) + require.Len(t, volGetResp.Volume.WriteAllocs, 1) } // TestCSIVolumeEndpoint_ClaimWithController exercises the VolumeClaim RPC diff --git a/nomad/structs/csi.go b/nomad/structs/csi.go index 2c2b688853c..30404344ddf 100644 --- a/nomad/structs/csi.go +++ b/nomad/structs/csi.go @@ -384,8 +384,13 @@ func (v *CSIVolume) WriteSchedulable() bool { // WriteFreeClaims determines if there are any free write claims available func (v *CSIVolume) WriteFreeClaims() bool { switch v.AccessMode { - case CSIVolumeAccessModeSingleNodeWriter, CSIVolumeAccessModeMultiNodeSingleWriter, CSIVolumeAccessModeMultiNodeMultiWriter: + case CSIVolumeAccessModeSingleNodeWriter, CSIVolumeAccessModeMultiNodeSingleWriter: return len(v.WriteAllocs) == 0 + case CSIVolumeAccessModeMultiNodeMultiWriter: + // the CSI spec doesn't allow for setting a max number of writers. + // we track node resource exhaustion through v.ResourceExhausted + // which is checked in WriteSchedulable + return true default: return false } diff --git a/nomad/structs/csi_test.go b/nomad/structs/csi_test.go index e048ec938a2..d5f63b2413d 100644 --- a/nomad/structs/csi_test.go +++ b/nomad/structs/csi_test.go @@ -36,6 +36,11 @@ func TestCSIVolumeClaim(t *testing.T) { vol.ClaimRelease(claim) require.True(t, vol.ReadSchedulable()) require.True(t, vol.WriteFreeClaims()) + + vol.AccessMode = CSIVolumeAccessModeMultiNodeMultiWriter + require.NoError(t, vol.ClaimWrite(claim, alloc)) + require.NoError(t, vol.ClaimWrite(claim, alloc)) + require.True(t, vol.WriteFreeClaims()) } func TestCSIPluginJobs(t *testing.T) { diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index 941fda3e5f2..1e87bb9a01c 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -292,7 +292,7 @@ func TestCSIVolumeChecker(t *testing.T) { vol := structs.NewCSIVolume(vid, index) vol.PluginID = "foo" vol.Namespace = structs.DefaultNamespace - vol.AccessMode = structs.CSIVolumeAccessModeMultiNodeSingleWriter + vol.AccessMode = structs.CSIVolumeAccessModeMultiNodeMultiWriter vol.AttachmentMode = structs.CSIVolumeAttachmentModeFilesystem err := state.CSIVolumeRegister(index, []*structs.CSIVolume{vol}) require.NoError(t, err)