diff --git a/e2e/csi/csi.go b/e2e/csi/csi.go index 58e784cf6ca..395e3e68b95 100644 --- a/e2e/csi/csi.go +++ b/e2e/csi/csi.go @@ -104,8 +104,13 @@ func (tc *CSIVolumesTest) TestEBSVolumeClaim(f *framework.F) { // Shutdown the writer so we can run a reader. // we could mount the EBS volume with multi-attach, but we // want this test to exercise the unpublish workflow. - // this runs the equivalent of 'nomad job stop -purge' - nomadClient.Jobs().Deregister(writeJobID, true, nil) + // + // TODO(tgross): we should pass true here to run the equivalent + // of 'nomad job stop -purge' but this makes the test really + // racy. Once the unmount hang problem with -purge is fixed, + // we can restore this. + nomadClient.Jobs().Deregister(writeJobID, false, nil) + e2eutil.WaitForAllocStopped(t, nomadClient, writeAllocID) // deploy a job so we can read from the volume readJobID := "read-ebs-" + uuid[0:8] @@ -179,6 +184,7 @@ func (tc *CSIVolumesTest) TestEFSVolumeClaim(f *framework.F) { // does not. // this runs the equivalent of 'nomad job stop' nomadClient.Jobs().Deregister(writeJobID, false, nil) + e2eutil.WaitForAllocStopped(t, nomadClient, writeAllocID) // deploy a job that reads from the volume. readJobID := "read-efs-" + uuid[0:8] diff --git a/e2e/e2eutil/utils.go b/e2e/e2eutil/utils.go index b74af455206..15ce69461a7 100644 --- a/e2e/e2eutil/utils.go +++ b/e2e/e2eutil/utils.go @@ -162,6 +162,29 @@ func WaitForAllocNotPending(t *testing.T, nomadClient *api.Client, allocID strin }) } +func WaitForAllocStopped(t *testing.T, nomadClient *api.Client, allocID string) { + testutil.WaitForResultRetries(retries, func() (bool, error) { + time.Sleep(time.Millisecond * 100) + alloc, _, err := nomadClient.Allocations().Info(allocID, nil) + if err != nil { + return false, err + } + switch alloc.ClientStatus { + case structs.AllocClientStatusComplete: + return true, nil + case structs.AllocClientStatusFailed: + return true, nil + case structs.AllocClientStatusLost: + return true, nil + default: + return false, fmt.Errorf("expected stopped alloc, but was: %s", + alloc.ClientStatus) + } + }, func(err error) { + t.Fatalf("failed to wait on alloc: %v", err) + }) +} + func AllocIDsFromAllocationListStubs(allocs []*api.AllocationListStub) []string { allocIDs := make([]string, 0, len(allocs)) for _, alloc := range allocs { diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 5f06ccdde4c..a451817fb58 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -709,10 +709,12 @@ func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobD // For a job with volumes, find its volumes before deleting the job volumesToGC := make(map[string]*structs.VolumeRequest) - for _, tg := range job.TaskGroups { - for _, vol := range tg.Volumes { - if vol.Type == structs.VolumeTypeCSI { - volumesToGC[vol.Source] = vol + if job != nil { + for _, tg := range job.TaskGroups { + for _, vol := range tg.Volumes { + if vol.Type == structs.VolumeTypeCSI { + volumesToGC[vol.Source] = vol + } } } }