Skip to content

Commit

Permalink
Prevent deadlocks in persistable channel pause test (#18410)
Browse files Browse the repository at this point in the history
* Prevent deadlocks in persistable channel pause test

Because the old paused/resumed channels were reused in this test, there
was a potential for deadlock. This PR ensures that the channels are always
re-obtained.

It further adds some control code to detect hangs in the future, and it
ensures that the pausing warning is not shown on shutdown.

Signed-off-by: Andrew Thornton <[email protected]>

* do not warn but do pause

Signed-off-by: Andrew Thornton <[email protected]>
  • Loading branch information
zeripath authored Jan 25, 2022
1 parent b53fd5f commit 713985b
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 6 deletions.
48 changes: 45 additions & 3 deletions modules/queue/queue_disk_channel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,11 +287,16 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {
assert.Nil(t, result2)

pausable.Resume()
paused, resumed = pausable.IsPausedIsResumed()

select {
case <-paused:
assert.Fail(t, "Queue should be resumed")
return
case <-resumed:
default:
assert.Fail(t, "Queue should be resumed")
return
}

select {
Expand Down Expand Up @@ -345,16 +350,22 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {

pausable.Resume()

paused, resumed = pausable.IsPausedIsResumed()
select {
case <-paused:
assert.Fail(t, "Queue should not be paused")
return
case <-resumed:
default:
assert.Fail(t, "Queue should be resumed")
return
}

select {
case result1 = <-handleChan:
case <-time.After(500 * time.Millisecond):
assert.Fail(t, "handler chan should contain test1")
return
}
assert.Equal(t, test1.TestString, result1.TestString)
assert.Equal(t, test1.TestInt, result1.TestInt)
Expand All @@ -369,7 +380,12 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {
}

// Wait til it is closed
<-queue.(*PersistableChannelQueue).closed
select {
case <-queue.(*PersistableChannelQueue).closed:
case <-time.After(5 * time.Second):
assert.Fail(t, "queue should close")
return
}

err = queue.Push(&test1)
assert.NoError(t, err)
Expand All @@ -378,6 +394,7 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {
select {
case <-handleChan:
assert.Fail(t, "Handler processing should have stopped")
return
default:
}

Expand All @@ -393,6 +410,7 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {
select {
case <-handleChan:
assert.Fail(t, "Handler processing should have stopped")
return
default:
}

Expand Down Expand Up @@ -431,6 +449,7 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {
select {
case <-handleChan:
assert.Fail(t, "Handler processing should have stopped")
return
case <-paused:
}

Expand All @@ -449,13 +468,36 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {
select {
case <-handleChan:
assert.Fail(t, "Handler processing should have stopped")
return
default:
}

pausable.Resume()
paused, resumed = pausable.IsPausedIsResumed()
select {
case <-paused:
assert.Fail(t, "Queue should not be paused")
return
case <-resumed:
default:
assert.Fail(t, "Queue should be resumed")
return
}

result3 := <-handleChan
result4 := <-handleChan
var result3, result4 *testData

select {
case result3 = <-handleChan:
case <-time.After(1 * time.Second):
assert.Fail(t, "Handler processing should have resumed")
return
}
select {
case result4 = <-handleChan:
case <-time.After(1 * time.Second):
assert.Fail(t, "Handler processing should have resumed")
return
}
if result4.TestString == test1.TestString {
result3, result4 = result4, result3
}
Expand Down
11 changes: 8 additions & 3 deletions modules/queue/workerpool.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,14 @@ func (p *WorkerPool) addWorkers(ctx context.Context, cancel context.CancelFunc,
cancel()
}
if p.hasNoWorkerScaling() {
log.Warn(
"Queue: %d is configured to be non-scaling and has no workers - this configuration is likely incorrect.\n"+
"The queue will be paused to prevent data-loss with the assumption that you will add workers and unpause as required.", p.qid)
select {
case <-p.baseCtx.Done():
// Don't warn if the baseCtx is shutdown
default:
log.Warn(
"Queue: %d is configured to be non-scaling and has no workers - this configuration is likely incorrect.\n"+
"The queue will be paused to prevent data-loss with the assumption that you will add workers and unpause as required.", p.qid)
}
p.pause()
}
p.lock.Unlock()
Expand Down

0 comments on commit 713985b

Please sign in to comment.