Skip to content

Commit

Permalink
Fix race condition in engine start/stop (apache#8995)
Browse files Browse the repository at this point in the history
  • Loading branch information
piiswrong authored and zheng-da committed Jun 28, 2018
1 parent 7f7e8dd commit 3f8c019
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions src/engine/threaded_engine_perdevice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,10 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
int cpu_priority_nthreads = dmlc::GetEnv("MXNET_CPU_PRIORITY_NTHREADS", 4);
cpu_priority_worker_.reset(new ThreadWorkerBlock<kPriorityQueue>());
cpu_priority_worker_->pool.reset(new ThreadPool(
cpu_priority_nthreads, [this]() {
this->CPUWorker(Context(), cpu_priority_worker_.get());
}));
cpu_priority_nthreads,
[this](std::shared_ptr<ThreadPool::SimpleEvent> ready_event) {
this->CPUWorker(Context(), cpu_priority_worker_.get(), ready_event);
}, true));
// GPU tasks will be created lazily
}

Expand All @@ -122,9 +123,10 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
auto ptr =
cpu_normal_workers_.Get(dev_id, [this, ctx, nthread]() {
auto blk = new ThreadWorkerBlock<kWorkerQueue>();
blk->pool.reset(new ThreadPool(nthread, [this, ctx, blk] () {
this->CPUWorker(ctx, blk);
}));
blk->pool.reset(new ThreadPool(nthread,
[this, ctx, blk](std::shared_ptr<ThreadPool::SimpleEvent> ready_event) {
this->CPUWorker(ctx, blk, ready_event);
}, true));
return blk;
});
if (ptr) {
Expand Down Expand Up @@ -259,12 +261,14 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
*/
template<dmlc::ConcurrentQueueType type>
inline void CPUWorker(Context ctx,
ThreadWorkerBlock<type> *block) {
ThreadWorkerBlock<type> *block,
std::shared_ptr<ThreadPool::SimpleEvent> ready_event) {
this->is_worker_ = true;
auto* task_queue = &(block->task_queue);
RunContext run_ctx{ctx, nullptr};
// execute task
OprBlock* opr_block;
ready_event->signal();
while (task_queue->Pop(&opr_block)) {
this->ExecuteOprBlock(run_ctx, opr_block);
}
Expand Down

0 comments on commit 3f8c019

Please sign in to comment.