From f6de248323c2fbb7cbb59b51d7448b2322caec4d Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Fri, 30 Mar 2018 14:08:17 +0800 Subject: [PATCH] fix server shutdown --- paddle/fluid/operators/detail/grpc_server.cc | 10 +++++----- paddle/fluid/operators/listen_and_serv_op.cc | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/operators/detail/grpc_server.cc b/paddle/fluid/operators/detail/grpc_server.cc index 9691d1e86b111d..109c762e749064 100644 --- a/paddle/fluid/operators/detail/grpc_server.cc +++ b/paddle/fluid/operators/detail/grpc_server.cc @@ -174,13 +174,13 @@ void AsyncGRPCServer::ShutdownQueue() { std::unique_lock lock(cq_mutex_); cq_send_->Shutdown(); cq_get_->Shutdown(); - is_shut_down_ = true; } // This URL explains why shutdown is complicate: void AsyncGRPCServer::ShutDown() { - server_->Shutdown(); + is_shut_down_ = true; ShutdownQueue(); + server_->Shutdown(); } void AsyncGRPCServer::TryToRegisterNewSendOne() { @@ -213,14 +213,14 @@ void AsyncGRPCServer::HandleRequest(::grpc::ServerCompletionQueue* cq, bool ok = false; while (true) { if (!cq->Next(&tag, &ok)) { - LOG(INFO) << cq_name << " get CompletionQueue shutdown!"; + LOG(INFO) << cq_name << " CompletionQueue shutdown!"; break; } PADDLE_ENFORCE(tag); // FIXME(typhoonzero): de-couple the barriers with recv_op - if (cq_name == "cq_get") WaitCond(1); - if (cq_name == "cq_send") WaitCond(0); + if (!is_shut_down_ && cq_name == "cq_get") WaitCond(1); + if (!is_shut_down_ && cq_name == "cq_send") WaitCond(0); RequestBase* base = (RequestBase*)tag; // reference: diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 08b83375dd5462..e45e81a56ee86d 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -70,7 +70,6 @@ class ListenAndServOp : public framework::OperatorBase { void Stop() override { rpc_service_->Push(LISTEN_TERMINATE_MESSAGE); - rpc_service_->ShutDown(); server_thread_->join(); }