From ac209793d926601e1447518f70f36111363dac19 Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Wed, 31 May 2023 23:15:34 -0700
Subject: [PATCH] Remove lock returned from scheduler update

---
 src/backend_model.cc            | 13 ++++++-------
 src/dynamic_batch_scheduler.cc  |  6 +-----
 src/dynamic_batch_scheduler.h   |  2 +-
 src/ensemble_scheduler.cc       |  2 +-
 src/ensemble_scheduler.h        |  2 +-
 src/model_lifecycle.cc          |  9 ++++++---
 src/scheduler.h                 | 11 ++++-------
 src/sequence_batch_scheduler.cc | 10 +++-------
 src/sequence_batch_scheduler.h  |  2 +-
 9 files changed, 24 insertions(+), 33 deletions(-)

diff --git a/src/backend_model.cc b/src/backend_model.cc
index 9dcf768af..adcd6d41c 100644
--- a/src/backend_model.cc
+++ b/src/backend_model.cc
@@ -264,17 +264,16 @@ TritonModel::UpdateInstanceGroup(const inference::ModelConfig& new_model_config)
       min_compute_capability_, backend_->BackendAttributes().preferred_groups_,
       &model_config));
   RETURN_IF_ERROR(ValidateInstanceGroup(model_config, min_compute_capability_));
+  // Updating the 'config_' has to be an atomic operation, because it can be
+  // accessed by other threads concurrently.
+  *config_.mutable_instance_group() = model_config.instance_group();
 
   // Prepare the new instances on the new config.
   RETURN_IF_ERROR(TritonModelInstance::SetInstances(
       this, backend_cmdline_config_map_, host_policy_map_, model_config));
-
-  // At this point, the new model config and instances are ready but not yet
-  // written into this object. Update the scheduler and retrieve the lock
-  // holding off new inference requests, and then commit the changes.
-  std::unique_ptr<std::lock_guard<std::mutex>> lock;
-  RETURN_IF_ERROR(scheduler_->Update(&lock));
-  *config_.mutable_instance_group() = model_config.instance_group();
+  // Update the scheduler while the union of the old and new sets of instances
+  // is present.
+  RETURN_IF_ERROR(scheduler_->Update());
   RETURN_IF_ERROR(CommitInstances());
 
   return Status::Success;
diff --git a/src/dynamic_batch_scheduler.cc b/src/dynamic_batch_scheduler.cc
index 35627682b..e6c0faeb6 100644
--- a/src/dynamic_batch_scheduler.cc
+++ b/src/dynamic_batch_scheduler.cc
@@ -171,17 +171,13 @@ DynamicBatchScheduler::~DynamicBatchScheduler()
 }
 
 Status
-DynamicBatchScheduler::Update(
-    std::unique_ptr<std::lock_guard<std::mutex>>* lock)
+DynamicBatchScheduler::Update()
 {
   if (model_instance_ != nullptr) {
     return Status(
         Status::Code::INTERNAL,
         "Cannot update dynamic batch scheduler specific to a model instance");
   }
-  if (lock != nullptr) {
-    lock->reset(new std::lock_guard<std::mutex>(update_mu_));
-  }
 
   return Status::Success;
 }
diff --git a/src/dynamic_batch_scheduler.h b/src/dynamic_batch_scheduler.h
index a30c64a29..d126995b9 100644
--- a/src/dynamic_batch_scheduler.h
+++ b/src/dynamic_batch_scheduler.h
@@ -85,7 +85,7 @@ class DynamicBatchScheduler : public Scheduler {
   }
 
   // \see Scheduler::Update()
-  Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) override;
+  Status Update() override;
 
   // \see Scheduler::Stop()
   void Stop() override { stop_ = true; }
diff --git a/src/ensemble_scheduler.cc b/src/ensemble_scheduler.cc
index 007494442..4bc7f0d5b 100644
--- a/src/ensemble_scheduler.cc
+++ b/src/ensemble_scheduler.cc
@@ -1379,7 +1379,7 @@ EnsembleScheduler::EnsembleScheduler(
 }
 
 Status
-EnsembleScheduler::Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock)
+EnsembleScheduler::Update()
 {
   return Status(Status::Code::INTERNAL, "ensemble scheduler cannot be updated");
 }
diff --git a/src/ensemble_scheduler.h b/src/ensemble_scheduler.h
index 85f938b8c..59f0522b5 100644
--- a/src/ensemble_scheduler.h
+++ b/src/ensemble_scheduler.h
@@ -99,7 +99,7 @@ class EnsembleScheduler : public Scheduler {
   size_t InflightInferenceCount() override { return inflight_count_; }
 
   // \see Scheduler::Update()
-  Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) override;
+  Status Update() override;
 
   // \see Scheduler::Stop()
   void Stop() override {}
diff --git a/src/model_lifecycle.cc b/src/model_lifecycle.cc
index 98189ab2a..e86d422ff 100644
--- a/src/model_lifecycle.cc
+++ b/src/model_lifecycle.cc
@@ -646,9 +646,12 @@ ModelLifeCycle::UpdateModelConfig(
     return;
   }
 
-  // Update model instance group. It is the responsibility of the model to
-  // safeguard the update process, so the 'model_info_lock' is released during
-  // update to allow concurrent inference.
+  // Update model instance group. The 'model_info->mtx_' may be released while
+  // the model is updating its instance group, because no model info from this
+  // model lifecycle object is being accessed by the model during the update,
+  // and the model repository manager ensures no concurrent update on the same
+  // model can take place. Therefore, releasing the lock allows the model to be
+  // accessed, which enables inference during the update.
   model_info_lock.unlock();
   Status status = model->UpdateInstanceGroup(new_model_config);
   model_info_lock.lock();
diff --git a/src/scheduler.h b/src/scheduler.h
index cab549ac0..8954ab928 100644
--- a/src/scheduler.h
+++ b/src/scheduler.h
@@ -73,13 +73,10 @@ class Scheduler {
   // Return the number of in-flight inferences tracked by the scheduler.
   virtual size_t InflightInferenceCount() = 0;
 
-  // Update the scheduler during a model update (i.e. instances). This function
-  // should block any new inference requests into 'Enqueue()' when ready (i.e.
-  // sequence models), and provide the mutex blocking new requests in locked
-  // state back to the caller, so the caller can finalize its own internal
-  // update and then release the mutex. If Status::Success is returned, then the
-  // is successful. Otherwise, the update has failed.
-  virtual Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) = 0;
+  // Update the scheduler during a model instance update.
+  // If Status::Success is returned, then the update is successful. Otherwise,
+  // the update has failed.
+  virtual Status Update() = 0;
 
   // Instruct the scheduler to stop processing future requests unless they are
   // considered as in-flight.
diff --git a/src/sequence_batch_scheduler.cc b/src/sequence_batch_scheduler.cc
index 8beac5649..b7da1474f 100644
--- a/src/sequence_batch_scheduler.cc
+++ b/src/sequence_batch_scheduler.cc
@@ -133,8 +133,7 @@ SequenceBatchScheduler::Create(
 }
 
 Status
-SequenceBatchScheduler::Update(
-    std::unique_ptr<std::lock_guard<std::mutex>>* lock)
+SequenceBatchScheduler::Update()
 {
   std::unique_lock<std::mutex> lk(mu_);
 
@@ -162,13 +161,10 @@ SequenceBatchScheduler::Update(
   queue_request_cnts_.resize(instance_count, 0);
   CreateBatchers();
 
-  // The update is complete, 'Enqueue()' may resume after the mutex is released.
+  // The update is completed.
   updating_ = false;
   update_complete_cv_.notify_all();
-  if (lock != nullptr) {
-    lock->reset(
-        new std::lock_guard<std::mutex>(*lk.release(), std::adopt_lock));
-  }
+
   return Status::Success;
 }
 
diff --git a/src/sequence_batch_scheduler.h b/src/sequence_batch_scheduler.h
index cbdf66c42..9192ebc2d 100644
--- a/src/sequence_batch_scheduler.h
+++ b/src/sequence_batch_scheduler.h
@@ -74,7 +74,7 @@ class SequenceBatchScheduler : public Scheduler {
   }
 
   // \see Scheduler::Update()
-  Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) override;
+  Status Update() override;
 
   // \see Scheduler::Stop()
   void Stop() override { stop_ = true; }
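
Note on the interface change: Scheduler::Update() no longer returns a held
std::lock_guard to the caller, so callers such as
TritonModel::UpdateInstanceGroup() simply call Update() and then commit; any
scheduler that must hold off new requests (e.g. the sequence batch scheduler)
coordinates Enqueue() and Update() internally with its own mutex and condition
variable. The following is a minimal standalone sketch of that pattern, not
Triton code; the names MiniScheduler, updating_ and update_cv_ are
illustrative only.

    // Sketch: scheduler-owned update/enqueue coordination (assumed names).
    #include <condition_variable>
    #include <cstddef>
    #include <mutex>

    class MiniScheduler {
     public:
      // New requests wait here while an update is in progress.
      void Enqueue(int request)
      {
        std::unique_lock<std::mutex> lk(mu_);
        update_cv_.wait(lk, [this] { return !updating_; });
        // ... place 'request' into the per-instance queues under 'mu_' ...
        (void)request;
      }

      // Rebuild per-instance state, then let Enqueue() resume. No lock is
      // handed back to the caller; the critical section ends here.
      void Update(size_t instance_count)
      {
        std::unique_lock<std::mutex> lk(mu_);
        updating_ = true;
        // ... resize bookkeeping, recreate batchers for 'instance_count' ...
        (void)instance_count;
        updating_ = false;
        update_cv_.notify_all();
      }

     private:
      std::mutex mu_;
      std::condition_variable update_cv_;
      bool updating_ = false;
    };

    int main()
    {
      MiniScheduler scheduler;
      scheduler.Update(/*instance_count=*/2);  // reconfigure instances
      scheduler.Enqueue(/*request=*/1);        // proceeds once the update is done
      return 0;
    }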