From ac209793d926601e1447518f70f36111363dac19 Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Wed, 31 May 2023 23:15:34 -0700
Subject: [PATCH] Remove lock returned from scheduler update

---
 src/backend_model.cc            | 13 ++++++-------
 src/dynamic_batch_scheduler.cc  |  6 +-----
 src/dynamic_batch_scheduler.h   |  2 +-
 src/ensemble_scheduler.cc       |  2 +-
 src/ensemble_scheduler.h        |  2 +-
 src/model_lifecycle.cc          |  9 ++++++---
 src/scheduler.h                 | 11 ++++-------
 src/sequence_batch_scheduler.cc | 10 +++-------
 src/sequence_batch_scheduler.h  |  2 +-
 9 files changed, 24 insertions(+), 33 deletions(-)

diff --git a/src/backend_model.cc b/src/backend_model.cc
index 9dcf768af..adcd6d41c 100644
--- a/src/backend_model.cc
+++ b/src/backend_model.cc
@@ -264,17 +264,16 @@ TritonModel::UpdateInstanceGroup(const inference::ModelConfig& new_model_config)
       min_compute_capability_, backend_->BackendAttributes().preferred_groups_,
       &model_config));
   RETURN_IF_ERROR(ValidateInstanceGroup(model_config, min_compute_capability_));
+  // Updating the 'config_' has to be an atomic operation, because it can be
+  // accessed by other threads concurrently.
+  *config_.mutable_instance_group() = model_config.instance_group();
 
   // Prepare the new instances on the new config.
   RETURN_IF_ERROR(TritonModelInstance::SetInstances(
       this, backend_cmdline_config_map_, host_policy_map_, model_config));
-
-  // At this point, the new model config and instances are ready but not yet
-  // written into this object. Update the scheduler and retrieve the lock
-  // holding off new inference requests, and then commit the changes.
-  std::unique_ptr<std::lock_guard<std::mutex>> lock;
-  RETURN_IF_ERROR(scheduler_->Update(&lock));
-  *config_.mutable_instance_group() = model_config.instance_group();
+  // Update the scheduler while the union of the old and new sets of instances
+  // is present.
+  RETURN_IF_ERROR(scheduler_->Update());
   RETURN_IF_ERROR(CommitInstances());
 
   return Status::Success;
diff --git a/src/dynamic_batch_scheduler.cc b/src/dynamic_batch_scheduler.cc
index 35627682b..e6c0faeb6 100644
--- a/src/dynamic_batch_scheduler.cc
+++ b/src/dynamic_batch_scheduler.cc
@@ -171,17 +171,13 @@ DynamicBatchScheduler::~DynamicBatchScheduler()
 }
 
 Status
-DynamicBatchScheduler::Update(
-    std::unique_ptr<std::lock_guard<std::mutex>>* lock)
+DynamicBatchScheduler::Update()
 {
   if (model_instance_ != nullptr) {
     return Status(
         Status::Code::INTERNAL,
         "Cannot update dynamic batch scheduler specific to a model instance");
   }
-  if (lock != nullptr) {
-    lock->reset(new std::lock_guard<std::mutex>(update_mu_));
-  }
 
   return Status::Success;
 }
diff --git a/src/dynamic_batch_scheduler.h b/src/dynamic_batch_scheduler.h
index a30c64a29..d126995b9 100644
--- a/src/dynamic_batch_scheduler.h
+++ b/src/dynamic_batch_scheduler.h
@@ -85,7 +85,7 @@ class DynamicBatchScheduler : public Scheduler {
   }
 
   // \see Scheduler::Update()
-  Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) override;
+  Status Update() override;
 
   // \see Scheduler::Stop()
   void Stop() override { stop_ = true; }
diff --git a/src/ensemble_scheduler.cc b/src/ensemble_scheduler.cc
index 007494442..4bc7f0d5b 100644
--- a/src/ensemble_scheduler.cc
+++ b/src/ensemble_scheduler.cc
@@ -1379,7 +1379,7 @@ EnsembleScheduler::EnsembleScheduler(
 }
 
 Status
-EnsembleScheduler::Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock)
+EnsembleScheduler::Update()
 {
   return Status(Status::Code::INTERNAL, "ensemble scheduler cannot be updated");
 }
diff --git a/src/ensemble_scheduler.h b/src/ensemble_scheduler.h
index 85f938b8c..59f0522b5 100644
--- a/src/ensemble_scheduler.h
+++ b/src/ensemble_scheduler.h
@@ -99,7 +99,7 @@ class EnsembleScheduler : public Scheduler {
   size_t InflightInferenceCount() override { return inflight_count_; }
 
   // \see Scheduler::Update()
-  Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) override;
+  Status Update() override;
 
   // \see Scheduler::Stop()
   void Stop() override {}
diff --git a/src/model_lifecycle.cc b/src/model_lifecycle.cc
index 98189ab2a..e86d422ff 100644
--- a/src/model_lifecycle.cc
+++ b/src/model_lifecycle.cc
@@ -646,9 +646,12 @@ ModelLifeCycle::UpdateModelConfig(
     return;
   }
 
-  // Update model instance group. It is the responsibility of the model to
-  // safeguard the update process, so the 'model_info_lock' is released during
-  // update to allow concurrent inference.
+  // Update model instance group. The 'model_info->mtx_' may be released while
+  // the model is updating its instance group, because no model info from this
+  // model lifecycle object is being accessed by the model during the update,
+  // and the model repository manager ensures no concurrent update on the same
+  // model can take place. Therefore, releasing the lock allows the model to be
+  // accessed, which enables inference during the update.
   model_info_lock.unlock();
   Status status = model->UpdateInstanceGroup(new_model_config);
   model_info_lock.lock();
diff --git a/src/scheduler.h b/src/scheduler.h
index cab549ac0..8954ab928 100644
--- a/src/scheduler.h
+++ b/src/scheduler.h
@@ -73,13 +73,10 @@ class Scheduler {
   // Return the number of in-flight inferences tracked by the scheduler.
   virtual size_t InflightInferenceCount() = 0;
 
-  // Update the scheduler during a model update (i.e. instances). This function
-  // should block any new inference requests into 'Enqueue()' when ready (i.e.
-  // sequence models), and provide the mutex blocking new requests in locked
-  // state back to the caller, so the caller can finalize its own internal
-  // update and then release the mutex. If Status::Success is returned, then the
-  // is successful. Otherwise, the update has failed.
-  virtual Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) = 0;
+  // Update the scheduler during a model instance update.
+  // If Status::Success is returned, then the update is successful. Otherwise,
+  // the update has failed.
+  virtual Status Update() = 0;
 
   // Instruct the scheduler to stop processing future requests unless they are
   // considered as in-flight.
diff --git a/src/sequence_batch_scheduler.cc b/src/sequence_batch_scheduler.cc
index 8beac5649..b7da1474f 100644
--- a/src/sequence_batch_scheduler.cc
+++ b/src/sequence_batch_scheduler.cc
@@ -133,8 +133,7 @@ SequenceBatchScheduler::Create(
 }
 
 Status
-SequenceBatchScheduler::Update(
-    std::unique_ptr<std::lock_guard<std::mutex>>* lock)
+SequenceBatchScheduler::Update()
 {
   std::unique_lock<std::mutex> lk(mu_);
 
@@ -162,13 +161,10 @@ SequenceBatchScheduler::Update(
   queue_request_cnts_.resize(instance_count, 0);
   CreateBatchers();
 
-  // The update is complete, 'Enqueue()' may resume after the mutex is released.
+  // The update is completed.
   updating_ = false;
   update_complete_cv_.notify_all();
-  if (lock != nullptr) {
-    lock->reset(
-        new std::lock_guard<std::mutex>(*lk.release(), std::adopt_lock));
-  }
+
   return Status::Success;
 }
 
diff --git a/src/sequence_batch_scheduler.h b/src/sequence_batch_scheduler.h
index cbdf66c42..9192ebc2d 100644
--- a/src/sequence_batch_scheduler.h
+++ b/src/sequence_batch_scheduler.h
@@ -74,7 +74,7 @@ class SequenceBatchScheduler : public Scheduler {
   }
 
   // \see Scheduler::Update()
-  Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) override;
+  Status Update() override;
 
   // \see Scheduler::Stop()
   void Stop() override { stop_ = true; }
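
Note on the interface change: Scheduler::Update() no longer returns a held
std::lock_guard to the caller, so callers such as
TritonModel::UpdateInstanceGroup() simply call Update() and then commit; any
scheduler that must hold off new requests (e.g. the sequence batch scheduler)
coordinates Enqueue() and Update() internally with its own mutex and condition
variable. The following is a minimal standalone sketch of that pattern, not
Triton code; the names MiniScheduler, updating_ and update_cv_ are
illustrative only.

    // Sketch: scheduler-owned update/enqueue coordination (assumed names).
    #include <condition_variable>
    #include <cstddef>
    #include <mutex>

    class MiniScheduler {
     public:
      // New requests wait here while an update is in progress.
      void Enqueue(int request)
      {
        std::unique_lock<std::mutex> lk(mu_);
        update_cv_.wait(lk, [this] { return !updating_; });
        // ... place 'request' into the per-instance queues under 'mu_' ...
        (void)request;
      }

      // Rebuild per-instance state, then let Enqueue() resume. No lock is
      // handed back to the caller; the critical section ends here.
      void Update(size_t instance_count)
      {
        std::unique_lock<std::mutex> lk(mu_);
        updating_ = true;
        // ... resize bookkeeping, recreate batchers for 'instance_count' ...
        (void)instance_count;
        updating_ = false;
        update_cv_.notify_all();
      }

     private:
      std::mutex mu_;
      std::condition_variable update_cv_;
      bool updating_ = false;
    };

    int main()
    {
      MiniScheduler scheduler;
      scheduler.Update(/*instance_count=*/2);  // reconfigure instances
      scheduler.Enqueue(/*request=*/1);        // proceeds once the update is done
      return 0;
    }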