Skip to content

Commit

Permalink
Remove lock returned from scheduler update
Browse files Browse the repository at this point in the history
  • Loading branch information
kthui committed Jun 1, 2023
1 parent e2fc02b commit ac20979
Show file tree
Hide file tree
Showing 9 changed files with 24 additions and 33 deletions.
13 changes: 6 additions & 7 deletions src/backend_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -264,17 +264,16 @@ TritonModel::UpdateInstanceGroup(const inference::ModelConfig& new_model_config)
min_compute_capability_, backend_->BackendAttributes().preferred_groups_,
&model_config));
RETURN_IF_ERROR(ValidateInstanceGroup(model_config, min_compute_capability_));
// Updating the 'config_' has to be an atomic operation, because it can be
// accessed by other threads concurrently.
*config_.mutable_instance_group() = model_config.instance_group();

// Prepare the new instances on the new config.
RETURN_IF_ERROR(TritonModelInstance::SetInstances(
this, backend_cmdline_config_map_, host_policy_map_, model_config));

// At this point, the new model config and instances are ready but not yet
// written into this object. Update the scheduler and retrieve the lock
// holding off new inference requests, and then commit the changes.
std::unique_ptr<std::lock_guard<std::mutex>> lock;
RETURN_IF_ERROR(scheduler_->Update(&lock));
*config_.mutable_instance_group() = model_config.instance_group();
// Update the scheduler while the union of the old and new set of instances
// are presence.
RETURN_IF_ERROR(scheduler_->Update());
RETURN_IF_ERROR(CommitInstances());

return Status::Success;
Expand Down
6 changes: 1 addition & 5 deletions src/dynamic_batch_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,17 +171,13 @@ DynamicBatchScheduler::~DynamicBatchScheduler()
}

Status
DynamicBatchScheduler::Update(
std::unique_ptr<std::lock_guard<std::mutex>>* lock)
DynamicBatchScheduler::Update()
{
if (model_instance_ != nullptr) {
return Status(
Status::Code::INTERNAL,
"Cannot update dynamic batch scheduler specific to a model instance");
}
if (lock != nullptr) {
lock->reset(new std::lock_guard<std::mutex>(update_mu_));
}
return Status::Success;
}

Expand Down
2 changes: 1 addition & 1 deletion src/dynamic_batch_scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class DynamicBatchScheduler : public Scheduler {
}

// \see Scheduler::Update()
Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) override;
Status Update() override;

// \see Scheduler::Stop()
void Stop() override { stop_ = true; }
Expand Down
2 changes: 1 addition & 1 deletion src/ensemble_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1379,7 +1379,7 @@ EnsembleScheduler::EnsembleScheduler(
}

Status
EnsembleScheduler::Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock)
EnsembleScheduler::Update()
{
return Status(Status::Code::INTERNAL, "ensemble scheduler cannot be updated");
}
Expand Down
2 changes: 1 addition & 1 deletion src/ensemble_scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class EnsembleScheduler : public Scheduler {
size_t InflightInferenceCount() override { return inflight_count_; }

// \see Scheduler::Update()
Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) override;
Status Update() override;

// \see Scheduler::Stop()
void Stop() override {}
Expand Down
9 changes: 6 additions & 3 deletions src/model_lifecycle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -646,9 +646,12 @@ ModelLifeCycle::UpdateModelConfig(
return;
}

// Update model instance group. It is the responsibility of the model to
// safeguard the update process, so the 'model_info_lock' is released during
// update to allow concurrent inference.
// Update model instance group. The 'model_info->mtx_' may be released while
// the model is updating its instance group, because no model info from this
// model lifecycle object is being accessed by the model during the update,
// and model repository manager will ensure no concurrent update on the same
// model may take place. Therefore, releasing the lock allows the model to be
// accessed which enables inference during update.
model_info_lock.unlock();
Status status = model->UpdateInstanceGroup(new_model_config);
model_info_lock.lock();
Expand Down
11 changes: 4 additions & 7 deletions src/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,10 @@ class Scheduler {
// Return the number of in-flight inferences tracked by the scheduler.
virtual size_t InflightInferenceCount() = 0;

// Update the scheduler during a model update (i.e. instances). This function
// should block any new inference requests into 'Enqueue()' when ready (i.e.
// sequence models), and provide the mutex blocking new requests in locked
// state back to the caller, so the caller can finalize its own internal
// update and then release the mutex. If Status::Success is returned, then the
// is successful. Otherwise, the update has failed.
virtual Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) = 0;
// Update the scheduler during a model instance update.
// If Status::Success is returned, then the update is successful. Otherwise,
// the update has failed.
virtual Status Update() = 0;

// Instruct the scheduler to stop processing future requests unless they are
// considered as in-flight.
Expand Down
10 changes: 3 additions & 7 deletions src/sequence_batch_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,7 @@ SequenceBatchScheduler::Create(
}

Status
SequenceBatchScheduler::Update(
std::unique_ptr<std::lock_guard<std::mutex>>* lock)
SequenceBatchScheduler::Update()
{
std::unique_lock<std::mutex> lk(mu_);

Expand Down Expand Up @@ -162,13 +161,10 @@ SequenceBatchScheduler::Update(
queue_request_cnts_.resize(instance_count, 0);
CreateBatchers();

// The update is complete, 'Enqueue()' may resume after the mutex is released.
// The update is completed.
updating_ = false;
update_complete_cv_.notify_all();
if (lock != nullptr) {
lock->reset(
new std::lock_guard<std::mutex>(*lk.release(), std::adopt_lock));
}

return Status::Success;
}

Expand Down
2 changes: 1 addition & 1 deletion src/sequence_batch_scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class SequenceBatchScheduler : public Scheduler {
}

// \see Scheduler::Update()
Status Update(std::unique_ptr<std::lock_guard<std::mutex>>* lock) override;
Status Update() override;

// \see Scheduler::Stop()
void Stop() override { stop_ = true; }
Expand Down

0 comments on commit ac20979

Please sign in to comment.