fix: should check model status before start it (#1277)
* fix: check model status before start

* fix: read timeout for checking update

* fix: only set logger for engine once
vansangpfiev authored Sep 20, 2024
1 parent d9c5b41 commit 3f7d3ec
Showing 7 changed files with 81 additions and 38 deletions.
31 changes: 7 additions & 24 deletions engine/commands/chat_cmd.cc
@@ -2,9 +2,10 @@
#include "httplib.h"

#include "cortex_upd_cmd.h"
#include "model_status_cmd.h"
#include "server_start_cmd.h"
#include "trantor/utils/Logger.h"
#include "utils/logging_utils.h"
#include "server_start_cmd.h"

namespace commands {
namespace {
@@ -45,29 +46,11 @@ void ChatCmd::Exec(std::string msg) {
}

auto address = host_ + ":" + std::to_string(port_);
// Check if model is loaded
// TODO(sang) only llamacpp support modelstatus for now
if (mc_.engine.find("llamacpp") != std::string::npos) {
httplib::Client cli(address);
nlohmann::json json_data;
json_data["model"] = mc_.name;
json_data["engine"] = mc_.engine;

auto data_str = json_data.dump();

// TODO: move this to another message?
auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
data_str.data(), data_str.size(), "application/json");
if (res) {
if (res->status != httplib::StatusCode::OK_200) {
CTL_ERR(res->body);
return;
}
} else {
auto err = res.error();
CTL_ERR("HTTP error: " << httplib::to_string(err));
return;
}
// Only check if llamacpp engine
if ((mc_.engine.find("llamacpp") != std::string::npos) &&
!commands::ModelStatusCmd().IsLoaded(host_, port_, mc_)) {
CLI_LOG("Model is not loaded yet!");
return;
}

// Some instruction for user here
1 change: 1 addition & 0 deletions engine/commands/cortex_upd_cmd.h
@@ -70,6 +70,7 @@ inline void CheckNewUpdate() {

httplib::Client cli(host_name);
cli.set_connection_timeout(kTimeoutCheckUpdate);
cli.set_read_timeout(kTimeoutCheckUpdate);
if (auto res = cli.Get(release_path)) {
if (res->status == httplib::StatusCode::OK_200) {
try {
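The added read timeout closes a gap in the update check: set_connection_timeout only bounds how long the client waits to establish the TCP connection, so a server that accepted the connection but then stalled could hang CheckNewUpdate() indefinitely. A minimal sketch of the pattern follows (illustrative, not part of the commit; the constant name and the ten-second value are stand-ins, while the real code reuses kTimeoutCheckUpdate for both timeouts):

#include <chrono>
#include <string>
#include "httplib.h"

// Stand-in for the timeout constant defined in cortex_upd_cmd.h.
constexpr auto kCheckTimeout = std::chrono::seconds(10);

bool CanReachReleases(const std::string& host, const std::string& path) {
  httplib::Client cli(host);
  cli.set_connection_timeout(kCheckTimeout);  // bound the TCP connect phase
  cli.set_read_timeout(kCheckTimeout);        // bound the wait for the response
  auto res = cli.Get(path);
  return res && res->status == httplib::StatusCode::OK_200;
}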
7 changes: 7 additions & 0 deletions engine/commands/model_start_cmd.cc
@@ -1,6 +1,7 @@
#include "model_start_cmd.h"
#include "cortex_upd_cmd.h"
#include "httplib.h"
#include "model_status_cmd.h"
#include "nlohmann/json.hpp"
#include "server_start_cmd.h"
#include "trantor/utils/Logger.h"
@@ -19,6 +20,12 @@ bool ModelStartCmd::Exec() {
<< commands::GetCortexBinary() << " start` to start server!");
return false;
}
// Only check for llamacpp for now
if ((mc_.engine.find("llamacpp") != std::string::npos) &&
commands::ModelStatusCmd().IsLoaded(host_, port_, mc_)) {
CLI_LOG("Model has already been started!");
return true;
}

httplib::Client cli(host_ + ":" + std::to_string(port_));

31 changes: 31 additions & 0 deletions engine/commands/model_status_cmd.cc
@@ -0,0 +1,31 @@
#include "model_status_cmd.h"
#include "config/yaml_config.h"
#include "httplib.h"
#include "nlohmann/json.hpp"
#include "utils/logging_utils.h"

namespace commands {
bool ModelStatusCmd::IsLoaded(const std::string& host, int port,
const config::ModelConfig& mc) {
httplib::Client cli(host + ":" + std::to_string(port));
nlohmann::json json_data;
json_data["model"] = mc.name;
json_data["engine"] = mc.engine;

auto data_str = json_data.dump();

auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
data_str.data(), data_str.size(), "application/json");
if (res) {
if (res->status == httplib::StatusCode::OK_200) {
return true;
}
} else {
auto err = res.error();
CTL_WRN("HTTP error: " << httplib::to_string(err));
return false;
}

return false;
}
} // namespace commands
12 changes: 12 additions & 0 deletions engine/commands/model_status_cmd.h
@@ -0,0 +1,12 @@
#pragma once
#include <string>
#include "config/yaml_config.h"

namespace commands {

class ModelStatusCmd {
public:
bool IsLoaded(const std::string& host, int port,
const config::ModelConfig& mc);
};
} // namespace commands
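Together, this pair of new files wraps the /inferences/server/modelstatus endpoint: IsLoaded POSTs the model name and engine as JSON and treats an HTTP 200 as "model is loaded". A hedged usage sketch, mirroring how chat_cmd.cc calls it (not part of the diff; host and port stand in for the values the CLI parses, and mc is assumed to come from the model's YAML file):

#include <string>
#include "config/yaml_config.h"
#include "model_status_cmd.h"
#include "utils/logging_utils.h"

// Sketch of a caller; mc is assumed to be populated from the model's YAML
// file via config::YamlHandler, as run_cmd.cc does.
void WarnIfNotLoaded(const std::string& host, int port,
                     const config::ModelConfig& mc) {
  // Only the llamacpp engine exposes the modelstatus endpoint for now.
  if (mc.engine.find("llamacpp") != std::string::npos &&
      !commands::ModelStatusCmd().IsLoaded(host, port, mc)) {
    CLI_LOG("Model is not loaded yet!");
  }
}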
17 changes: 12 additions & 5 deletions engine/commands/run_cmd.cc
Expand Up @@ -3,6 +3,7 @@
#include "cmd_info.h"
#include "config/yaml_config.h"
#include "model_start_cmd.h"
#include "model_status_cmd.h"
#include "server_start_cmd.h"
#include "utils/file_manager_utils.h"

@@ -47,21 +48,27 @@ void RunCmd::Exec() {
}
}

// Start model
config::YamlHandler yaml_handler;
yaml_handler.ModelConfigFromFile(
file_manager_utils::GetModelsContainerPath().string() + "/" + model_file +
".yaml");
auto mc = yaml_handler.GetModelConfig();

// Always start model if not llamacpp
// If it is llamacpp, then check model status first
{
ModelStartCmd msc(host_, port_, yaml_handler.GetModelConfig());
if (!msc.Exec()) {
return;
if ((mc.engine.find("llamacpp") == std::string::npos) ||
!commands::ModelStatusCmd().IsLoaded(host_, port_, mc)) {
ModelStartCmd msc(host_, port_, mc);
if (!msc.Exec()) {
return;
}
}
}

// Chat
{
ChatCmd cc(host_, port_, yaml_handler.GetModelConfig());
ChatCmd cc(host_, port_, mc);
cc.Exec("");
}
}
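After this change, RunCmd::Exec parses the model YAML once into mc and reuses it for the status check, the conditional start, and the chat loop; the same "skip the start when llamacpp already reports the model as loaded" guard also appears in chat_cmd.cc and model_start_cmd.cc above. A condensed, hedged restatement of the resulting flow (not verbatim commit code; header names are assumed from the sources shown, and error handling plus the server-start step are omitted):

#include <string>
#include "chat_cmd.h"          // assumed header for commands::ChatCmd
#include "config/yaml_config.h"
#include "model_start_cmd.h"
#include "model_status_cmd.h"

void RunFlowSketch(const std::string& host, int port,
                   const config::ModelConfig& mc) {
  bool is_llamacpp = mc.engine.find("llamacpp") != std::string::npos;
  // Start the model unless llamacpp already reports it as loaded.
  if (!is_llamacpp || !commands::ModelStatusCmd().IsLoaded(host, port, mc)) {
    commands::ModelStartCmd msc(host, port, mc);
    if (!msc.Exec()) {
      return;  // starting the model failed; nothing to chat with
    }
  }
  // Hand off to the interactive chat loop.
  commands::ChatCmd cc(host, port, mc);
  cc.Exec("");
}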
20 changes: 11 additions & 9 deletions engine/controllers/server.cc
@@ -342,20 +342,22 @@ void server::LoadModel(const HttpRequestPtr& req,
auto func =
engines_[engine_type].dl->get_function<EngineI*()>("get_engine");
engines_[engine_type].engine = func();

auto& en = std::get<EngineI*>(engines_[engine_type].engine);
if (engine_type == kLlamaEngine) { //fix for llamacpp engine first
auto config = file_manager_utils::GetCortexConfig();
if (en->IsSupported("SetFileLogger")) {
en->SetFileLogger(config.maxLogLines, config.logFolderPath + "/" +
cortex_utils::logs_base_name);
} else {
LOG_WARN << "Method SetFileLogger is not supported yet";
}
}
LOG_INFO << "Loaded engine: " << engine_type;
}

LOG_TRACE << "Load model";
auto& en = std::get<EngineI*>(engines_[engine_type].engine);
if (engine_type == kLlamaEngine) { //fix for llamacpp engine first
auto config = file_manager_utils::GetCortexConfig();
if (en->IsSupported("SetFileLogger")) {
en->SetFileLogger(config.maxLogLines, config.logFolderPath + "/" +
cortex_utils::logs_base_name);
} else {
LOG_WARN << "Method SetFileLogger is not supported yet";
}
}
en->LoadModel(req->getJsonObject(), [cb = std::move(callback)](
Json::Value status, Json::Value res) {
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
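This server.cc hunk implements the third item in the commit message: SetFileLogger previously ran on every LoadModel request, reconfiguring the engine's file logger each time, and is now invoked only in the branch that creates the engine, so each engine is configured exactly once and later requests reuse it. A self-contained toy illustration of that "configure once at creation" pattern follows (not part of the commit; the types and paths below are stand-ins, not the real EngineI interface):

#include <iostream>
#include <map>
#include <memory>
#include <string>

// Toy stand-in for an engine; the real interface is EngineI in server.cc.
struct ToyEngine {
  void SetFileLogger(const std::string& path) {
    std::cout << "file logger configured once: " << path << "\n";
  }
  void LoadModel(const std::string& model) {
    std::cout << "loading model: " << model << "\n";
  }
};

std::map<std::string, std::unique_ptr<ToyEngine>> engines;

void LoadModel(const std::string& engine_type, const std::string& model) {
  if (engines.find(engine_type) == engines.end()) {
    engines[engine_type] = std::make_unique<ToyEngine>();
    // Configure the logger only when the engine is first created; before
    // this commit the equivalent call ran on every LoadModel request.
    engines[engine_type]->SetFileLogger("./logs/cortex.log");
  }
  engines[engine_type]->LoadModel(model);
}

int main() {
  LoadModel("llamacpp", "tinyllama");  // creates the engine, configures logger
  LoadModel("llamacpp", "mistral");    // reuses the already-configured engine
  return 0;
}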
