fix: should check model status before start it #1277

Merged: 3 commits, Sep 20, 2024

31 changes: 7 additions & 24 deletions engine/commands/chat_cmd.cc
@@ -2,9 +2,10 @@
#include "httplib.h"

#include "cortex_upd_cmd.h"
#include "model_status_cmd.h"
#include "server_start_cmd.h"
#include "trantor/utils/Logger.h"
#include "utils/logging_utils.h"
#include "server_start_cmd.h"

namespace commands {
namespace {
@@ -45,29 +46,11 @@ void ChatCmd::Exec(std::string msg) {
}

auto address = host_ + ":" + std::to_string(port_);
// Check if model is loaded
// TODO(sang) only llamacpp support modelstatus for now
if (mc_.engine.find("llamacpp") != std::string::npos) {
httplib::Client cli(address);
nlohmann::json json_data;
json_data["model"] = mc_.name;
json_data["engine"] = mc_.engine;

auto data_str = json_data.dump();

// TODO: move this to another message?
auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
data_str.data(), data_str.size(), "application/json");
if (res) {
if (res->status != httplib::StatusCode::OK_200) {
CTL_ERR(res->body);
return;
}
} else {
auto err = res.error();
CTL_ERR("HTTP error: " << httplib::to_string(err));
return;
}
// Only check if llamacpp engine
if ((mc_.engine.find("llamacpp") != std::string::npos) &&
!commands::ModelStatusCmd().IsLoaded(host_, port_, mc_)) {
CLI_LOG("Model is not loaded yet!");
return;
}

// Some instruction for user here
1 change: 1 addition & 0 deletions engine/commands/cortex_upd_cmd.h
@@ -70,6 +70,7 @@ inline void CheckNewUpdate() {

httplib::Client cli(host_name);
cli.set_connection_timeout(kTimeoutCheckUpdate);
cli.set_read_timeout(kTimeoutCheckUpdate);
if (auto res = cli.Get(release_path)) {
if (res->status == httplib::StatusCode::OK_200) {
try {
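Aside on the one-line change above: it adds a read timeout next to the existing connection timeout, so the update check is bounded by kTimeoutCheckUpdate both while connecting and while waiting for the response, instead of relying on cpp-httplib's built-in read default. A minimal sketch of the pattern, with a hypothetical host, path, and a stand-in constant for kTimeoutCheckUpdate:

#include <ctime>
#include "httplib.h"

bool CheckEndpointWithTimeouts() {
  constexpr time_t kTimeoutSecs = 10;            // stand-in for kTimeoutCheckUpdate
  httplib::Client cli("http://example.com");     // hypothetical host
  cli.set_connection_timeout(kTimeoutSecs);      // time allowed to establish the connection
  cli.set_read_timeout(kTimeoutSecs);            // time allowed to receive the response
  if (auto res = cli.Get("/releases/latest")) {  // hypothetical path
    return res->status == httplib::StatusCode::OK_200;
  }
  return false;
}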
7 changes: 7 additions & 0 deletions engine/commands/model_start_cmd.cc
@@ -1,6 +1,7 @@
#include "model_start_cmd.h"
#include "cortex_upd_cmd.h"
#include "httplib.h"
#include "model_status_cmd.h"
#include "nlohmann/json.hpp"
#include "server_start_cmd.h"
#include "trantor/utils/Logger.h"
@@ -19,6 +20,12 @@ bool ModelStartCmd::Exec() {
<< commands::GetCortexBinary() << " start` to start server!");
return false;
}
// Only check for llamacpp for now
if ((mc_.engine.find("llamacpp") != std::string::npos) &&
commands::ModelStatusCmd().IsLoaded(host_, port_, mc_)) {
CLI_LOG("Model has already been started!");
return true;
}

httplib::Client cli(host_ + ":" + std::to_string(port_));

31 changes: 31 additions & 0 deletions engine/commands/model_status_cmd.cc
@@ -0,0 +1,31 @@
#include "model_status_cmd.h"
#include "config/yaml_config.h"
#include "httplib.h"
#include "nlohmann/json.hpp"
#include "utils/logging_utils.h"

namespace commands {
bool ModelStatusCmd::IsLoaded(const std::string& host, int port,
const config::ModelConfig& mc) {
httplib::Client cli(host + ":" + std::to_string(port));
nlohmann::json json_data;
json_data["model"] = mc.name;
json_data["engine"] = mc.engine;

auto data_str = json_data.dump();

auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
data_str.data(), data_str.size(), "application/json");
if (res) {
if (res->status == httplib::StatusCode::OK_200) {
return true;
}
} else {
auto err = res.error();
CTL_WRN("HTTP error: " << httplib::to_string(err));
return false;
}

return false;
}
} // namespace commands
12 changes: 12 additions & 0 deletions engine/commands/model_status_cmd.h
@@ -0,0 +1,12 @@
#pragma once
#include <string>
#include "config/yaml_config.h"

namespace commands {

class ModelStatusCmd {
public:
bool IsLoaded(const std::string& host, int port,
const config::ModelConfig& mc);
};
} // namespace commands
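The new ModelStatusCmd gives chat, start, and run the same pre-flight check. The helper below is not part of the PR; it is a sketch that restates the guard the callers in this diff use, assuming only the headers shown above:

#include <string>

#include "config/yaml_config.h"
#include "model_status_cmd.h"

// Returns true when the caller should still send a start request.
bool ShouldStartModel(const std::string& host, int port,
                      const config::ModelConfig& mc) {
  // Only llamacpp exposes /inferences/server/modelstatus for now,
  // so every other engine is always started.
  if (mc.engine.find("llamacpp") == std::string::npos) {
    return true;
  }
  // Skip the start request when the model is already loaded.
  return !commands::ModelStatusCmd().IsLoaded(host, port, mc);
}

run_cmd.cc below applies exactly this logic inline before constructing ModelStartCmd, and model_start_cmd.cc uses the inverse check to return early when the model is already running.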
17 changes: 12 additions & 5 deletions engine/commands/run_cmd.cc
@@ -3,6 +3,7 @@
#include "cmd_info.h"
#include "config/yaml_config.h"
#include "model_start_cmd.h"
#include "model_status_cmd.h"
#include "server_start_cmd.h"
#include "utils/file_manager_utils.h"

@@ -47,21 +48,27 @@ void RunCmd::Exec() {
}
}

// Start model
config::YamlHandler yaml_handler;
yaml_handler.ModelConfigFromFile(
file_manager_utils::GetModelsContainerPath().string() + "/" + model_file +
".yaml");
auto mc = yaml_handler.GetModelConfig();

// Always start model if not llamacpp
// If it is llamacpp, then check model status first
{
ModelStartCmd msc(host_, port_, yaml_handler.GetModelConfig());
if (!msc.Exec()) {
return;
if ((mc.engine.find("llamacpp") == std::string::npos) ||
!commands::ModelStatusCmd().IsLoaded(host_, port_, mc)) {
ModelStartCmd msc(host_, port_, mc);
if (!msc.Exec()) {
return;
}
}
}

// Chat
{
ChatCmd cc(host_, port_, yaml_handler.GetModelConfig());
ChatCmd cc(host_, port_, mc);
cc.Exec("");
}
}
20 changes: 11 additions & 9 deletions engine/controllers/server.cc
@@ -342,20 +342,22 @@ void server::LoadModel(const HttpRequestPtr& req,
auto func =
engines_[engine_type].dl->get_function<EngineI*()>("get_engine");
engines_[engine_type].engine = func();

Author comment: Only set the logger for the engine once, or it will crash when a model is loaded, unloaded, and loaded again.

auto& en = std::get<EngineI*>(engines_[engine_type].engine);
if (engine_type == kLlamaEngine) { //fix for llamacpp engine first
auto config = file_manager_utils::GetCortexConfig();
if (en->IsSupported("SetFileLogger")) {
en->SetFileLogger(config.maxLogLines, config.logFolderPath + "/" +
cortex_utils::logs_base_name);
} else {
LOG_WARN << "Method SetFileLogger is not supported yet";
}
}
LOG_INFO << "Loaded engine: " << engine_type;
}

LOG_TRACE << "Load model";
auto& en = std::get<EngineI*>(engines_[engine_type].engine);
if (engine_type == kLlamaEngine) { //fix for llamacpp engine first
auto config = file_manager_utils::GetCortexConfig();
if (en->IsSupported("SetFileLogger")) {
en->SetFileLogger(config.maxLogLines, config.logFolderPath + "/" +
cortex_utils::logs_base_name);
} else {
LOG_WARN << "Method SetFileLogger is not supported yet";
}
}
en->LoadModel(req->getJsonObject(), [cb = std::move(callback)](
Json::Value status, Json::Value res) {
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
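Per the author's comment, re-setting the file logger across a load/unload/load cycle crashes the engine; moving SetFileLogger into the branch that creates the engine makes it run exactly once. The self-contained toy below illustrates that ordering only; Engine, the paths, and the model name are invented for the example and are not the cortex.cpp types:

#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Engine {
  void SetFileLogger(const std::string& path) {
    std::cout << "logger attached once: " << path << "\n";
  }
  void LoadModel(const std::string& name) {
    std::cout << "model loaded: " << name << "\n";
  }
};

std::map<std::string, std::unique_ptr<Engine>> engines;

void LoadModel(const std::string& engine_type, const std::string& model) {
  if (engines.find(engine_type) == engines.end()) {
    engines[engine_type] = std::make_unique<Engine>();
    // Logger configuration happens only when the engine is first created.
    engines[engine_type]->SetFileLogger("./logs/cortex.log");
  }
  engines[engine_type]->LoadModel(model);  // reuses the already-configured engine
}

int main() {
  LoadModel("llamacpp", "tinyllama");  // creates the engine and attaches the logger
  LoadModel("llamacpp", "tinyllama");  // the logger is not attached again
}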