fix: should check model status before start it (#1277)
* fix: check model status before start

* fix: read timeout for checking update

* fix: only set logger for engine once
vansangpfiev authored Sep 20, 2024
1 parent d9c5b41 commit 3f7d3ec
Showing 7 changed files with 81 additions and 38 deletions.
31 changes: 7 additions & 24 deletions engine/commands/chat_cmd.cc
@@ -2,9 +2,10 @@
#include "httplib.h"

#include "cortex_upd_cmd.h"
#include "model_status_cmd.h"
#include "server_start_cmd.h"
#include "trantor/utils/Logger.h"
#include "utils/logging_utils.h"
#include "server_start_cmd.h"

namespace commands {
namespace {
@@ -45,29 +46,11 @@ void ChatCmd::Exec(std::string msg) {
}

auto address = host_ + ":" + std::to_string(port_);
// Check if model is loaded
// TODO(sang) only llamacpp support modelstatus for now
if (mc_.engine.find("llamacpp") != std::string::npos) {
httplib::Client cli(address);
nlohmann::json json_data;
json_data["model"] = mc_.name;
json_data["engine"] = mc_.engine;

auto data_str = json_data.dump();

// TODO: move this to another message?
auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
data_str.data(), data_str.size(), "application/json");
if (res) {
if (res->status != httplib::StatusCode::OK_200) {
CTL_ERR(res->body);
return;
}
} else {
auto err = res.error();
CTL_ERR("HTTP error: " << httplib::to_string(err));
return;
}
// Only check if llamacpp engine
if ((mc_.engine.find("llamacpp") != std::string::npos) &&
!commands::ModelStatusCmd().IsLoaded(host_, port_, mc_)) {
CLI_LOG("Model is not loaded yet!");
return;
}

// Some instruction for user here
1 change: 1 addition & 0 deletions engine/commands/cortex_upd_cmd.h
@@ -70,6 +70,7 @@ inline void CheckNewUpdate() {

httplib::Client cli(host_name);
cli.set_connection_timeout(kTimeoutCheckUpdate);
cli.set_read_timeout(kTimeoutCheckUpdate);
if (auto res = cli.Get(release_path)) {
if (res->status == httplib::StatusCode::OK_200) {
try {
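The added read timeout closes a gap in the update check: set_connection_timeout only bounds how long the client waits to establish the TCP connection, so a server that accepted the connection but then stalled could hang CheckNewUpdate() indefinitely. A minimal sketch of the pattern follows (illustrative, not part of the commit; the constant name and the ten-second value are stand-ins, while the real code reuses kTimeoutCheckUpdate for both timeouts):

#include <chrono>
#include <string>
#include "httplib.h"

// Stand-in for the timeout constant defined in cortex_upd_cmd.h.
constexpr auto kCheckTimeout = std::chrono::seconds(10);

bool CanReachReleases(const std::string& host, const std::string& path) {
  httplib::Client cli(host);
  cli.set_connection_timeout(kCheckTimeout);  // bound the TCP connect phase
  cli.set_read_timeout(kCheckTimeout);        // bound the wait for the response
  auto res = cli.Get(path);
  return res && res->status == httplib::StatusCode::OK_200;
}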
7 changes: 7 additions & 0 deletions engine/commands/model_start_cmd.cc
@@ -1,6 +1,7 @@
#include "model_start_cmd.h"
#include "cortex_upd_cmd.h"
#include "httplib.h"
#include "model_status_cmd.h"
#include "nlohmann/json.hpp"
#include "server_start_cmd.h"
#include "trantor/utils/Logger.h"
@@ -19,6 +20,12 @@ bool ModelStartCmd::Exec() {
<< commands::GetCortexBinary() << " start` to start server!");
return false;
}
// Only check for llamacpp for now
if ((mc_.engine.find("llamacpp") != std::string::npos) &&
commands::ModelStatusCmd().IsLoaded(host_, port_, mc_)) {
CLI_LOG("Model has already been started!");
return true;
}

httplib::Client cli(host_ + ":" + std::to_string(port_));

31 changes: 31 additions & 0 deletions engine/commands/model_status_cmd.cc
@@ -0,0 +1,31 @@
#include "model_status_cmd.h"
#include "config/yaml_config.h"
#include "httplib.h"
#include "nlohmann/json.hpp"
#include "utils/logging_utils.h"

namespace commands {
bool ModelStatusCmd::IsLoaded(const std::string& host, int port,
const config::ModelConfig& mc) {
httplib::Client cli(host + ":" + std::to_string(port));
nlohmann::json json_data;
json_data["model"] = mc.name;
json_data["engine"] = mc.engine;

auto data_str = json_data.dump();

auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
data_str.data(), data_str.size(), "application/json");
if (res) {
if (res->status == httplib::StatusCode::OK_200) {
return true;
}
} else {
auto err = res.error();
CTL_WRN("HTTP error: " << httplib::to_string(err));
return false;
}

return false;
}
} // namespace commands
12 changes: 12 additions & 0 deletions engine/commands/model_status_cmd.h
@@ -0,0 +1,12 @@
#pragma once
#include <string>
#include "config/yaml_config.h"

namespace commands {

class ModelStatusCmd {
public:
bool IsLoaded(const std::string& host, int port,
const config::ModelConfig& mc);
};
} // namespace commands
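Together, this pair of new files wraps the /inferences/server/modelstatus endpoint: IsLoaded POSTs the model name and engine as JSON and treats an HTTP 200 as "model is loaded". A hedged usage sketch, mirroring how chat_cmd.cc calls it (not part of the diff; host and port stand in for the values the CLI parses, and mc is assumed to come from the model's YAML file):

#include <string>
#include "config/yaml_config.h"
#include "model_status_cmd.h"
#include "utils/logging_utils.h"

// Sketch of a caller; mc is assumed to be populated from the model's YAML
// file via config::YamlHandler, as run_cmd.cc does.
void WarnIfNotLoaded(const std::string& host, int port,
                     const config::ModelConfig& mc) {
  // Only the llamacpp engine exposes the modelstatus endpoint for now.
  if (mc.engine.find("llamacpp") != std::string::npos &&
      !commands::ModelStatusCmd().IsLoaded(host, port, mc)) {
    CLI_LOG("Model is not loaded yet!");
  }
}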
17 changes: 12 additions & 5 deletions engine/commands/run_cmd.cc
Expand Up @@ -3,6 +3,7 @@
#include "cmd_info.h"
#include "config/yaml_config.h"
#include "model_start_cmd.h"
#include "model_status_cmd.h"
#include "server_start_cmd.h"
#include "utils/file_manager_utils.h"

@@ -47,21 +48,27 @@ void RunCmd::Exec() {
}
}

// Start model
config::YamlHandler yaml_handler;
yaml_handler.ModelConfigFromFile(
file_manager_utils::GetModelsContainerPath().string() + "/" + model_file +
".yaml");
auto mc = yaml_handler.GetModelConfig();

// Always start model if not llamacpp
// If it is llamacpp, then check model status first
{
ModelStartCmd msc(host_, port_, yaml_handler.GetModelConfig());
if (!msc.Exec()) {
return;
if ((mc.engine.find("llamacpp") == std::string::npos) ||
!commands::ModelStatusCmd().IsLoaded(host_, port_, mc)) {
ModelStartCmd msc(host_, port_, mc);
if (!msc.Exec()) {
return;
}
}
}

// Chat
{
ChatCmd cc(host_, port_, yaml_handler.GetModelConfig());
ChatCmd cc(host_, port_, mc);
cc.Exec("");
}
}
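After this change, RunCmd::Exec parses the model YAML once into mc and reuses it for the status check, the conditional start, and the chat loop; the same "skip the start when llamacpp already reports the model as loaded" guard also appears in chat_cmd.cc and model_start_cmd.cc above. A condensed, hedged restatement of the resulting flow (not verbatim commit code; header names are assumed from the sources shown, and error handling plus the server-start step are omitted):

#include <string>
#include "chat_cmd.h"          // assumed header for commands::ChatCmd
#include "config/yaml_config.h"
#include "model_start_cmd.h"
#include "model_status_cmd.h"

void RunFlowSketch(const std::string& host, int port,
                   const config::ModelConfig& mc) {
  bool is_llamacpp = mc.engine.find("llamacpp") != std::string::npos;
  // Start the model unless llamacpp already reports it as loaded.
  if (!is_llamacpp || !commands::ModelStatusCmd().IsLoaded(host, port, mc)) {
    commands::ModelStartCmd msc(host, port, mc);
    if (!msc.Exec()) {
      return;  // starting the model failed; nothing to chat with
    }
  }
  // Hand off to the interactive chat loop.
  commands::ChatCmd cc(host, port, mc);
  cc.Exec("");
}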
20 changes: 11 additions & 9 deletions engine/controllers/server.cc
@@ -342,20 +342,22 @@ void server::LoadModel(const HttpRequestPtr& req,
auto func =
engines_[engine_type].dl->get_function<EngineI*()>("get_engine");
engines_[engine_type].engine = func();

auto& en = std::get<EngineI*>(engines_[engine_type].engine);
if (engine_type == kLlamaEngine) { //fix for llamacpp engine first
auto config = file_manager_utils::GetCortexConfig();
if (en->IsSupported("SetFileLogger")) {
en->SetFileLogger(config.maxLogLines, config.logFolderPath + "/" +
cortex_utils::logs_base_name);
} else {
LOG_WARN << "Method SetFileLogger is not supported yet";
}
}
LOG_INFO << "Loaded engine: " << engine_type;
}

LOG_TRACE << "Load model";
auto& en = std::get<EngineI*>(engines_[engine_type].engine);
if (engine_type == kLlamaEngine) { //fix for llamacpp engine first
auto config = file_manager_utils::GetCortexConfig();
if (en->IsSupported("SetFileLogger")) {
en->SetFileLogger(config.maxLogLines, config.logFolderPath + "/" +
cortex_utils::logs_base_name);
} else {
LOG_WARN << "Method SetFileLogger is not supported yet";
}
}
en->LoadModel(req->getJsonObject(), [cb = std::move(callback)](
Json::Value status, Json::Value res) {
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
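This server.cc hunk implements the third item in the commit message: SetFileLogger previously ran on every LoadModel request, reconfiguring the engine's file logger each time, and is now invoked only in the branch that creates the engine, so each engine is configured exactly once and later requests reuse it. A self-contained toy illustration of that "configure once at creation" pattern follows (not part of the commit; the types and paths below are stand-ins, not the real EngineI interface):

#include <iostream>
#include <map>
#include <memory>
#include <string>

// Toy stand-in for an engine; the real interface is EngineI in server.cc.
struct ToyEngine {
  void SetFileLogger(const std::string& path) {
    std::cout << "file logger configured once: " << path << "\n";
  }
  void LoadModel(const std::string& model) {
    std::cout << "loading model: " << model << "\n";
  }
};

std::map<std::string, std::unique_ptr<ToyEngine>> engines;

void LoadModel(const std::string& engine_type, const std::string& model) {
  if (engines.find(engine_type) == engines.end()) {
    engines[engine_type] = std::make_unique<ToyEngine>();
    // Configure the logger only when the engine is first created; before
    // this commit the equivalent call ran on every LoadModel request.
    engines[engine_type]->SetFileLogger("./logs/cortex.log");
  }
  engines[engine_type]->LoadModel(model);
}

int main() {
  LoadModel("llamacpp", "tinyllama");  // creates the engine, configures logger
  LoadModel("llamacpp", "mistral");    // reuses the already-configured engine
  return 0;
}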
