
Commit

chore: cleanup
sangjanai committed Dec 31, 2024
1 parent b4db561 commit 051e9d6
Showing 6 changed files with 143 additions and 106 deletions.
2 changes: 2 additions & 0 deletions base/cortex-common/enginei.h
@@ -6,6 +6,7 @@
#include <vector>

#include "json/value.h"
#include "trantor/utils/AsyncFileLogger.h"
#include "trantor/utils/Logger.h"

// Interface for inference engine.
@@ -22,6 +23,7 @@ class EngineI {
std::filesystem::path log_path;
int max_log_lines;
trantor::Logger::LogLevel log_level;
trantor::AsyncFileLogger* logger;
};

struct EngineUnloadOption {
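For context, a host embedding the engine would now fill the extra logger field alongside the existing options. A minimal sketch, assuming the Load() entry point and the field names shown in this interface; the concrete paths and values are placeholders:

#include <filesystem>

#include "base/cortex-common/enginei.h"
#include "trantor/utils/AsyncFileLogger.h"

// Hypothetical host-side wiring; field names mirror EngineLoadOption above.
void LoadEngineWithSharedLogger(EngineI* engine,
                                trantor::AsyncFileLogger* shared_logger) {
  EngineI::EngineLoadOption opts;
  opts.engine_path = std::filesystem::path("./engines/cortex.llamacpp");
  opts.log_path = std::filesystem::path("./logs/cortex.log");
  opts.max_log_lines = 10000;
  opts.log_level = trantor::Logger::kInfo;
  opts.logger = shared_logger;  // non-owning: the host keeps the logger alive
  engine->Load(opts);
}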
1 change: 1 addition & 0 deletions examples/server/CMakeLists.txt
@@ -15,6 +15,7 @@ add_executable(${PROJECT_NAME}
server.cc
dylib.h
httplib.h
${CMAKE_CURRENT_SOURCE_DIR}/../../src/file_logger.cc
)

set(THIRD_PARTY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../build_deps/_install)
45 changes: 29 additions & 16 deletions examples/server/server.cc
@@ -8,21 +8,32 @@
#include <condition_variable>
#include <mutex>
#include <queue>
#include "trantor/utils/Logger.h"
#include "../../src/file_logger.h"
#include "../../src/llama_utils.h"

class Server {
public:
Server() {
dylib_ = std::make_unique<dylib>("./engines/cortex.llamacpp", "engine");
auto func = dylib_->get_function<EngineI*()>("get_engine");
engine_ = func();
}
Server() {}

~Server() {
if (engine_) {
delete engine_;
}
}

void Initialize(trantor::AsyncFileLogger* logger) {
dylib_ = std::make_unique<dylib>("./engines/cortex.llamacpp", "engine");
auto func = dylib_->get_function<EngineI*()>("get_engine");
engine_ = func();
EngineI::EngineLoadOption opts;
opts.engine_path = llama_utils::GetExecutableFolderContainerPath() /
"engines" / "cortex.llamacpp";
opts.log_path = "./logs/cortex.log";
opts.max_log_lines = 10000;
opts.logger = logger;
engine_->Load(opts);
}

void ForceStopInferencing(const std::string& model_id) {
if (engine_) {
engine_->StopInferencing(model_id);
@@ -86,16 +97,16 @@ inline void signal_handler(int signal) {
using SyncQueue = Server::SyncQueue;

int main(int argc, char** argv) {
// std::filesystem::create_directories("./logs");
// trantor::AsyncFileLogger asyncFileLogger;
// asyncFileLogger.setFileName("logs/cortex");
// asyncFileLogger.startLogging();
// trantor::Logger::setOutputFunction(
// [&](const char* msg, const uint64_t len) {
// asyncFileLogger.output(msg, len);
// },
// [&]() { asyncFileLogger.flush(); });
// asyncFileLogger.setFileSizeLimit(100000000);
std::filesystem::create_directories("./logs");
trantor::FileLogger async_file_logger;
async_file_logger.setFileName("logs/cortex.log");
async_file_logger.startLogging();
trantor::Logger::setOutputFunction(
[&](const char* msg, const uint64_t len) {
async_file_logger.output_(msg, len);
},
[&]() { async_file_logger.flush(); });
async_file_logger.setFileSizeLimit(100000000);

std::string hostname = "127.0.0.1";
int port = 3928;
@@ -109,6 +120,8 @@ int main(int argc, char** argv) {
}

Server server;

server.Initialize(&async_file_logger);
//set logger here
// server.engine_->SetFileLogger();

60 changes: 60 additions & 0 deletions src/llama_data.h
@@ -0,0 +1,60 @@
#pragma once
#include "json/json.h"

struct IsDone {
bool is_done;
int operator()() { return is_done; }
};

struct HasError {
bool has_error;
int operator()() { return has_error; }
};

struct IsStream {
bool is_stream;
int operator()() { return is_stream; }
};

struct StatusCode {
int status_code;
int operator()() { return status_code; }
};

struct ResStatus {
private:
IsDone is_done;
HasError has_error;
IsStream is_stream;
StatusCode status_code;

public:
ResStatus(IsDone is_done, HasError has_error, IsStream is_stream,
StatusCode status_code)
: is_done(is_done),
has_error(has_error),
is_stream(is_stream),
status_code(status_code) {}

Json::Value ToJson() {
Json::Value status;
status["is_done"] = is_done();
status["has_error"] = has_error();
status["is_stream"] = is_stream();
status["status_code"] = status_code();
return status;
};
};

struct ResStreamData {
private:
std::string s;

public:
ResStreamData(std::string s) : s(std::move(s)) {}
Json::Value ToJson() {
Json::Value d;
d["data"] = s;
return d;
}
};
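As a usage note, the single-field wrapper structs act as named arguments, so the four status values cannot be swapped silently when constructing a ResStatus. A small sketch; the include path is illustrative:

#include <iostream>

#include "llama_data.h"

int main() {
  // Each wrapper names its value, so the call site documents itself.
  ResStatus status(IsDone{true}, HasError{false}, IsStream{true},
                   StatusCode{200});
  // ToJson() stores the wrapped flags as integers, e.g.
  // {"has_error":0,"is_done":1,"is_stream":1,"status_code":200}
  std::cout << status.ToJson().toStyledString();

  ResStreamData chunk(R"(data: {"choices":[]})");
  std::cout << chunk.ToJson()["data"].asString() << "\n";
  return 0;
}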
42 changes: 16 additions & 26 deletions src/llama_engine.cc
@@ -333,7 +333,8 @@ Json::Value ParseJsonString(const std::string& json_str) {
} // namespace

void LlamaEngine::Load(EngineLoadOption opts) {
LOG_INFO << "Loading engine..";
load_opt_ = opts;
LOG_DEBUG << "Loading engine..";

LOG_DEBUG << "Is custom engine path: " << opts.is_custom_engine_path;
LOG_DEBUG << "Engine path: " << opts.engine_path.string();
@@ -350,9 +351,6 @@ void LlamaEngine::Unload(EngineUnloadOption opts) {

LlamaEngine::LlamaEngine(int log_option) {
trantor::Logger::setLogLevel(trantor::Logger::kInfo);
if (log_option == kFileLoggerOption) {
async_file_logger_ = std::make_unique<trantor::FileLogger>();
}

common_log_pause(common_log_main());

@@ -379,7 +377,6 @@ LlamaEngine::~LlamaEngine() {
l.ReleaseResources();
}
server_map_.clear();
async_file_logger_.reset();

LOG_INFO << "LlamaEngine destructed successfully";
}
@@ -570,21 +567,17 @@ void LlamaEngine::StopInferencing(const std::string& model_id) {

void LlamaEngine::SetFileLogger(int max_log_lines,
const std::string& log_path) {
if (!async_file_logger_) {
async_file_logger_ = std::make_unique<trantor::FileLogger>();
}

async_file_logger_->setFileName(log_path);
async_file_logger_->setMaxLines(max_log_lines); // Keep last 100000 lines
async_file_logger_->startLogging();
trantor::Logger::setOutputFunction(
[&](const char* msg, const uint64_t len) {
if (async_file_logger_)
async_file_logger_->output_(msg, len);
if (load_opt_.logger) {
if (auto l = static_cast<trantor::FileLogger*>(load_opt_.logger); l) {
l->output_(msg, len);
}
}
},
[&]() {
if (async_file_logger_)
async_file_logger_->flush();
if (load_opt_.logger)
load_opt_.logger->flush();
});
llama_log_set(
[](ggml_log_level level, const char* text, void* user_data) {
@@ -601,8 +594,10 @@ void LlamaEngine::SetFileLogger(int max_log_lines,
}
},
nullptr);
freopen(log_path.c_str(), "a", stderr);
freopen(log_path.c_str(), "a", stdout);
if (!freopen(log_path.c_str(), "a", stderr))
LOG_WARN << "Could not open stream for stderr";
if (!freopen(log_path.c_str(), "a", stdout))
LOG_WARN << "Could not open stream for stdout";
}
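One note on the routing above: load_opt_.logger is declared as trantor::AsyncFileLogger*, so the static_cast to trantor::FileLogger* is only valid because the engine's custom FileLogger derives from AsyncFileLogger. A sketch of the assumed shape, based only on the members used in this commit (the real declaration lives in src/file_logger.h, and the exact signatures here are assumptions):

#include <cstdint>

#include "trantor/utils/AsyncFileLogger.h"

namespace trantor {
// Assumed declaration; see src/file_logger.h for the authoritative one.
class FileLogger : public AsyncFileLogger {
 public:
  // Keep only the most recent max_lines lines in the log file.
  void setMaxLines(uint64_t max_lines);
  // Write a log fragment directly to the file.
  void output_(const char* msg, const uint64_t len);
};
}  // namespace trantor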

bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> json_body) {
Expand Down Expand Up @@ -1388,11 +1383,8 @@ bool LlamaEngine::SpawnLlamaServer(const Json::Value& json_params) {
params += " --host " + s.host + " --port " + std::to_string(s.port);

std::string exe_w = "llama-server.exe";
std::string current_path_w =
(llama_utils::GetExecutableFolderContainerPath() / "engines" /
"cortex.llamacpp")
.string();
std::string wcmds = current_path_w + "/" + exe_w + " " + params;
std::string wcmds =
load_opt_.engine_path.string() + "/" + exe_w + " " + params;
LOG_DEBUG << "wcmds: " << wcmds;
std::vector<wchar_t> mutable_cmds(wcmds.begin(), wcmds.end());
mutable_cmds.push_back(L'\0');
@@ -1432,9 +1424,7 @@ bool LlamaEngine::SpawnLlamaServer(const Json::Value& json_params) {
} else if (s.pid == 0) {
// Some engines requires to add lib search path before process being created
std::string exe = "llama-server";
std::string p = (llama_utils::GetExecutableFolderContainerPath() /
"engines" / "cortex.llamacpp" / exe)
.string();
std::string p = (load_opt_.engine_path / exe).string();
std::vector<std::string> params = ConvertJsonToParamsVector(json_params);
params.push_back("--host");
params.push_back(s.host);
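The spawn-path change above boils down to one pattern: the command is now built from load_opt_.engine_path captured at Load() time instead of re-deriving the folder from the executable location. A standalone sketch of that pattern (the helper name is hypothetical; the binary names come from the diff):

#include <filesystem>
#include <string>

// Hypothetical helper mirroring the change: compose the llama-server command
// from the engine path stored in EngineLoadOption.
std::string BuildServerCommand(const std::filesystem::path& engine_path,
                               const std::string& params) {
#if defined(_WIN32)
  const std::string exe = "llama-server.exe";
#else
  const std::string exe = "llama-server";
#endif
  return (engine_path / exe).string() + " " + params;
}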
(The diff for the sixth changed file did not load and is not shown here.)

