From 051e9d6ef325eb0db06697e88d2c79f32f7613de Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Tue, 31 Dec 2024 10:59:23 +0700
Subject: [PATCH] chore: cleanup

---
 base/cortex-common/enginei.h   |  2 +
 examples/server/CMakeLists.txt |  1 +
 examples/server/server.cc      | 45 ++++++++++------
 src/llama_data.h               | 60 +++++++++++++++++++++
 src/llama_engine.cc            | 42 ++++++---------
 src/llama_engine.h             | 99 ++++++++++++----------------------
 6 files changed, 143 insertions(+), 106 deletions(-)
 create mode 100644 src/llama_data.h

diff --git a/base/cortex-common/enginei.h b/base/cortex-common/enginei.h
index 200808ed..11df283e 100644
--- a/base/cortex-common/enginei.h
+++ b/base/cortex-common/enginei.h
@@ -6,6 +6,7 @@
 #include
 #include "json/value.h"
+#include "trantor/utils/AsyncFileLogger.h"
 #include "trantor/utils/Logger.h"
 // Interface for inference engine.
@@ -22,6 +23,7 @@ class EngineI {
     std::filesystem::path log_path;
     int max_log_lines;
     trantor::Logger::LogLevel log_level;
+    trantor::AsyncFileLogger* logger;
   };

   struct EngineUnloadOption {
diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
index eac61127..1c743b3e 100644
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@@ -15,6 +15,7 @@ add_executable(${PROJECT_NAME}
     server.cc
     dylib.h
     httplib.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/file_logger.cc
 )

 set(THIRD_PARTY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../build_deps/_install)
diff --git a/examples/server/server.cc b/examples/server/server.cc
index 6531934c..94237312 100644
--- a/examples/server/server.cc
+++ b/examples/server/server.cc
@@ -8,14 +8,12 @@
 #include
 #include
 #include
-#include "trantor/utils/Logger.h"
+#include "../../src/file_logger.h"
+#include "../../src/llama_utils.h"
+
 class Server {
  public:
-  Server() {
-    dylib_ = std::make_unique("./engines/cortex.llamacpp", "engine");
-    auto func = dylib_->get_function("get_engine");
-    engine_ = func();
-  }
+  Server() {}

   ~Server() {
     if (engine_) {
     }
   }

+  void Initialize(trantor::AsyncFileLogger* logger) {
+    dylib_ = std::make_unique("./engines/cortex.llamacpp", "engine");
+    auto func = dylib_->get_function("get_engine");
+    engine_ = func();
+    EngineI::EngineLoadOption opts;
+    opts.engine_path = llama_utils::GetExecutableFolderContainerPath() /
+                       "engines" / "cortex.llamacpp";
+    opts.log_path = "./logs/cortex.log";
+    opts.max_log_lines = 10000;
+    opts.logger = logger;
+    engine_->Load(opts);
+  }
+
   void ForceStopInferencing(const std::string& model_id) {
     if (engine_) {
       engine_->StopInferencing(model_id);
     }
   }
@@ -86,16 +97,16 @@ inline void signal_handler(int signal) {
 using SyncQueue = Server::SyncQueue;

 int main(int argc, char** argv) {
-  // std::filesystem::create_directories("./logs");
-  // trantor::AsyncFileLogger asyncFileLogger;
-  // asyncFileLogger.setFileName("logs/cortex");
-  // asyncFileLogger.startLogging();
-  // trantor::Logger::setOutputFunction(
-  //     [&](const char* msg, const uint64_t len) {
-  //       asyncFileLogger.output(msg, len);
-  //     },
-  //     [&]() { asyncFileLogger.flush(); });
-  // asyncFileLogger.setFileSizeLimit(100000000);
+  std::filesystem::create_directories("./logs");
+  trantor::FileLogger async_file_logger;
+  async_file_logger.setFileName("logs/cortex.log");
+  async_file_logger.startLogging();
+  trantor::Logger::setOutputFunction(
+      [&](const char* msg, const uint64_t len) {
+        async_file_logger.output_(msg, len);
+      },
+      [&]() { async_file_logger.flush(); });
+  async_file_logger.setFileSizeLimit(100000000);
   std::string hostname = "127.0.0.1";
   int port = 3928;
@@ -109,6 +120,8 @@ int main(int argc, char** argv) {
   }

   Server server;
+
+  server.Initialize(&async_file_logger);
   //set logger here
   // server.engine_->SetFileLogger();
diff --git a/src/llama_data.h b/src/llama_data.h
new file mode 100644
index 00000000..7753cd21
--- /dev/null
+++ b/src/llama_data.h
@@ -0,0 +1,60 @@
+#pragma once
+#include "json/json.h"
+
+struct IsDone {
+  bool is_done;
+  int operator()() { return is_done; }
+};
+
+struct HasError {
+  bool has_error;
+  int operator()() { return has_error; }
+};
+
+struct IsStream {
+  bool is_stream;
+  int operator()() { return is_stream; }
+};
+
+struct StatusCode {
+  int status_code;
+  int operator()() { return status_code; }
+};
+
+struct ResStatus {
+ private:
+  IsDone is_done;
+  HasError has_error;
+  IsStream is_stream;
+  StatusCode status_code;
+
+ public:
+  ResStatus(IsDone is_done, HasError has_error, IsStream is_stream,
+            StatusCode status_code)
+      : is_done(is_done),
+        has_error(has_error),
+        is_stream(is_stream),
+        status_code(status_code) {}
+
+  Json::Value ToJson() {
+    Json::Value status;
+    status["is_done"] = is_done();
+    status["has_error"] = has_error();
+    status["is_stream"] = is_stream();
+    status["status_code"] = status_code();
+    return status;
+  };
+};
+
+struct ResStreamData {
+ private:
+  std::string s;
+
+ public:
+  ResStreamData(std::string s) : s(std::move(s)) {}
+  Json::Value ToJson() {
+    Json::Value d;
+    d["data"] = s;
+    return d;
+  }
+};
\ No newline at end of file
diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index ba5bdf75..a20f13e0 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -333,7 +333,8 @@ Json::Value ParseJsonString(const std::string& json_str) {
 }  // namespace

 void LlamaEngine::Load(EngineLoadOption opts) {
-  LOG_INFO << "Loading engine..";
+  load_opt_ = opts;
+  LOG_DEBUG << "Loading engine..";

   LOG_DEBUG << "Is custom engine path: " << opts.is_custom_engine_path;
   LOG_DEBUG << "Engine path: " << opts.engine_path.string();
@@ -350,9 +351,6 @@ void LlamaEngine::Unload(EngineUnloadOption opts) {

 LlamaEngine::LlamaEngine(int log_option) {
   trantor::Logger::setLogLevel(trantor::Logger::kInfo);
-  if (log_option == kFileLoggerOption) {
-    async_file_logger_ = std::make_unique<trantor::FileLogger>();
-  }

   common_log_pause(common_log_main());

@@ -379,7 +377,6 @@ LlamaEngine::~LlamaEngine() {
     l.ReleaseResources();
   }
   server_map_.clear();
-  async_file_logger_.reset();

   LOG_INFO << "LlamaEngine destructed successfully";
 }
@@ -570,21 +567,17 @@ void LlamaEngine::StopInferencing(const std::string& model_id) {

 void LlamaEngine::SetFileLogger(int max_log_lines,
                                 const std::string& log_path) {
-  if (!async_file_logger_) {
-    async_file_logger_ = std::make_unique<trantor::FileLogger>();
-  }
-
-  async_file_logger_->setFileName(log_path);
-  async_file_logger_->setMaxLines(max_log_lines);  // Keep last 100000 lines
-  async_file_logger_->startLogging();
   trantor::Logger::setOutputFunction(
       [&](const char* msg, const uint64_t len) {
-        if (async_file_logger_)
-          async_file_logger_->output_(msg, len);
+        if (load_opt_.logger) {
+          if (auto l = static_cast<trantor::FileLogger*>(load_opt_.logger); l) {
+            l->output_(msg, len);
+          }
+        }
       },
       [&]() {
-        if (async_file_logger_)
-          async_file_logger_->flush();
+        if (load_opt_.logger)
+          load_opt_.logger->flush();
       });
   llama_log_set(
       [](ggml_log_level level, const char* text, void* user_data) {
@@ -601,8 +594,10 @@ void LlamaEngine::SetFileLogger(int max_log_lines,
         }
       },
       nullptr);
-  freopen(log_path.c_str(), "a", stderr);
-  freopen(log_path.c_str(), "a", stdout);
+  if (!freopen(log_path.c_str(), "a", stderr))
+    LOG_WARN << "Could not open stream for stderr";
+  if (!freopen(log_path.c_str(), "a", stdout))
+    LOG_WARN << "Could not open stream for stdout";
 }

 bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> json_body) {
@@ -1388,11 +1383,8 @@ bool LlamaEngine::SpawnLlamaServer(const Json::Value& json_params) {
   params += " --host " + s.host + " --port " + std::to_string(s.port);

   std::string exe_w = "llama-server.exe";
-  std::string current_path_w =
-      (llama_utils::GetExecutableFolderContainerPath() / "engines" /
-       "cortex.llamacpp")
-          .string();
-  std::string wcmds = current_path_w + "/" + exe_w + " " + params;
+  std::string wcmds =
+      load_opt_.engine_path.string() + "/" + exe_w + " " + params;
   LOG_DEBUG << "wcmds: " << wcmds;
   std::vector mutable_cmds(wcmds.begin(), wcmds.end());
   mutable_cmds.push_back(L'\0');
@@ -1432,9 +1424,7 @@ bool LlamaEngine::SpawnLlamaServer(const Json::Value& json_params) {
   } else if (s.pid == 0) {
     // Some engines requires to add lib search path before process being created
     std::string exe = "llama-server";
-    std::string p = (llama_utils::GetExecutableFolderContainerPath() /
-                     "engines" / "cortex.llamacpp" / exe)
-                        .string();
+    std::string p = (load_opt_.engine_path / exe).string();
     std::vector<std::string> params = ConvertJsonToParamsVector(json_params);
     params.push_back("--host");
     params.push_back(s.host);
diff --git a/src/llama_engine.h b/src/llama_engine.h
index 50629a20..a603ebfc 100644
--- a/src/llama_engine.h
+++ b/src/llama_engine.h
@@ -6,6 +6,7 @@
 #include "cortex-common/enginei.h"
 #include "file_logger.h"
 #include "llama.h"
+#include "llama_data.h"
 #include "llama_server_context.h"
 #include "trantor/utils/ConcurrentTaskQueue.h"
 #include "trantor/utils/Logger.h"
@@ -20,34 +21,53 @@ class LlamaEngine : public EngineI {

   ~LlamaEngine() final;

-  // #### Interface ####
+  // Load the engine with the specified options.
   void Load(EngineLoadOption opts) final;

+  // Unload the engine with the specified options.
   void Unload(EngineUnloadOption opts) final;

-  void HandleChatCompletion(std::shared_ptr<Json::Value> jsonBody,
+  // Handle a chat completion request with the provided JSON body and callback.
+  void HandleChatCompletion(std::shared_ptr<Json::Value> json_body,
                             http_callback&& callback) final;
-  void HandleEmbedding(std::shared_ptr<Json::Value> jsonBody,
+
+  // Handle an embedding request with the provided JSON body and callback.
+  void HandleEmbedding(std::shared_ptr<Json::Value> json_body,
                        http_callback&& callback) final;
-  void LoadModel(std::shared_ptr<Json::Value> jsonBody,
+
+  // Load a model with the provided JSON body and callback.
+  void LoadModel(std::shared_ptr<Json::Value> json_body,
                  http_callback&& callback) final;
-  void UnloadModel(std::shared_ptr<Json::Value> jsonBody,
+
+  // Unload a model with the provided JSON body and callback.
+  void UnloadModel(std::shared_ptr<Json::Value> json_body,
                    http_callback&& callback) final;
-  void GetModelStatus(std::shared_ptr<Json::Value> jsonBody,
+
+  // Get the status of a model with the provided JSON body and callback.
+  void GetModelStatus(std::shared_ptr<Json::Value> json_body,
                       http_callback&& callback) final;
-  void GetModels(std::shared_ptr<Json::Value> jsonBody,
+
+  // Get the list of available models with the provided JSON body and callback.
+  void GetModels(std::shared_ptr<Json::Value> json_body,
                  http_callback&& callback) final;
+
+  // Set the file logger with the maximum number of log lines and log file path.
   void SetFileLogger(int max_log_lines, const std::string& log_path) final;
+
+  // Set the log level for the engine.
   void SetLogLevel(trantor::Logger::LogLevel log_level =
                        trantor::Logger::LogLevel::kInfo) final;
+
+  // Stop the inferencing process for the specified model.
   void StopInferencing(const std::string& model_id) final;
+
  private:
-  bool LoadModelImpl(std::shared_ptr<Json::Value> jsonBody);
+  bool LoadModelImpl(std::shared_ptr<Json::Value> json_body);
   void HandleInferenceImpl(
       llama::inferences::ChatCompletionRequest&& completion,
       http_callback&& callback);
-  void HandleEmbeddingImpl(std::shared_ptr<Json::Value> jsonBody,
+  void HandleEmbeddingImpl(std::shared_ptr<Json::Value> json_body,
                            http_callback&& callback);
   bool CheckModelLoaded(http_callback& callback, const std::string& model_id);
   void WarmUpModel(const std::string& model_id);
@@ -65,13 +85,17 @@ class LlamaEngine : public EngineI {
                                   http_callback&& callback,
                                   const std::string& model);

+  // Handle an OpenAI chat completion request with the provided JSON body, callback, and model.
   void HandleOpenAiChatCompletion(std::shared_ptr<Json::Value> json_body,
                                   http_callback&& callback,
                                   const std::string& model);
+
+  // Handle a non-OpenAI chat completion request with the provided JSON body, callback, and model.
   void HandleNonOpenAiChatCompletion(std::shared_ptr<Json::Value> json_body,
                                      http_callback&& callback,
                                      const std::string& model);

+  // Handle a LLaMA C++ embedding request with the provided JSON body, callback, and model.
   bool HandleLlamaCppEmbedding(std::shared_ptr<Json::Value> json_body,
                                http_callback&& callback,
                                const std::string& model);
@@ -79,60 +103,6 @@ class LlamaEngine : public EngineI {
   bool IsLlamaServerModel(const std::string& model) const;

  private:
-  struct IsDone {
-    bool is_done;
-    int operator()() { return is_done; }
-  };
-  struct HasError {
-    bool has_error;
-    int operator()() { return has_error; }
-  };
-  struct IsStream {
-    bool is_stream;
-    int operator()() { return is_stream; }
-  };
-  struct StatusCode {
-    int status_code;
-    int operator()() { return status_code; }
-  };
-  struct ResStatus {
-   private:
-    IsDone is_done;
-    HasError has_error;
-    IsStream is_stream;
-    StatusCode status_code;
-
-   public:
-    ResStatus(IsDone is_done, HasError has_error, IsStream is_stream,
-              StatusCode status_code)
-        : is_done(is_done),
-          has_error(has_error),
-          is_stream(is_stream),
-          status_code(status_code) {}
-
-    Json::Value ToJson() {
-      Json::Value status;
-      status["is_done"] = is_done();
-      status["has_error"] = has_error();
-      status["is_stream"] = is_stream();
-      status["status_code"] = status_code();
-      return status;
-    };
-  };
-
-  struct ResStreamData {
-   private:
-    std::string s;
-
-   public:
-    ResStreamData(std::string s) : s(std::move(s)) {}
-    Json::Value ToJson() {
-      Json::Value d;
-      d["data"] = s;
-      return d;
-    }
-  };
-
   struct ServerInfo {
     LlamaServerContext ctx;
     std::unique_ptr<trantor::ConcurrentTaskQueue> q;
@@ -176,7 +146,8 @@ class LlamaEngine : public EngineI {
   std::atomic no_of_chats_ = 0;
   bool print_version_ = true;
-  std::unique_ptr<trantor::FileLogger> async_file_logger_;
+
+  EngineLoadOption load_opt_;

 #if defined(_WIN32)
   std::vector cookies_;