Skip to content

Commit

Permalink
Feat: background process server (#1043)
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenhoangthuan99 authored Aug 29, 2024
1 parent ba6816f commit 05b4b2c
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 55 deletions.
5 changes: 4 additions & 1 deletion engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ endif()

add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}")

# Gate the test subdirectory behind an explicit opt-in so regular builds
# stay fast and do not require the test dependencies (GTest etc.).
# NOTE(review): the CMAKE_ variable prefix is reserved for CMake itself;
# a follow-up could rename this to CORTEX_BUILD_TEST (kept as-is here for
# backward compatibility with existing -DCMAKE_BUILD_TEST=ON invocations).
option(CMAKE_BUILD_TEST "Enable testing" OFF)
if(CMAKE_BUILD_TEST)
  add_subdirectory(test)
endif()

find_package(jsoncpp CONFIG REQUIRED)
find_package(Drogon CONFIG REQUIRED)
Expand Down
6 changes: 6 additions & 0 deletions engine/commands/model_get_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "utils/cortex_utils.h"

namespace commands {

ModelGetCmd::ModelGetCmd(std::string model_handle)
: model_handle_(std::move(model_handle)) {}

Expand All @@ -17,6 +18,7 @@ void ModelGetCmd::Exec() {
// Iterate through directory
for (const auto& entry :
std::filesystem::directory_iterator(cortex_utils::models_folder)) {

if (entry.is_regular_file() && entry.path().stem() == model_handle_ &&
entry.path().extension() == ".yaml") {
try {
Expand Down Expand Up @@ -60,6 +62,7 @@ void ModelGetCmd::Exec() {
if (!std::isnan(static_cast<double>(model_config.max_tokens)))
std::cout << "max_tokens: " << model_config.max_tokens << "\n";
if (!std::isnan(static_cast<double>(model_config.stream)))

std::cout << "stream: " << std::boolalpha << model_config.stream
<< "\n";
if (!std::isnan(static_cast<double>(model_config.ngl)))
Expand All @@ -71,6 +74,7 @@ void ModelGetCmd::Exec() {
if (!model_config.engine.empty())
std::cout << "engine: " << model_config.engine << "\n";
if (!model_config.prompt_template.empty())

std::cout << "prompt_template: " << model_config.prompt_template
<< "\n";
if (!model_config.system_template.empty())
Expand All @@ -86,6 +90,7 @@ void ModelGetCmd::Exec() {
if (!model_config.gpu_arch.empty())
std::cout << "gpu_arch: " << model_config.gpu_arch << "\n";
if (!model_config.quantization_method.empty())

std::cout << "quantization_method: "
<< model_config.quantization_method << "\n";
if (!model_config.precision.empty())
Expand All @@ -96,6 +101,7 @@ void ModelGetCmd::Exec() {

// Print non-null strings
if (!model_config.trtllm_version.empty())

std::cout << "trtllm_version: " << model_config.trtllm_version
<< "\n";
if (!std::isnan(static_cast<double>(model_config.text_model)))
Expand Down
2 changes: 2 additions & 0 deletions engine/commands/model_get_cmd.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#pragma once


#include <cmath> // For std::isnan
#include <string>
namespace commands {

class ModelGetCmd {
public:

ModelGetCmd(std::string model_handle);
void Exec();

Expand Down
2 changes: 1 addition & 1 deletion engine/controllers/command_line_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#include "commands/engine_init_cmd.h"
#include "commands/model_list_cmd.h"
#include "commands/model_get_cmd.h"

#include "commands/model_pull_cmd.h"
#include "commands/start_model_cmd.h"
#include "commands/stop_model_cmd.h"
Expand Down Expand Up @@ -140,6 +139,7 @@ void CommandLineParser::EngineInstall(CLI::App* parent,
"install", "Install " + engine_name + " engine");
install_cmd->add_option("-v, --version", version,
"Engine version. Default will be latest");

install_cmd->callback([engine_name, &version] {
commands::EngineInitCmd eic(engine_name, version);
eic.Exec();
Expand Down
160 changes: 108 additions & 52 deletions engine/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
#if defined(__APPLE__) && defined(__MACH__)
#include <libgen.h> // for dirname()
#include <mach-o/dyld.h>
#include <sys/types.h>
#elif defined(__linux__)
#include <libgen.h> // for dirname()
#include <sys/types.h>
#include <unistd.h> // for readlink()
#elif defined(_WIN32)
#include <windows.h>
Expand All @@ -20,8 +22,104 @@
#error "Unsupported platform!"
#endif


// Sets up file logging and runs the drogon HTTP server on the fixed
// host/port. Blocks inside drogon::app().run() for the server's lifetime.
void RunServer() {
  // Create logs/ folder and route trantor's log output to a size-limited file.
  std::filesystem::create_directory(cortex_utils::logs_folder);
  trantor::AsyncFileLogger asyncFileLogger;
  asyncFileLogger.setFileName(cortex_utils::logs_base_name);
  asyncFileLogger.startLogging();
  trantor::Logger::setOutputFunction(
      [&](const char* msg, const uint64_t len) {
        asyncFileLogger.output(msg, len);
      },
      [&]() { asyncFileLogger.flush(); });
  asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);

  // Server settings are fixed for now; the old argc/argv overrides were
  // dropped when the server was moved to a background process.
  constexpr int thread_num = 1;
  const std::string host = "127.0.0.1";
  constexpr int port = 3928;

  // Use at least one drogon worker thread per logical core.
  const int logical_cores =
      static_cast<int>(std::thread::hardware_concurrency());
  const int drogon_thread_num = std::max(thread_num, logical_cores);
#ifdef CORTEX_CPP_VERSION
  LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
#else
  LOG_INFO << "cortex.cpp version: undefined";
#endif

  LOG_INFO << "Server started, listening at: " << host << ":" << port;
  LOG_INFO << "Please load your model";
  drogon::app().addListener(host, port);
  drogon::app().setThreadNum(drogon_thread_num);
  LOG_INFO << "Number of threads is: " << drogon::app().getThreadNum();

  drogon::app().run();  // blocks until the app is quit
}

// Spawns the server as a background process. On Windows this re-launches the
// current executable with --start-server; on POSIX it fork()s and runs the
// server in the child while the parent returns immediately.
void ForkProcess() {
#if defined(_WIN32) || defined(_WIN64)
  // Windows-specific code to create a new process
  STARTUPINFO si;
  PROCESS_INFORMATION pi;

  ZeroMemory(&si, sizeof(si));
  si.cb = sizeof(si);
  ZeroMemory(&pi, sizeof(pi));
  // CreateProcess may modify the command-line buffer, so pass mutable storage
  // (std::string::data() is non-const since C++17).
  std::string cmds =
      cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server";
  // Create child process
  if (!CreateProcess(NULL,         // No module name (use command line)
                     cmds.data(),  // Command line (mutable buffer)
                     NULL,         // Process handle not inheritable
                     NULL,         // Thread handle not inheritable
                     FALSE,        // Set handle inheritance to FALSE
                     0,            // No creation flags
                     NULL,         // Use parent's environment block
                     NULL,         // Use parent's starting directory
                     &si,          // Pointer to STARTUPINFO structure
                     &pi))         // Pointer to PROCESS_INFORMATION structure
  {
    std::cout << "Could not start server: " << GetLastError() << std::endl;
  } else {
    std::cout << "Server started" << std::endl;
    // Close our copies of the child's handles so they don't leak; this does
    // not terminate the child process.
    CloseHandle(pi.hProcess);
    CloseHandle(pi.hThread);
  }

#else
  // Unix-like system-specific code to fork a child process
  pid_t pid = fork();

  if (pid < 0) {
    // Fork failed — report why instead of streaming an empty message.
    std::cerr << "Could not start server: fork() failed" << std::endl;
    return;
  } else if (pid == 0) {
    // Child process: runs the server; this call blocks for the server's
    // lifetime. NOTE(review): if RunServer() ever returns, control falls back
    // into the caller's code path in the child — consider _exit(0) after it.
    RunServer();
  } else {
    // Parent process: child now owns the server.
    std::cout << "Server started" << std::endl;
  }
#endif
}

int main(int argc, char* argv[]) {

// Check if this process is for python execution
if (argc > 1) {
if (strcmp(argv[1], "--run_python_file") == 0) {
Expand All @@ -44,58 +142,16 @@ int main(int argc, char* argv[]) {
}

if (argc > 1) {
CommandLineParser clp;
clp.SetupCommand(argc, argv);
return 0;
}

// Create logs/ folder and setup log to file
std::filesystem::create_directory(cortex_utils::logs_folder);
trantor::AsyncFileLogger asyncFileLogger;
asyncFileLogger.setFileName(cortex_utils::logs_base_name);
asyncFileLogger.startLogging();
trantor::Logger::setOutputFunction(
[&](const char* msg, const uint64_t len) {
asyncFileLogger.output(msg, len);
},
[&]() { asyncFileLogger.flush(); });
asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);

int thread_num = 1;
std::string host = "127.0.0.1";
int port = 3928;

// Number of cortex.cpp threads
if (argc > 1) {
thread_num = std::atoi(argv[1]);
}

// Check for host argument
if (argc > 2) {
host = argv[2];
}

// Check for port argument
if (argc > 3) {
port = std::atoi(argv[3]); // Convert string argument to int
if (strcmp(argv[1], "--start-server") == 0) {
RunServer();
return 0;
} else {
CommandLineParser clp;
clp.SetupCommand(argc, argv);
return 0;
}
}

int logical_cores = std::thread::hardware_concurrency();
int drogon_thread_num = std::max(thread_num, logical_cores);
// cortex_utils::nitro_logo();
#ifdef CORTEX_CPP_VERSION
LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
#else
LOG_INFO << "cortex.cpp version: undefined";
#endif

LOG_INFO << "Server started, listening at: " << host << ":" << port;
LOG_INFO << "Please load your model";
drogon::app().addListener(host, port);
drogon::app().setThreadNum(drogon_thread_num);
LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();

drogon::app().run();

ForkProcess();
return 0;
}
2 changes: 1 addition & 1 deletion engine/test/components/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ add_executable(${PROJECT_NAME} ${SRCS})
find_package(Drogon CONFIG REQUIRED)
find_package(GTest CONFIG REQUIRED)

target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gmock
target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gtest_main
${CMAKE_THREAD_LIBS_INIT})
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../)

Expand Down

0 comments on commit 05b4b2c

Please sign in to comment.