Skip to content

Commit

Permalink
Feat: background process server (#1043)
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenhoangthuan99 authored Aug 29, 2024
1 parent ba6816f commit 05b4b2c
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 55 deletions.
5 changes: 4 additions & 1 deletion engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ endif()

add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}")

# Gate the test subdirectory behind an explicit opt-in so regular builds
# stay fast and do not require the test dependencies (GTest etc.).
# NOTE(review): the CMAKE_ variable prefix is reserved for CMake itself;
# a follow-up could rename this to CORTEX_BUILD_TEST (kept as-is here for
# backward compatibility with existing -DCMAKE_BUILD_TEST=ON invocations).
option(CMAKE_BUILD_TEST "Enable testing" OFF)
if(CMAKE_BUILD_TEST)
  add_subdirectory(test)
endif()

find_package(jsoncpp CONFIG REQUIRED)
find_package(Drogon CONFIG REQUIRED)
Expand Down
6 changes: 6 additions & 0 deletions engine/commands/model_get_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "utils/cortex_utils.h"

namespace commands {

ModelGetCmd::ModelGetCmd(std::string model_handle)
: model_handle_(std::move(model_handle)) {}

Expand All @@ -17,6 +18,7 @@ void ModelGetCmd::Exec() {
// Iterate through directory
for (const auto& entry :
std::filesystem::directory_iterator(cortex_utils::models_folder)) {

if (entry.is_regular_file() && entry.path().stem() == model_handle_ &&
entry.path().extension() == ".yaml") {
try {
Expand Down Expand Up @@ -60,6 +62,7 @@ void ModelGetCmd::Exec() {
if (!std::isnan(static_cast<double>(model_config.max_tokens)))
std::cout << "max_tokens: " << model_config.max_tokens << "\n";
if (!std::isnan(static_cast<double>(model_config.stream)))

std::cout << "stream: " << std::boolalpha << model_config.stream
<< "\n";
if (!std::isnan(static_cast<double>(model_config.ngl)))
Expand All @@ -71,6 +74,7 @@ void ModelGetCmd::Exec() {
if (!model_config.engine.empty())
std::cout << "engine: " << model_config.engine << "\n";
if (!model_config.prompt_template.empty())

std::cout << "prompt_template: " << model_config.prompt_template
<< "\n";
if (!model_config.system_template.empty())
Expand All @@ -86,6 +90,7 @@ void ModelGetCmd::Exec() {
if (!model_config.gpu_arch.empty())
std::cout << "gpu_arch: " << model_config.gpu_arch << "\n";
if (!model_config.quantization_method.empty())

std::cout << "quantization_method: "
<< model_config.quantization_method << "\n";
if (!model_config.precision.empty())
Expand All @@ -96,6 +101,7 @@ void ModelGetCmd::Exec() {

// Print non-null strings
if (!model_config.trtllm_version.empty())

std::cout << "trtllm_version: " << model_config.trtllm_version
<< "\n";
if (!std::isnan(static_cast<double>(model_config.text_model)))
Expand Down
2 changes: 2 additions & 0 deletions engine/commands/model_get_cmd.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#pragma once


#include <cmath> // For std::isnan
#include <string>
namespace commands {

class ModelGetCmd {
public:

ModelGetCmd(std::string model_handle);
void Exec();

Expand Down
2 changes: 1 addition & 1 deletion engine/controllers/command_line_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#include "commands/engine_init_cmd.h"
#include "commands/model_list_cmd.h"
#include "commands/model_get_cmd.h"

#include "commands/model_pull_cmd.h"
#include "commands/start_model_cmd.h"
#include "commands/stop_model_cmd.h"
Expand Down Expand Up @@ -140,6 +139,7 @@ void CommandLineParser::EngineInstall(CLI::App* parent,
"install", "Install " + engine_name + " engine");
install_cmd->add_option("-v, --version", version,
"Engine version. Default will be latest");

install_cmd->callback([engine_name, &version] {
commands::EngineInitCmd eic(engine_name, version);
eic.Exec();
Expand Down
160 changes: 108 additions & 52 deletions engine/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
#if defined(__APPLE__) && defined(__MACH__)
#include <libgen.h> // for dirname()
#include <mach-o/dyld.h>
#include <sys/types.h>
#elif defined(__linux__)
#include <libgen.h> // for dirname()
#include <sys/types.h>
#include <unistd.h> // for readlink()
#elif defined(_WIN32)
#include <windows.h>
Expand All @@ -20,8 +22,104 @@
#error "Unsupported platform!"
#endif


// Sets up file logging and runs the drogon HTTP server on the fixed
// host/port. Blocks inside drogon::app().run() for the server's lifetime.
void RunServer() {
  // Create logs/ folder and route trantor's log output to a size-limited file.
  std::filesystem::create_directory(cortex_utils::logs_folder);
  trantor::AsyncFileLogger asyncFileLogger;
  asyncFileLogger.setFileName(cortex_utils::logs_base_name);
  asyncFileLogger.startLogging();
  trantor::Logger::setOutputFunction(
      [&](const char* msg, const uint64_t len) {
        asyncFileLogger.output(msg, len);
      },
      [&]() { asyncFileLogger.flush(); });
  asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);

  // Server settings are fixed for now; the old argc/argv overrides were
  // dropped when the server was moved to a background process.
  constexpr int thread_num = 1;
  const std::string host = "127.0.0.1";
  constexpr int port = 3928;

  // Use at least one drogon worker thread per logical core.
  const int logical_cores =
      static_cast<int>(std::thread::hardware_concurrency());
  const int drogon_thread_num = std::max(thread_num, logical_cores);
#ifdef CORTEX_CPP_VERSION
  LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
#else
  LOG_INFO << "cortex.cpp version: undefined";
#endif

  LOG_INFO << "Server started, listening at: " << host << ":" << port;
  LOG_INFO << "Please load your model";
  drogon::app().addListener(host, port);
  drogon::app().setThreadNum(drogon_thread_num);
  LOG_INFO << "Number of threads is: " << drogon::app().getThreadNum();

  drogon::app().run();  // blocks until the app is quit
}

// Spawns the server as a background process. On Windows this re-launches the
// current executable with --start-server; on POSIX it fork()s and runs the
// server in the child while the parent returns immediately.
void ForkProcess() {
#if defined(_WIN32) || defined(_WIN64)
  // Windows-specific code to create a new process
  STARTUPINFO si;
  PROCESS_INFORMATION pi;

  ZeroMemory(&si, sizeof(si));
  si.cb = sizeof(si);
  ZeroMemory(&pi, sizeof(pi));
  // CreateProcess may modify the command-line buffer, so pass mutable storage
  // (std::string::data() is non-const since C++17).
  std::string cmds =
      cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server";
  // Create child process
  if (!CreateProcess(NULL,         // No module name (use command line)
                     cmds.data(),  // Command line (mutable buffer)
                     NULL,         // Process handle not inheritable
                     NULL,         // Thread handle not inheritable
                     FALSE,        // Set handle inheritance to FALSE
                     0,            // No creation flags
                     NULL,         // Use parent's environment block
                     NULL,         // Use parent's starting directory
                     &si,          // Pointer to STARTUPINFO structure
                     &pi))         // Pointer to PROCESS_INFORMATION structure
  {
    std::cout << "Could not start server: " << GetLastError() << std::endl;
  } else {
    std::cout << "Server started" << std::endl;
    // Close our copies of the child's handles so they don't leak; this does
    // not terminate the child process.
    CloseHandle(pi.hProcess);
    CloseHandle(pi.hThread);
  }

#else
  // Unix-like system-specific code to fork a child process
  pid_t pid = fork();

  if (pid < 0) {
    // Fork failed — report why instead of streaming an empty message.
    std::cerr << "Could not start server: fork() failed" << std::endl;
    return;
  } else if (pid == 0) {
    // Child process: runs the server; this call blocks for the server's
    // lifetime. NOTE(review): if RunServer() ever returns, control falls back
    // into the caller's code path in the child — consider _exit(0) after it.
    RunServer();
  } else {
    // Parent process: child now owns the server.
    std::cout << "Server started" << std::endl;
  }
#endif
}

int main(int argc, char* argv[]) {

// Check if this process is for python execution
if (argc > 1) {
if (strcmp(argv[1], "--run_python_file") == 0) {
Expand All @@ -44,58 +142,16 @@ int main(int argc, char* argv[]) {
}

if (argc > 1) {
CommandLineParser clp;
clp.SetupCommand(argc, argv);
return 0;
}

// Create logs/ folder and setup log to file
std::filesystem::create_directory(cortex_utils::logs_folder);
trantor::AsyncFileLogger asyncFileLogger;
asyncFileLogger.setFileName(cortex_utils::logs_base_name);
asyncFileLogger.startLogging();
trantor::Logger::setOutputFunction(
[&](const char* msg, const uint64_t len) {
asyncFileLogger.output(msg, len);
},
[&]() { asyncFileLogger.flush(); });
asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);

int thread_num = 1;
std::string host = "127.0.0.1";
int port = 3928;

// Number of cortex.cpp threads
if (argc > 1) {
thread_num = std::atoi(argv[1]);
}

// Check for host argument
if (argc > 2) {
host = argv[2];
}

// Check for port argument
if (argc > 3) {
port = std::atoi(argv[3]); // Convert string argument to int
if (strcmp(argv[1], "--start-server") == 0) {
RunServer();
return 0;
} else {
CommandLineParser clp;
clp.SetupCommand(argc, argv);
return 0;
}
}

int logical_cores = std::thread::hardware_concurrency();
int drogon_thread_num = std::max(thread_num, logical_cores);
// cortex_utils::nitro_logo();
#ifdef CORTEX_CPP_VERSION
LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
#else
LOG_INFO << "cortex.cpp version: undefined";
#endif

LOG_INFO << "Server started, listening at: " << host << ":" << port;
LOG_INFO << "Please load your model";
drogon::app().addListener(host, port);
drogon::app().setThreadNum(drogon_thread_num);
LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();

drogon::app().run();

ForkProcess();
return 0;
}
2 changes: 1 addition & 1 deletion engine/test/components/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ add_executable(${PROJECT_NAME} ${SRCS})
find_package(Drogon CONFIG REQUIRED)
find_package(GTest CONFIG REQUIRED)

target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gmock
target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gtest_main
${CMAKE_THREAD_LIBS_INIT})
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../)

Expand Down

0 comments on commit 05b4b2c

Please sign in to comment.