From 051e9d6ef325eb0db06697e88d2c79f32f7613de Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Tue, 31 Dec 2024 10:59:23 +0700
Subject: [PATCH] chore: cleanup

---
 base/cortex-common/enginei.h   |  2 +
 examples/server/CMakeLists.txt |  1 +
 examples/server/server.cc      | 45 ++++++++++------
 src/llama_data.h               | 60 +++++++++++++++++++++
 src/llama_engine.cc            | 42 ++++++---------
 src/llama_engine.h             | 99 ++++++++++++----------------------
 6 files changed, 143 insertions(+), 106 deletions(-)
 create mode 100644 src/llama_data.h

diff --git a/base/cortex-common/enginei.h b/base/cortex-common/enginei.h
index 200808ed..11df283e 100644
--- a/base/cortex-common/enginei.h
+++ b/base/cortex-common/enginei.h
@@ -6,6 +6,7 @@
 #include
 #include "json/value.h"
+#include "trantor/utils/AsyncFileLogger.h"
 #include "trantor/utils/Logger.h"
 // Interface for inference engine.
@@ -22,6 +23,7 @@ class EngineI {
     std::filesystem::path log_path;
     int max_log_lines;
     trantor::Logger::LogLevel log_level;
+    trantor::AsyncFileLogger* logger;
   };

   struct EngineUnloadOption {
diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
index eac61127..1c743b3e 100644
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@@ -15,6 +15,7 @@ add_executable(${PROJECT_NAME}
     server.cc
     dylib.h
     httplib.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/file_logger.cc
 )

 set(THIRD_PARTY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../build_deps/_install)
diff --git a/examples/server/server.cc b/examples/server/server.cc
index 6531934c..94237312 100644
--- a/examples/server/server.cc
+++ b/examples/server/server.cc
@@ -8,14 +8,12 @@
 #include
 #include
 #include
-#include "trantor/utils/Logger.h"
+#include "../../src/file_logger.h"
+#include "../../src/llama_utils.h"
+
 class Server {
  public:
-  Server() {
-    dylib_ = std::make_unique("./engines/cortex.llamacpp", "engine");
-    auto func = dylib_->get_function("get_engine");
-    engine_ = func();
-  }
+  Server() {}

   ~Server() {
     if (engine_) {
     }
   }

+  void Initialize(trantor::AsyncFileLogger* logger) {
+    dylib_ = std::make_unique("./engines/cortex.llamacpp", "engine");
+    auto func = dylib_->get_function("get_engine");
+    engine_ = func();
+    EngineI::EngineLoadOption opts;
+    opts.engine_path = llama_utils::GetExecutableFolderContainerPath() /
+                       "engines" / "cortex.llamacpp";
+    opts.log_path = "./logs/cortex.log";
+    opts.max_log_lines = 10000;
+    opts.logger = logger;
+    engine_->Load(opts);
+  }
+
   void ForceStopInferencing(const std::string& model_id) {
     if (engine_) {
       engine_->StopInferencing(model_id);
     }
   }
@@ -86,16 +97,16 @@ inline void signal_handler(int signal) {
 using SyncQueue = Server::SyncQueue;

 int main(int argc, char** argv) {
-  // std::filesystem::create_directories("./logs");
-  // trantor::AsyncFileLogger asyncFileLogger;
-  // asyncFileLogger.setFileName("logs/cortex");
-  // asyncFileLogger.startLogging();
-  // trantor::Logger::setOutputFunction(
-  //     [&](const char* msg, const uint64_t len) {
-  //       asyncFileLogger.output(msg, len);
-  //     },
-  //     [&]() { asyncFileLogger.flush(); });
-  // asyncFileLogger.setFileSizeLimit(100000000);
+  std::filesystem::create_directories("./logs");
+  trantor::FileLogger async_file_logger;
+  async_file_logger.setFileName("logs/cortex.log");
+  async_file_logger.startLogging();
+  trantor::Logger::setOutputFunction(
+      [&](const char* msg, const uint64_t len) {
+        async_file_logger.output_(msg, len);
+      },
+      [&]() { async_file_logger.flush(); });
+  async_file_logger.setFileSizeLimit(100000000);
   std::string hostname = "127.0.0.1";
   int port = 3928;
@@ -109,6 +120,8 @@ int main(int argc, char** argv) {
   }

   Server server;
+
+  server.Initialize(&async_file_logger);
   //set logger here
   // server.engine_->SetFileLogger();
diff --git a/src/llama_data.h b/src/llama_data.h
new file mode 100644
index 00000000..7753cd21
--- /dev/null
+++ b/src/llama_data.h
@@ -0,0 +1,60 @@
+#pragma once
+#include "json/json.h"
+
+struct IsDone {
+  bool is_done;
+  int operator()() { return is_done; }
+};
+
+struct HasError {
+  bool has_error;
+  int operator()() { return has_error; }
+};
+
+struct IsStream {
+  bool is_stream;
+  int operator()() { return is_stream; }
+};
+
+struct StatusCode {
+  int status_code;
+  int operator()() { return status_code; }
+};
+
+struct ResStatus {
+ private:
+  IsDone is_done;
+  HasError has_error;
+  IsStream is_stream;
+  StatusCode status_code;
+
+ public:
+  ResStatus(IsDone is_done, HasError has_error, IsStream is_stream,
+            StatusCode status_code)
+      : is_done(is_done),
+        has_error(has_error),
+        is_stream(is_stream),
+        status_code(status_code) {}
+
+  Json::Value ToJson() {
+    Json::Value status;
+    status["is_done"] = is_done();
+    status["has_error"] = has_error();
+    status["is_stream"] = is_stream();
+    status["status_code"] = status_code();
+    return status;
+  };
+};
+
+struct ResStreamData {
+ private:
+  std::string s;
+
+ public:
+  ResStreamData(std::string s) : s(std::move(s)) {}
+  Json::Value ToJson() {
+    Json::Value d;
+    d["data"] = s;
+    return d;
+  }
+};
\ No newline at end of file
diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index ba5bdf75..a20f13e0 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -333,7 +333,8 @@ Json::Value ParseJsonString(const std::string& json_str) {
 }  // namespace

 void LlamaEngine::Load(EngineLoadOption opts) {
-  LOG_INFO << "Loading engine..";
+  load_opt_ = opts;
+  LOG_DEBUG << "Loading engine..";

   LOG_DEBUG << "Is custom engine path: " << opts.is_custom_engine_path;
   LOG_DEBUG << "Engine path: " << opts.engine_path.string();
@@ -350,9 +351,6 @@ void LlamaEngine::Unload(EngineUnloadOption opts) {

 LlamaEngine::LlamaEngine(int log_option) {
   trantor::Logger::setLogLevel(trantor::Logger::kInfo);
-  if (log_option == kFileLoggerOption) {
-    async_file_logger_ = std::make_unique<trantor::FileLogger>();
-  }

   common_log_pause(common_log_main());

@@ -379,7 +377,6 @@ LlamaEngine::~LlamaEngine() {
     l.ReleaseResources();
   }
   server_map_.clear();
-  async_file_logger_.reset();

   LOG_INFO << "LlamaEngine destructed successfully";
 }
@@ -570,21 +567,17 @@ void LlamaEngine::StopInferencing(const std::string& model_id) {

 void LlamaEngine::SetFileLogger(int max_log_lines,
                                 const std::string& log_path) {
-  if (!async_file_logger_) {
-    async_file_logger_ = std::make_unique<trantor::FileLogger>();
-  }
-
-  async_file_logger_->setFileName(log_path);
-  async_file_logger_->setMaxLines(max_log_lines);  // Keep last 100000 lines
-  async_file_logger_->startLogging();
   trantor::Logger::setOutputFunction(
       [&](const char* msg, const uint64_t len) {
-        if (async_file_logger_)
-          async_file_logger_->output_(msg, len);
+        if (load_opt_.logger) {
+          if (auto l = static_cast<trantor::FileLogger*>(load_opt_.logger); l) {
+            l->output_(msg, len);
+          }
+        }
       },
       [&]() {
-        if (async_file_logger_)
-          async_file_logger_->flush();
+        if (load_opt_.logger)
+          load_opt_.logger->flush();
       });
   llama_log_set(
       [](ggml_log_level level, const char* text, void* user_data) {
@@ -601,8 +594,10 @@ void LlamaEngine::SetFileLogger(int max_log_lines,
         }
       },
       nullptr);
-  freopen(log_path.c_str(), "a", stderr);
-  freopen(log_path.c_str(), "a", stdout);
+  if (!freopen(log_path.c_str(), "a", stderr))
+    LOG_WARN << "Could not open stream for stderr";
+  if (!freopen(log_path.c_str(), "a", stdout))
+    LOG_WARN << "Could not open stream for stdout";
 }

 bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> json_body) {
@@ -1388,11 +1383,8 @@ bool LlamaEngine::SpawnLlamaServer(const Json::Value& json_params) {
   params += " --host " + s.host + " --port " + std::to_string(s.port);

   std::string exe_w = "llama-server.exe";
-  std::string current_path_w =
-      (llama_utils::GetExecutableFolderContainerPath() / "engines" /
-       "cortex.llamacpp")
-          .string();
-  std::string wcmds = current_path_w + "/" + exe_w + " " + params;
+  std::string wcmds =
+      load_opt_.engine_path.string() + "/" + exe_w + " " + params;
   LOG_DEBUG << "wcmds: " << wcmds;
   std::vector mutable_cmds(wcmds.begin(), wcmds.end());
   mutable_cmds.push_back(L'\0');
@@ -1432,9 +1424,7 @@ bool LlamaEngine::SpawnLlamaServer(const Json::Value& json_params) {
   } else if (s.pid == 0) {
     // Some engines requires to add lib search path before process being created
     std::string exe = "llama-server";
-    std::string p = (llama_utils::GetExecutableFolderContainerPath() /
-                     "engines" / "cortex.llamacpp" / exe)
-                        .string();
+    std::string p = (load_opt_.engine_path / exe).string();
     std::vector<std::string> params = ConvertJsonToParamsVector(json_params);
     params.push_back("--host");
     params.push_back(s.host);
diff --git a/src/llama_engine.h b/src/llama_engine.h
index 50629a20..a603ebfc 100644
--- a/src/llama_engine.h
+++ b/src/llama_engine.h
@@ -6,6 +6,7 @@
 #include "cortex-common/enginei.h"
 #include "file_logger.h"
 #include "llama.h"
+#include "llama_data.h"
 #include "llama_server_context.h"
 #include "trantor/utils/ConcurrentTaskQueue.h"
 #include "trantor/utils/Logger.h"
@@ -20,34 +21,53 @@ class LlamaEngine : public EngineI {

   ~LlamaEngine() final;

-  // #### Interface ####
+  // Load the engine with the specified options.
   void Load(EngineLoadOption opts) final;

+  // Unload the engine with the specified options.
   void Unload(EngineUnloadOption opts) final;

-  void HandleChatCompletion(std::shared_ptr<Json::Value> jsonBody,
+  // Handle a chat completion request with the provided JSON body and callback.
+  void HandleChatCompletion(std::shared_ptr<Json::Value> json_body,
                             http_callback&& callback) final;
-  void HandleEmbedding(std::shared_ptr<Json::Value> jsonBody,
+
+  // Handle an embedding request with the provided JSON body and callback.
+  void HandleEmbedding(std::shared_ptr<Json::Value> json_body,
                        http_callback&& callback) final;
-  void LoadModel(std::shared_ptr<Json::Value> jsonBody,
+
+  // Load a model with the provided JSON body and callback.
+  void LoadModel(std::shared_ptr<Json::Value> json_body,
                  http_callback&& callback) final;
-  void UnloadModel(std::shared_ptr<Json::Value> jsonBody,
+
+  // Unload a model with the provided JSON body and callback.
+  void UnloadModel(std::shared_ptr<Json::Value> json_body,
                    http_callback&& callback) final;
-  void GetModelStatus(std::shared_ptr<Json::Value> jsonBody,
+
+  // Get the status of a model with the provided JSON body and callback.
+  void GetModelStatus(std::shared_ptr<Json::Value> json_body,
                       http_callback&& callback) final;
-  void GetModels(std::shared_ptr<Json::Value> jsonBody,
+
+  // Get the list of available models with the provided JSON body and callback.
+  void GetModels(std::shared_ptr<Json::Value> json_body,
                  http_callback&& callback) final;
+
+  // Set the file logger with the maximum number of log lines and log file path.
   void SetFileLogger(int max_log_lines, const std::string& log_path) final;
+
+  // Set the log level for the engine.
   void SetLogLevel(trantor::Logger::LogLevel log_level =
                        trantor::Logger::LogLevel::kInfo) final;
+
+  // Stop the inferencing process for the specified model.
   void StopInferencing(const std::string& model_id) final;
+
  private:
-  bool LoadModelImpl(std::shared_ptr<Json::Value> jsonBody);
+  bool LoadModelImpl(std::shared_ptr<Json::Value> json_body);
   void HandleInferenceImpl(
       llama::inferences::ChatCompletionRequest&& completion,
       http_callback&& callback);
-  void HandleEmbeddingImpl(std::shared_ptr<Json::Value> jsonBody,
+  void HandleEmbeddingImpl(std::shared_ptr<Json::Value> json_body,
                            http_callback&& callback);
   bool CheckModelLoaded(http_callback& callback, const std::string& model_id);
   void WarmUpModel(const std::string& model_id);
@@ -65,13 +85,17 @@ class LlamaEngine : public EngineI {
                                   http_callback&& callback,
                                   const std::string& model);

+  // Handle an OpenAI chat completion request with the provided JSON body, callback, and model.
   void HandleOpenAiChatCompletion(std::shared_ptr<Json::Value> json_body,
                                   http_callback&& callback,
                                   const std::string& model);
+
+  // Handle a non-OpenAI chat completion request with the provided JSON body, callback, and model.
   void HandleNonOpenAiChatCompletion(std::shared_ptr<Json::Value> json_body,
                                      http_callback&& callback,
                                      const std::string& model);

+  // Handle a LLaMA C++ embedding request with the provided JSON body, callback, and model.
   bool HandleLlamaCppEmbedding(std::shared_ptr<Json::Value> json_body,
                                http_callback&& callback,
                                const std::string& model);
@@ -79,60 +103,6 @@ class LlamaEngine : public EngineI {
   bool IsLlamaServerModel(const std::string& model) const;

  private:
-  struct IsDone {
-    bool is_done;
-    int operator()() { return is_done; }
-  };
-  struct HasError {
-    bool has_error;
-    int operator()() { return has_error; }
-  };
-  struct IsStream {
-    bool is_stream;
-    int operator()() { return is_stream; }
-  };
-  struct StatusCode {
-    int status_code;
-    int operator()() { return status_code; }
-  };
-  struct ResStatus {
-   private:
-    IsDone is_done;
-    HasError has_error;
-    IsStream is_stream;
-    StatusCode status_code;
-
-   public:
-    ResStatus(IsDone is_done, HasError has_error, IsStream is_stream,
-              StatusCode status_code)
-        : is_done(is_done),
-          has_error(has_error),
-          is_stream(is_stream),
-          status_code(status_code) {}
-
-    Json::Value ToJson() {
-      Json::Value status;
-      status["is_done"] = is_done();
-      status["has_error"] = has_error();
-      status["is_stream"] = is_stream();
-      status["status_code"] = status_code();
-      return status;
-    };
-  };
-
-  struct ResStreamData {
-   private:
-    std::string s;
-
-   public:
-    ResStreamData(std::string s) : s(std::move(s)) {}
-    Json::Value ToJson() {
-      Json::Value d;
-      d["data"] = s;
-      return d;
-    }
-  };
-
   struct ServerInfo {
     LlamaServerContext ctx;
     std::unique_ptr<trantor::ConcurrentTaskQueue> q;
@@ -176,7 +146,8 @@ class LlamaEngine : public EngineI {
   std::atomic no_of_chats_ = 0;
   bool print_version_ = true;
-  std::unique_ptr<trantor::FileLogger> async_file_logger_;
+
+  EngineLoadOption load_opt_;

 #if defined(_WIN32)
   std::vector cookies_;