diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 837bdb720..000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: 'bug: [DESCRIPTION]' -labels: 'type: bug' -assignees: '' - ---- - -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -Steps to reproduce the behavior: -1. Go to '...' -2. Click on '....' -3. Scroll down to '....' -4. See error - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. - -**Desktop (please complete the following information):** - - OS: [e.g. iOS] - - Browser [e.g. chrome, safari] - - Version [e.g. 22] - -**Smartphone (please complete the following information):** - - Device: [e.g. iPhone6] - - OS: [e.g. iOS8.1] - - Browser [e.g. stock browser, safari] - - Version [e.g. 22] - -**Additional context** -Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 000000000..d0182a83a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: "\U0001F41B Bug Report" +description: "If something isn't working as expected \U0001F914" +labels: [ "type: bug" ] +title: 'bug: [DESCRIPTION]' + +body: + - type: input + validations: + required: true + attributes: + label: "Cortex version" + description: "**Tip:** The version is in the app's bottom right corner" + + - type: textarea + validations: + required: true + attributes: + label: "Describe the Bug" + description: "A clear & concise description of the bug" + + - type: textarea + attributes: + label: "Steps to Reproduce" + description: | + Please list out steps to reproduce the issue + placeholder: | + 1. Go to '...' + 2. Click on '...' 
+ + - type: textarea + attributes: + label: "Screenshots / Logs" + description: | + You can find logs in: ~/cortex/logs + + - type: checkboxes + attributes: + label: "What is your OS?" + options: + - label: MacOS + - label: Windows + - label: Linux + + - type: checkboxes + attributes: + label: "What engine are you running?" + options: + - label: cortex.llamacpp (default) + - label: cortex.tensorrt-llm (Nvidia GPUs) + - label: cortex.onnx (NPUs, DirectML) \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..56e11b10a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,7 @@ +## Contributors are encouraged to use the issue templates; blank issues remain enabled +blank_issues_enabled: true + +contact_links: + - name: "\U0001F4AC Cortex Discussions" + url: "https://github.com/orgs/janhq/discussions/categories/q-a" + about: "Get help, discuss features & roadmap, and share your projects" \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/discussion-thread.md b/.github/ISSUE_TEMPLATE/discussion-thread.md deleted file mode 100644 index 09e52ae68..000000000 --- a/.github/ISSUE_TEMPLATE/discussion-thread.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -name: Discussion thread -about: Start an open ended discussion -title: 'Discussion: [TOPIC HERE]' -labels: '' -assignees: '' - ---- - -**Motivation** - -**Discussion** - -**Resources** diff --git a/.github/ISSUE_TEMPLATE/epic-request.md b/.github/ISSUE_TEMPLATE/epic-request.md index bfad8e5f8..486ae90b6 100644 --- a/.github/ISSUE_TEMPLATE/epic-request.md +++ b/.github/ISSUE_TEMPLATE/epic-request.md @@ -8,13 +8,10 @@ assignees: '' --- **Problem** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -**Success Criteria** -A clear and concise description of what you want to happen. 
-**Sub Issues** +**Success Criteria** - -**Additional context** -Add any other context or screenshots about the epic request here. +**Tasklist** +- [ ] diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 26f586bd0..000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for this project -title: 'feat: [DESCRIPTION]' -labels: 'type: feature request' -assignees: '' - ---- - -**Problem** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] - -**Success Criteria** -A clear and concise description of what you want to happen. - -**Additional context** -Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 000000000..1d267e500 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,19 @@ +name: "\U0001F680 Feature Request" +description: "Suggest an idea for this project \U0001F63B!" +title: 'idea: [DESCRIPTION]' +body: + - type: textarea + validations: + required: true + attributes: + label: "Problem Statement" + description: "Describe the problem you're facing" + placeholder: | + I'm always frustrated when ... + + - type: textarea + validations: + required: true + attributes: + label: "Feature Idea" + description: "Describe what you want instead. Examples are welcome!" 
\ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/model_request.yml b/.github/ISSUE_TEMPLATE/model_request.yml new file mode 100644 index 000000000..c424de8fc --- /dev/null +++ b/.github/ISSUE_TEMPLATE/model_request.yml @@ -0,0 +1,21 @@ +name: "\U0001F929 Model Request" +description: "Request a new model to be compiled" +title: 'feat: [DESCRIPTION]' +labels: 'type: model request' +body: + - type: markdown + attributes: + value: "**Tip:** Download any model with `cortex pull HUGGINGFACE_MODEL_ID`. Use this form for unsupported models only." + - type: textarea + validations: + required: true + attributes: + label: "Model Requests" + description: "If applicable, include the source URL, licenses, and any other relevant information" + - type: checkboxes + attributes: + label: "Which formats?" + options: + - label: GGUF (llama.cpp) + - label: TensorRT (TensorRT-LLM) + - label: ONNX (Onnx Runtime) diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 3be4705da..58661c508 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -86,7 +86,7 @@ if(DEFINED CMAKE_JS_INC) # define NPI_VERSION add_compile_definitions(NAPI_VERSION=8) endif() - + add_compile_definitions(CORTEX_VARIANT="${CORTEX_VARIANT}") add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}") add_compile_definitions(CORTEX_CONFIG_FILE_PATH="${CORTEX_CONFIG_FILE_PATH}") @@ -115,6 +115,7 @@ if(DEFINED CMAKE_JS_INC) add_library(${PROJECT_NAME} SHARED addon.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc + ${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc ${CMAKE_JS_SRC} ) @@ -131,6 +132,7 @@ if(DEFINED CMAKE_JS_INC) else() # Official build add_executable(${PROJECT_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc + ${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc ) endif() diff --git a/engine/commands/cortex_upd_cmd.cc b/engine/commands/cortex_upd_cmd.cc index 9027203e7..a4343e55c 100644 --- a/engine/commands/cortex_upd_cmd.cc +++ 
b/engine/commands/cortex_upd_cmd.cc @@ -4,6 +4,7 @@ #include "cortex_upd_cmd.h" #include "httplib.h" #include "nlohmann/json.hpp" +#include "server_stop_cmd.h" #include "services/download_service.h" #include "utils/archive_utils.h" #include "utils/file_manager_utils.h" @@ -12,21 +13,38 @@ namespace commands { -namespace { -const std::string kCortexBinary = "cortex-cpp"; -} - CortexUpdCmd::CortexUpdCmd() {} void CortexUpdCmd::Exec(std::string v) { - // TODO(sang) stop server if it is running + { + auto config = file_manager_utils::GetCortexConfig(); + httplib::Client cli(config.apiServerHost + ":" + config.apiServerPort); + auto res = cli.Get("/health/healthz"); + if (res) { + CLI_LOG("Server is running. Stopping server before updating!"); + commands::ServerStopCmd ssc(config.apiServerHost, + std::stoi(config.apiServerPort)); + ssc.Exec(); + } + } + if (CORTEX_VARIANT == file_manager_utils::kNightlyVariant) { + if (!GetNightly(v)) + return; + } else { + if (!GetStableAndBeta(v)) + return; + } + CLI_LOG("Update cortex sucessfully"); +} + +bool CortexUpdCmd::GetStableAndBeta(const std::string& v) { // Check if the architecture and OS are supported auto system_info = system_info_utils::GetSystemInfo(); if (system_info.arch == system_info_utils::kUnsupported || system_info.os == system_info_utils::kUnsupported) { CTL_ERR("Unsupported OS or architecture: " << system_info.os << ", " << system_info.arch); - return; + return false; } CTL_INF("OS: " << system_info.os << ", Arch: " << system_info.arch); @@ -50,7 +68,7 @@ void CortexUpdCmd::Exec(std::string v) { std::string matched_variant = ""; for (auto& asset : assets) { auto asset_name = asset["name"].get(); - if (asset_name.find("cortex-cpp") != std::string::npos && + if (asset_name.find(kCortexBinary) != std::string::npos && asset_name.find(os_arch) != std::string::npos) { matched_variant = asset_name; break; @@ -59,7 +77,7 @@ void CortexUpdCmd::Exec(std::string v) { } if (matched_variant.empty()) { CTL_ERR("No variant 
found for " << os_arch); - return; + return false; } CTL_INF("Matched variant: " << matched_variant); @@ -99,14 +117,6 @@ void CortexUpdCmd::Exec(std::string v) { archive_utils::ExtractArchive(download_path.string(), extract_path.string()); - // remove the downloaded file - // TODO(any) Could not delete file on Windows because it is currently hold by httplib(?) - // Not sure about other platforms - try { - std::filesystem::remove(absolute_path); - } catch (const std::exception& e) { - CTL_WRN("Could not delete file: " << e.what()); - } CTL_INF("Finished!"); }); break; @@ -114,58 +124,75 @@ void CortexUpdCmd::Exec(std::string v) { } } catch (const nlohmann::json::parse_error& e) { std::cerr << "JSON parse error: " << e.what() << std::endl; - return; + return false; } } else { CTL_ERR("HTTP error: " << res->status); - return; + return false; } } else { auto err = res.error(); CTL_ERR("HTTP error: " << httplib::to_string(err)); - return; + return false; } -#if defined(_WIN32) - auto executable_path = file_manager_utils::GetExecutableFolderContainerPath(); - auto temp = executable_path / "cortex_tmp.exe"; - remove(temp.string().c_str()); // ignore return code - - auto src = - executable_path / "cortex" / kCortexBinary / (kCortexBinary + ".exe"); - auto dst = executable_path / (kCortexBinary + ".exe"); - // Rename - rename(dst.string().c_str(), temp.string().c_str()); - // Update - CopyFile(const_cast(src.string().c_str()), - const_cast(dst.string().c_str()), false); - auto download_folder = executable_path / "cortex"; - remove(download_folder); - remove(temp.string().c_str()); -#else + + // Replace binary file auto executable_path = file_manager_utils::GetExecutableFolderContainerPath(); - auto temp = executable_path / "cortex_tmp"; - auto src = executable_path / "cortex" / kCortexBinary / kCortexBinary; - auto dst = executable_path / kCortexBinary; - if (std::rename(dst.string().c_str(), temp.string().c_str())) { - CTL_ERR("Failed to rename from " << dst.string() << " 
to " - << temp.string()); - return; - } - try { - std::filesystem::copy_file( - src, dst, std::filesystem::copy_options::overwrite_existing); - std::filesystem::permissions(dst, std::filesystem::perms::owner_all | - std::filesystem::perms::group_all | - std::filesystem::perms::others_read | - std::filesystem::perms::others_exec); - std::filesystem::remove(temp); - auto download_folder = executable_path / "cortex/"; - std::filesystem::remove_all(download_folder); - } catch (const std::exception& e) { - CTL_WRN("Something wrong happened: " << e.what()); - return; + auto src = executable_path / "cortex" / kCortexBinary / GetCortexBinary(); + auto dst = executable_path / GetCortexBinary(); + return ReplaceBinaryInflight(src, dst); +} + +bool CortexUpdCmd::GetNightly(const std::string& v) { + // Check if the architecture and OS are supported + auto system_info = system_info_utils::GetSystemInfo(); + if (system_info.arch == system_info_utils::kUnsupported || + system_info.os == system_info_utils::kUnsupported) { + CTL_ERR("Unsupported OS or architecture: " << system_info.os << ", " + << system_info.arch); + return false; } -#endif - CLI_LOG("Update cortex sucessfully"); + CTL_INF("OS: " << system_info.os << ", Arch: " << system_info.arch); + + // Download file + std::string version = v.empty() ? 
"latest" : v; + std::ostringstream release_path; + release_path << "cortex/" << version << "/" << system_info.os << "-" + << system_info.arch << "/" << kNightlyFileName; + CTL_INF("Engine release path: " << kNightlyHost << release_path.str()); + + auto download_task = DownloadTask{.id = "cortex", + .type = DownloadType::Cortex, + .error = std::nullopt, + .items = {DownloadItem{ + .id = "cortex", + .host = kNightlyHost, + .fileName = kNightlyFileName, + .type = DownloadType::Cortex, + .path = release_path.str(), + }}}; + + DownloadService download_service; + download_service.AddDownloadTask( + download_task, [this](const std::string& absolute_path, bool unused) { + // try to unzip the downloaded file + std::filesystem::path download_path{absolute_path}; + CTL_INF("Downloaded engine path: " << download_path.string()); + + std::filesystem::path extract_path = + download_path.parent_path().parent_path(); + + archive_utils::ExtractArchive(download_path.string(), + extract_path.string()); + + CTL_INF("Finished!"); + }); + + // Replace binary file + auto executable_path = file_manager_utils::GetExecutableFolderContainerPath(); + auto src = executable_path / "cortex" / GetCortexBinary(); + auto dst = executable_path / GetCortexBinary(); + return ReplaceBinaryInflight(src, dst); } + } // namespace commands \ No newline at end of file diff --git a/engine/commands/cortex_upd_cmd.h b/engine/commands/cortex_upd_cmd.h index 2606dbbd9..396c6cdaf 100644 --- a/engine/commands/cortex_upd_cmd.h +++ b/engine/commands/cortex_upd_cmd.h @@ -1,13 +1,124 @@ #pragma once -#include #include +#include + +#include "httplib.h" +#include "nlohmann/json.hpp" +#include "utils/file_manager_utils.h" +#include "utils/logging_utils.h" + namespace commands { +#ifndef CORTEX_VARIANT +#define CORTEX_VARIANT file_manager_utils::kProdVariant +#endif +constexpr const auto kNightlyHost = "https://delta.jan.ai"; +constexpr const auto kNightlyFileName = "cortex-nightly.tar.gz"; +const std::string 
kCortexBinary = "cortex"; + +inline std::string GetCortexBinary() { +#if defined(_WIN32) + constexpr const bool has_exe = true; +#else + constexpr const bool has_exe = false; +#endif + if (CORTEX_VARIANT == file_manager_utils::kNightlyVariant) { + return has_exe ? kCortexBinary + "-nightly.exe" + : kCortexBinary + "-nightly"; + } else if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) { + return has_exe ? kCortexBinary + "-beta.exe" : kCortexBinary + "-beta"; + } else { + return has_exe ? kCortexBinary + ".exe" : kCortexBinary; + } +} + +inline std::string GetHostName() { + if (CORTEX_VARIANT == file_manager_utils::kNightlyVariant) { + return kNightlyHost; + } else { + return "https://api.github.com"; + } +} + +inline std::string GetReleasePath() { + if (CORTEX_VARIANT == file_manager_utils::kNightlyVariant) { + return "/cortex/latest/version.json"; + } else { + return "/repos/janhq/cortex.cpp/releases/latest"; + } +} + +inline void CheckNewUpdate() { + auto host_name = GetHostName(); + auto release_path = GetReleasePath(); + CTL_INF("Engine release path: " << host_name << release_path); -class CortexUpdCmd{ + httplib::Client cli(host_name); + if (auto res = cli.Get(release_path)) { + if (res->status == httplib::StatusCode::OK_200) { + try { + auto json_res = nlohmann::json::parse(res->body); + std::string latest_version = json_res["tag_name"].get(); + std::string current_version = CORTEX_CPP_VERSION; + if (current_version != latest_version) { + CLI_LOG("\nA new release of cortex is available: " + << current_version << " -> " << latest_version); + CLI_LOG("To upgrade, run: cortex update"); + // CLI_LOG(json_res["html_url"].get()); + } + } catch (const nlohmann::json::exception& e) { + CTL_INF("JSON parse error: " << e.what()); + } + } else { + CTL_INF("HTTP error: " << res->status); + } + } else { + auto err = res.error(); + CTL_INF("HTTP error: " << httplib::to_string(err)); + } +} + +inline bool ReplaceBinaryInflight(const std::filesystem::path& 
src, + const std::filesystem::path& dst) { + if (src == dst) { + // Already has the newest + return true; + } + std::filesystem::path temp = std::filesystem::temp_directory_path() / "cortex_temp"; + + try { + if (std::filesystem::exists(temp)) { + std::filesystem::remove(temp); + } + + std::rename(dst.string().c_str(), temp.string().c_str()); + std::filesystem::copy_file( + src, dst, std::filesystem::copy_options::overwrite_existing); + std::filesystem::permissions(dst, std::filesystem::perms::owner_all | + std::filesystem::perms::group_all | + std::filesystem::perms::others_read | + std::filesystem::perms::others_exec); + auto download_folder = src.parent_path(); + std::filesystem::remove_all(download_folder); + } catch (const std::exception& e) { + CTL_ERR("Something wrong happened: " << e.what()); + if (std::filesystem::exists(temp)) { + std::rename(temp.string().c_str(), dst.string().c_str()); + CLI_LOG("Restored binary file"); + } + return false; + } + + return true; +} + +class CortexUpdCmd { public: CortexUpdCmd(); void Exec(std::string version); + + private: + bool GetStableAndBeta(const std::string& v); + bool GetNightly(const std::string& v); }; } // namespace commands \ No newline at end of file diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index b3ab8bae5..7b0153f69 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -75,7 +75,9 @@ bool EngineInitCmd::Exec() const { matched_variant = engine_matcher_utils::ValidateOnnx( variants, system_info.os, system_info.arch); } else if (engineName_ == "cortex.llamacpp") { - auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant(); + cortex::cpuid::CpuInfo cpu_info; + auto suitable_avx = + engine_matcher_utils::GetSuitableAvxVariant(cpu_info); matched_variant = engine_matcher_utils::Validate( variants, system_info.os, system_info.arch, suitable_avx, cuda_driver_version); diff --git a/engine/controllers/command_line_parser.cc 
b/engine/controllers/command_line_parser.cc index b8ad72f58..987aa693a 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -16,7 +16,6 @@ #include "config/yaml_config.h" #include "httplib.h" #include "services/engine_service.h" -#include "utils/cortex_utils.h" #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" @@ -24,7 +23,7 @@ CommandLineParser::CommandLineParser() : app_("Cortex.cpp CLI"), engine_service_{EngineService()} {} bool CommandLineParser::SetupCommand(int argc, char** argv) { - + auto config = file_manager_utils::GetCortexConfig(); std::string model_id; // Models group commands @@ -34,7 +33,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto start_cmd = models_cmd->add_subcommand("start", "Start a model by ID"); start_cmd->add_option("model_id", model_id, ""); - start_cmd->callback([&model_id]() { + start_cmd->callback([&model_id, &config]() { commands::CmdInfo ci(model_id); std::string model_file = ci.branch == "main" ? ci.model_name : ci.model_name + "-" + ci.branch; @@ -42,7 +41,8 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { yaml_handler.ModelConfigFromFile( file_manager_utils::GetModelsContainerPath().string() + "/" + model_file + ".yaml"); - commands::ModelStartCmd msc("127.0.0.1", 3928, + commands::ModelStartCmd msc(config.apiServerHost, + std::stoi(config.apiServerPort), yaml_handler.GetModelConfig()); msc.Exec(); }); @@ -50,7 +50,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto stop_model_cmd = models_cmd->add_subcommand("stop", "Stop a model by ID"); stop_model_cmd->add_option("model_id", model_id, ""); - stop_model_cmd->callback([&model_id]() { + stop_model_cmd->callback([&model_id, &config]() { commands::CmdInfo ci(model_id); std::string model_file = ci.branch == "main" ? 
ci.model_name : ci.model_name + "-" + ci.branch; @@ -58,7 +58,8 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { yaml_handler.ModelConfigFromFile( file_manager_utils::GetModelsContainerPath().string() + "/" + model_file + ".yaml"); - commands::ModelStopCmd smc("127.0.0.1", 3928, + commands::ModelStopCmd smc(config.apiServerHost, + std::stoi(config.apiServerPort), yaml_handler.GetModelConfig()); smc.Exec(); }); @@ -105,7 +106,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { chat_cmd->add_option("model_id", model_id, ""); chat_cmd->add_option("-m,--message", msg, "Message to chat with model"); - chat_cmd->callback([&model_id, &msg] { + chat_cmd->callback([&model_id, &msg, &config] { commands::CmdInfo ci(model_id); std::string model_file = ci.branch == "main" ? ci.model_name : ci.model_name + "-" + ci.branch; @@ -113,7 +114,9 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { yaml_handler.ModelConfigFromFile( file_manager_utils::GetModelsContainerPath().string() + "/" + model_file + ".yaml"); - commands::ChatCmd cc("127.0.0.1", 3928, yaml_handler.GetModelConfig()); + commands::ChatCmd cc(config.apiServerHost, + std::stoi(config.apiServerPort), + yaml_handler.GetModelConfig()); cc.Exec(msg); }); } @@ -135,9 +138,19 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { command.Exec(); }); + auto install_cmd = engines_cmd->add_subcommand("install", "Install engine"); + install_cmd->callback([] { CLI_LOG("Engine name can't be empty!"); }); + for (auto& engine : engine_service_.kSupportEngines) { + std::string engine_name{engine}; + EngineInstall(install_cmd, engine_name, version); + } + + auto uninstall_cmd = + engines_cmd->add_subcommand("uninstall", "Uninstall engine"); + uninstall_cmd->callback([] { CLI_LOG("Engine name can't be empty!"); }); for (auto& engine : engine_service_.kSupportEngines) { std::string engine_name{engine}; - EngineManagement(engines_cmd, engine_name, version); + 
EngineUninstall(uninstall_cmd, engine_name); } EngineGet(engines_cmd); @@ -149,17 +162,18 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { app_.add_subcommand("run", "Shortcut to start a model and chat"); std::string model_id; run_cmd->add_option("model_id", model_id, ""); - run_cmd->callback([&model_id] { - commands::RunCmd rc("127.0.0.1", 3928, model_id); + run_cmd->callback([&model_id, &config] { + commands::RunCmd rc(config.apiServerHost, std::stoi(config.apiServerPort), + model_id); rc.Exec(); }); } auto stop_cmd = app_.add_subcommand("stop", "Stop the API server"); - stop_cmd->callback([] { - // TODO get info from config file - commands::ServerStopCmd ssc("127.0.0.1", 3928); + stop_cmd->callback([&config] { + commands::ServerStopCmd ssc(config.apiServerHost, + std::stoi(config.apiServerPort)); ssc.Exec(); }); @@ -193,59 +207,29 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { // Check new update, only check for stable release for now #ifdef CORTEX_CPP_VERSION if (check_update) { - constexpr auto github_host = "https://api.github.com"; - std::ostringstream release_path; - release_path << "/repos/janhq/cortex.cpp/releases/latest"; - CTL_INF("Engine release path: " << github_host << release_path.str()); - - httplib::Client cli(github_host); - if (auto res = cli.Get(release_path.str())) { - if (res->status == httplib::StatusCode::OK_200) { - try { - auto json_res = nlohmann::json::parse(res->body); - std::string latest_version = json_res["tag_name"].get(); - std::string current_version = CORTEX_CPP_VERSION; - if (current_version != latest_version) { - CLI_LOG("\nA new release of cortex is available: " - << current_version << " -> " << latest_version); - CLI_LOG("To upgrade, run: cortex update"); - CLI_LOG(json_res["html_url"].get()); - } - } catch (const nlohmann::json::parse_error& e) { - CTL_INF("JSON parse error: " << e.what()); - } - } else { - CTL_INF("HTTP error: " << res->status); - } - } else { - auto err = res.error(); - 
CTL_INF("HTTP error: " << httplib::to_string(err)); - } + commands::CheckNewUpdate(); } #endif return true; } -void CommandLineParser::EngineManagement(CLI::App* parent, - const std::string& engine_name, - std::string& version) { - auto engine_cmd = - parent->add_subcommand(engine_name, "Manage " + engine_name + " engine"); - - auto install_cmd = engine_cmd->add_subcommand( - "install", "Install " + engine_name + " engine"); - install_cmd->add_option("-v, --version", version, - "Engine version. Default will be latest"); +void CommandLineParser::EngineInstall(CLI::App* parent, + const std::string& engine_name, + std::string& version) { + auto install_engine_cmd = parent->add_subcommand(engine_name, ""); - install_cmd->callback([engine_name, &version] { + install_engine_cmd->callback([engine_name, &version] { commands::EngineInitCmd eic(engine_name, version); eic.Exec(); }); +} + +void CommandLineParser::EngineUninstall(CLI::App* parent, + const std::string& engine_name) { + auto uninstall_engine_cmd = parent->add_subcommand(engine_name, ""); - auto uninstall_desc{"Uninstall " + engine_name + " engine"}; - auto uninstall_cmd = engine_cmd->add_subcommand("uninstall", uninstall_desc); - uninstall_cmd->callback([engine_name] { + uninstall_engine_cmd->callback([engine_name] { commands::EngineUninstallCmd cmd(engine_name); cmd.Exec(); }); diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h index 0fa89e241..e4a2f47c5 100644 --- a/engine/controllers/command_line_parser.h +++ b/engine/controllers/command_line_parser.h @@ -9,8 +9,10 @@ class CommandLineParser { bool SetupCommand(int argc, char** argv); private: - void EngineManagement(CLI::App* parent, const std::string& engine_name, - std::string& version); + void EngineInstall(CLI::App* parent, const std::string& engine_name, + std::string& version); + + void EngineUninstall(CLI::App* parent, const std::string& engine_name); void EngineGet(CLI::App* parent); diff --git a/engine/main.cc b/engine/main.cc index 
5c83d1103..1450d887c 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -6,6 +6,7 @@ #include "utils/archive_utils.h" #include "utils/cortex_utils.h" #include "utils/dylib.h" +#include "utils/file_logger.h" #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" @@ -26,19 +27,21 @@ void RunServer() { auto config = file_manager_utils::GetCortexConfig(); - LOG_INFO << "Host: " << config.host << " Port: " << config.port << "\n"; + LOG_INFO << "Host: " << config.apiServerHost << " Port: " << config.apiServerPort << "\n"; // Create logs/ folder and setup log to file - std::filesystem::create_directory(cortex_utils::logs_folder); - trantor::AsyncFileLogger asyncFileLogger; - asyncFileLogger.setFileName(cortex_utils::logs_base_name); + std::filesystem::create_directory(config.logFolderPath + "/" + + cortex_utils::logs_folder); + trantor::FileLogger asyncFileLogger; + asyncFileLogger.setFileName(config.logFolderPath + "/" + + cortex_utils::logs_base_name); + asyncFileLogger.setMaxLines(config.maxLogLines); // Keep last 100000 lines asyncFileLogger.startLogging(); trantor::Logger::setOutputFunction( [&](const char* msg, const uint64_t len) { - asyncFileLogger.output(msg, len); + asyncFileLogger.output_(msg, len); }, [&]() { asyncFileLogger.flush(); }); - asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); // Number of cortex.cpp threads // if (argc > 1) { // thread_num = std::atoi(argv[1]); @@ -66,10 +69,10 @@ void RunServer() { LOG_INFO << "cortex.cpp version: undefined"; #endif - LOG_INFO << "Server started, listening at: " << config.host << ":" - << config.port; + LOG_INFO << "Server started, listening at: " << config.apiServerHost << ":" + << config.apiServerPort; LOG_INFO << "Please load your model"; - drogon::app().addListener(config.host, std::stoi(config.port)); + drogon::app().addListener(config.apiServerHost, std::stoi(config.apiServerPort)); drogon::app().setThreadNum(drogon_thread_num); LOG_INFO << "Number of thread is:" << 
drogon::app().getThreadNum(); @@ -154,6 +157,18 @@ int main(int argc, char* argv[]) { RunServer(); return 0; } else { + auto config = file_manager_utils::GetCortexConfig(); + trantor::FileLogger asyncFileLogger; + asyncFileLogger.setFileName(config.logFolderPath + "/" + + cortex_utils::logs_cli_base_name); + asyncFileLogger.setMaxLines( + config.maxLogLines); // Keep last 100000 lines + asyncFileLogger.startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + asyncFileLogger.output_(msg, len); + }, + [&]() { asyncFileLogger.flush(); }); CommandLineParser clp; clp.SetupCommand(argc, argv); return 0; diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc index ffb25f02d..78a0c4757 100644 --- a/engine/services/download_service.cc +++ b/engine/services/download_service.cc @@ -81,6 +81,7 @@ void DownloadService::StartDownloadItem( } if (current == total) { outputFile.flush(); + outputFile.close(); CLI_LOG("Done download: " << static_cast(total) / 1024 / 1024 << " MiB"); if (callback.has_value()) { diff --git a/engine/test/components/test_cortex_upd_cmd.cc b/engine/test/components/test_cortex_upd_cmd.cc new file mode 100644 index 000000000..9b8f3e764 --- /dev/null +++ b/engine/test/components/test_cortex_upd_cmd.cc @@ -0,0 +1,56 @@ +#include "commands/cortex_upd_cmd.h" +#include "gtest/gtest.h" + +namespace { +constexpr const auto kNewReleaseFolder = "./cortex-release"; +constexpr const auto kNewReleaseFile = "./cortex-release/cortexexe"; +constexpr const auto kCurReleaseFile = "./cortexexe"; +std::filesystem::path GetCortexTemp() { + return std::filesystem::temp_directory_path() / "cortex_temp"; +} +} // namespace + +class CortexUpdCmdTest : public ::testing::Test { + void SetUp() { + // Create new release folder and files + std::filesystem::path folder_path(kNewReleaseFolder); + std::filesystem::create_directory(folder_path); + std::ofstream src(kNewReleaseFile); + src.close(); + std::ofstream 
dst(kCurReleaseFile); + dst.close(); + } + + void TearDown() { + std::filesystem::path folder_path(kNewReleaseFolder); + if (std::filesystem::exists(folder_path)) { + std::filesystem::remove_all(folder_path); + } + + if (std::filesystem::exists(kCurReleaseFile)) { + std::filesystem::remove(kCurReleaseFile); + } + + if (std::filesystem::exists(GetCortexTemp())) { + std::filesystem::remove(GetCortexTemp()); + } + } +}; + +TEST_F(CortexUpdCmdTest, return_true_if_self_replace) { + EXPECT_TRUE(commands::ReplaceBinaryInflight("test", "test")); +} + +TEST_F(CortexUpdCmdTest, replace_binary_successfully) { + std::filesystem::path new_binary(kNewReleaseFile); + std::filesystem::path cur_binary(kCurReleaseFile); + EXPECT_TRUE(commands::ReplaceBinaryInflight(new_binary, cur_binary)); + EXPECT_TRUE(std::filesystem::exists(GetCortexTemp())); +} + +TEST_F(CortexUpdCmdTest, should_restore_old_binary_if_has_error) { + std::filesystem::path new_binary("Non-exist"); + std::filesystem::path cur_binary(kCurReleaseFile); + EXPECT_FALSE(commands::ReplaceBinaryInflight(new_binary, cur_binary)); + EXPECT_FALSE(std::filesystem::exists(GetCortexTemp())); +} \ No newline at end of file diff --git a/engine/test/components/test_engine_matcher_utils.cc b/engine/test/components/test_engine_matcher_utils.cc new file mode 100644 index 000000000..d724c3fde --- /dev/null +++ b/engine/test/components/test_engine_matcher_utils.cc @@ -0,0 +1,137 @@ +#include +#include +#include "gtest/gtest.h" +#include "utils/engine_matcher_utils.h" + +class EngineMatcherUtilsTestSuite : public ::testing::Test { + protected: + const std::vector cortex_llamacpp_variants{ + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-12-0.tar.gz", + 
"cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-vulkan.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-mac-arm64.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-vulkan.tar.gz", + }; + + const std::vector cortex_tensorrt_variants{ + "cortex.tensorrt-llm-0.0.9-linux-cuda-12-4.tar.gz", + "cortex.tensorrt-llm-0.0.9-windows-cuda-12-4.tar.gz"}; + + const std::vector cortex_onnx_variants{ + "cortex.onnx-0.1.7-windows-amd64.tar.gz"}; +}; + +TEST_F(EngineMatcherUtilsTestSuite, TestValidateOnnx) { + + { + auto expect_matched_variant = cortex_onnx_variants[0]; + auto result = 
engine_matcher_utils::ValidateOnnx(cortex_onnx_variants, + "windows", "amd64"); + + EXPECT_EQ(result, expect_matched_variant); + } + + { + // should return an empty variant because no variant matched + auto expect_matched_variant{""}; + auto windows_arm_result = engine_matcher_utils::ValidateOnnx( + cortex_onnx_variants, "windows", "arm"); + auto mac_arm64_result = engine_matcher_utils::ValidateOnnx( + cortex_onnx_variants, "mac", "arm64"); + + EXPECT_EQ(windows_arm_result, expect_matched_variant); + EXPECT_EQ(mac_arm64_result, expect_matched_variant); + } +} + +TEST_F(EngineMatcherUtilsTestSuite, TestValidateTensorrt) { + + { + auto windows_expect_matched_variant{cortex_tensorrt_variants[1]}; + auto linux_expect_matched_variant{cortex_tensorrt_variants[0]}; + auto windows{"windows"}; + auto linux{"linux"}; + auto cuda_version{"12.4"}; + auto windows_result = engine_matcher_utils::ValidateTensorrtLlm( + cortex_tensorrt_variants, windows, cuda_version); + auto linux_result = engine_matcher_utils::ValidateTensorrtLlm( + cortex_tensorrt_variants, linux, cuda_version); + + EXPECT_EQ(windows_result, windows_expect_matched_variant); + EXPECT_EQ(linux_result, linux_expect_matched_variant); + } + + { // macos is not supported + auto os = "mac"; + auto cuda_version{"12.4"}; + + auto result = engine_matcher_utils::ValidateTensorrtLlm( + cortex_tensorrt_variants, os, cuda_version); + EXPECT_EQ(result, ""); + } +} + +TEST_F(EngineMatcherUtilsTestSuite, TestValidate) { + { + auto os{"windows"}; + auto cpu_arch{"amd64"}; + auto suitable_avx{"avx2"}; + auto cuda_version{"12.4"}; + + auto variant = engine_matcher_utils::Validate( + cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version); + + EXPECT_EQ( + variant, + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz"); + } + + { + auto os{"mac"}; + auto cpu_arch{"amd64"}; + auto suitable_avx{""}; + auto cuda_version{""}; + + auto variant = engine_matcher_utils::Validate( + cortex_llamacpp_variants, os, 
cpu_arch, suitable_avx, cuda_version); + + EXPECT_EQ(variant, "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz"); + } + + { + auto os{"windows"}; + auto cpu_arch{"amd64"}; + auto suitable_avx{"avx2"}; + auto cuda_version{"10"}; + + auto variant = engine_matcher_utils::Validate( + cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version); + + // fallback to no cuda version + EXPECT_EQ(variant, + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz"); + } +} diff --git a/engine/test/components/test_system_info_utils.cc b/engine/test/components/test_system_info_utils.cc new file mode 100644 index 000000000..da38bde5d --- /dev/null +++ b/engine/test/components/test_system_info_utils.cc @@ -0,0 +1,90 @@ +#include +#include "gtest/gtest.h" + +class SystemInfoUtilsTestSuite : public ::testing::Test { + protected: + const std::string nvidia_smi_sample_output = R"( +Sun Aug 25 22:29:25 2024 ++-----------------------------------------------------------------------------------------+ +| NVIDIA-SMI 560.70 Driver Version: 560.70 CUDA Version: 12.6 | +|-----------------------------------------+------------------------+----------------------+ +| GPU Name Driver-Model | Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | +| | | MIG M. 
| +|=========================================+========================+======================| +| 0 NVIDIA GeForce RTX 4090 WDDM | 00000000:01:00.0 Off | Off | +| 0% 24C P8 10W / 500W | 395MiB / 24564MiB | 19% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ + ++-----------------------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=========================================================================================| +| 0 N/A N/A 3984 C+G ...5n1h2txyewy\ShellExperienceHost.exe N/A | +| 0 N/A N/A 7904 C+G ...ekyb3d8bbwe\PhoneExperienceHost.exe N/A | +| 0 N/A N/A 8240 C+G ...__8wekyb3d8bbwe\WindowsTerminal.exe N/A | +| 0 N/A N/A 8904 C+G C:\Windows\explorer.exe N/A | +| 0 N/A N/A 9304 C+G ...siveControlPanel\SystemSettings.exe N/A | +| 0 N/A N/A 9944 C+G ...nt.CBS_cw5n1h2txyewy\SearchHost.exe N/A | +| 0 N/A N/A 11140 C+G ...2txyewy\StartMenuExperienceHost.exe N/A | ++-----------------------------------------------------------------------------------------+ +)"; + + const std::string vulkan_sample_output = R"( +========== +VULKANINFO +========== + +Vulkan Instance Version: 1.3.280 + + +Instance Extensions: count = 19 +------------------------------- +VK_EXT_debug_report : extension revision 10 +VK_EXT_debug_utils : extension revision 2 +VK_EXT_direct_mode_display : extension revision 1 +VK_EXT_surface_maintenance1 : extension revision 1 +VK_EXT_swapchain_colorspace : extension revision 4 +VK_KHR_device_group_creation : extension revision 1 +VK_KHR_display : extension revision 23 +VK_KHR_external_fence_capabilities : extension revision 1 +VK_KHR_external_memory_capabilities : extension revision 1 +VK_KHR_external_semaphore_capabilities : extension revision 1 +VK_KHR_get_display_properties2 : extension revision 1 +VK_KHR_get_physical_device_properties2 : extension revision 2 
+VK_KHR_get_surface_capabilities2 : extension revision 1 +VK_KHR_portability_enumeration : extension revision 1 +VK_KHR_surface : extension revision 25 +VK_KHR_surface_protected_capabilities : extension revision 1 +VK_KHR_win32_surface : extension revision 6 +VK_LUNARG_direct_driver_loading : extension revision 1 +VK_NV_external_memory_capabilities : extension revision 1 + +Instance Layers: count = 1 +-------------------------- +VK_LAYER_NV_optimus NVIDIA Optimus layer 1.3.280 version 1 + +Devices: +======== +GPU0: + apiVersion = 1.3.280 + driverVersion = 560.70.0.0 + vendorID = 0x10de + deviceID = 0x2684 + deviceType = PHYSICAL_DEVICE_TYPE_DISCRETE_GPU + deviceName = NVIDIA GeForce RTX 4090 + driverID = DRIVER_ID_NVIDIA_PROPRIETARY + driverName = NVIDIA + driverInfo = 560.70 + conformanceVersion = 1.3.8.2 + deviceUUID = 11deafdf-9f15-e857-2a87-8acc153fc9f7 + driverUUID = 10f251d9-d3c0-5001-bf67-24bb06423040 +)"; + + const std::string gpu_query_list_sample_output = R"( +0, 46068, NVIDIA RTX A6000, 8.6 +1, 46068, NVIDIA RTX A6000, 8.6 +)"; +}; diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index 4330f3527..8e3668292 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -8,14 +8,17 @@ namespace config_yaml_utils { struct CortexConfig { + std::string logFolderPath; std::string dataFolderPath; - std::string host; - std::string port; + int maxLogLines; + std::string apiServerHost; + std::string apiServerPort; }; const std::string kCortexFolderName = "cortexcpp"; const std::string kDefaultHost{"127.0.0.1"}; const std::string kDefaultPort{"3928"}; +const int kDefaultMaxLines{100000}; inline void DumpYamlConfig(const CortexConfig& config, const std::string& path) { @@ -27,9 +30,11 @@ inline void DumpYamlConfig(const CortexConfig& config, throw std::runtime_error("Failed to open output file."); } YAML::Node node; + node["logFolderPath"] = config.logFolderPath; node["dataFolderPath"] = config.dataFolderPath; 
- node["host"] = config.host; - node["port"] = config.port; + node["maxLogLines"] = config.maxLogLines; + node["apiServerHost"] = config.apiServerHost; + node["apiServerPort"] = config.apiServerPort; out_file << node; out_file.close(); @@ -48,10 +53,18 @@ inline CortexConfig FromYaml(const std::string& path, try { auto node = YAML::LoadFile(config_file_path.string()); + int max_lines; + if (!node["maxLogLines"]) { + max_lines = kDefaultMaxLines; + } else { + max_lines = node["maxLogLines"].as(); + } CortexConfig config = { + .logFolderPath = node["logFolderPath"].as(), .dataFolderPath = node["dataFolderPath"].as(), - .host = node["host"].as(), - .port = node["port"].as(), + .maxLogLines = max_lines, + .apiServerHost = node["apiServerHost"].as(), + .apiServerPort = node["apiServerPort"].as(), }; return config; } catch (const YAML::BadFile& e) { @@ -60,5 +73,4 @@ inline CortexConfig FromYaml(const std::string& path, } } - } // namespace config_yaml_utils diff --git a/engine/utils/cortex_utils.h b/engine/utils/cortex_utils.h index 32dea9321..9673f0c1a 100644 --- a/engine/utils/cortex_utils.h +++ b/engine/utils/cortex_utils.h @@ -34,8 +34,8 @@ constexpr static auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm"; inline std::string models_folder = "./models"; inline std::string logs_folder = "./logs"; -inline std::string logs_base_name = "./logs/cortex"; -inline size_t log_file_size_limit = 20000000; // ~20 mb +inline std::string logs_base_name = "./logs/cortex.log"; +inline std::string logs_cli_base_name = "./logs/cortex-cli.log"; inline std::string extractBase64(const std::string& input) { std::regex pattern("base64,(.*)"); diff --git a/engine/utils/engine_matcher_utils.h b/engine/utils/engine_matcher_utils.h index 77baf1f72..287304f02 100644 --- a/engine/utils/engine_matcher_utils.h +++ b/engine/utils/engine_matcher_utils.h @@ -8,49 +8,10 @@ #include "utils/logging_utils.h" namespace engine_matcher_utils { -// for testing purpose -const std::vector 
cortex_llamacpp_variants{ - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-vulkan.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-mac-arm64.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-vulkan.tar.gz", -}; -const std::vector cortex_onnx_variants{ - 
"cortex.onnx-0.1.7-windows-amd64.tar.gz"}; - -const std::vector cortex_tensorrt_variants{ - "cortex.tensorrt-llm-0.0.9-linux-cuda-12-4.tar.gz", - "cortex.tensorrt-llm-0.0.9-windows-cuda-12-4.tar.gz"}; - -inline std::string GetSuitableAvxVariant() { - cortex::cpuid::CpuInfo cpu_info; - +inline std::string GetSuitableAvxVariant(cortex::cpuid::CpuInfo& cpu_info) { CTL_INF("GetSuitableAvxVariant:" << "\n" << cpu_info.to_string()); + // prioritize avx512 > avx2 > avx > noavx if (cpu_info.has_avx512_f()) return "avx512"; if (cpu_info.has_avx2()) @@ -151,10 +112,8 @@ inline std::string Validate(const std::vector& variants, const std::string& os, const std::string& cpu_arch, const std::string& suitable_avx, const std::string& cuda_version) { - - // Early return if the OS is unsupported + // Early return if the OS is not supported if (os != "mac" && os != "windows" && os != "linux") { - // TODO: throw is better return ""; } diff --git a/engine/utils/file_logger.cc b/engine/utils/file_logger.cc new file mode 100644 index 000000000..f684d457f --- /dev/null +++ b/engine/utils/file_logger.cc @@ -0,0 +1,175 @@ +#include "file_logger.h" +#include +#include +#include + +#ifdef _WIN32 +#include +#define ftruncate _chsize +#else +#include +#endif +#include + +using namespace trantor; + +FileLogger::FileLogger() : AsyncFileLogger() {} + +FileLogger::~FileLogger() = default; + +void FileLogger::output_(const char* msg, const uint64_t len) { + if (!circular_log_file_ptr_) { + circular_log_file_ptr_ = + std::make_unique(fileBaseName_, max_lines_); + } + circular_log_file_ptr_->writeLog(msg, len); +} + +FileLogger::CircularLogFile::CircularLogFile(const std::string& fileName, + uint64_t maxLines) + : max_lines_(maxLines), file_name_(fileName) { + std::lock_guard lock(mutex_); + OpenFile(); + LoadExistingLines(); + TruncateFileIfNeeded(); +} + +FileLogger::CircularLogFile::~CircularLogFile() { + std::lock_guard lock(mutex_); + CloseFile(); +} +void 
FileLogger::CircularLogFile::writeLog(const char* logLine, + const uint64_t len) { + std::lock_guard lock(mutex_); + if (!fp_) + return; + + std::string logString(logLine, len); + std::istringstream iss(logString); + std::string line; + while (std::getline(iss, line)) { + if (lineBuffer_.size() >= max_lines_) { + lineBuffer_.pop_front(); + } + lineBuffer_.push_back(line); + AppendToFile(line + "\n"); + ++linesWrittenSinceLastTruncate_; + if (linesWrittenSinceLastTruncate_.load() >= TRUNCATE_CHECK_INTERVAL) { + + TruncateFileIfNeeded(); + } + } +} +void FileLogger::CircularLogFile::flush() { + std::lock_guard lock(mutex_); + if (fp_) { + fflush(fp_); + } +} + +void FileLogger::CircularLogFile::TruncateFileIfNeeded() { + // std::cout<<"Truncating file "<< totalLines_ < max_lines_ ? lineBuffer_.size() - max_lines_ : 0; + + for (size_t i = startIndex; i < lineBuffer_.size(); ++i) { + fprintf(tempFile, "%s\n", lineBuffer_[i].c_str()); + } + + fclose(tempFile); + + // Replace the original file with the temporary file + if (std::rename(tempFileName.c_str(), file_name_.c_str()) != 0) { + std::cout << "Error replacing original file with truncated file: " + << strerror(errno) << std::endl; + std::remove(tempFileName.c_str()); // Clean up the temporary file + } + // else { + // totalLines_.store(lineBuffer_.size() > max_lines_ ? 
max_lines_ + // : lineBuffer_.size()); + // } + + // Reopen the file + OpenFile(); + // LoadExistingLines(); + linesWrittenSinceLastTruncate_.store(0); +} + +void FileLogger::CircularLogFile::OpenFile() { +#ifdef _WIN32 + auto wFileName = utils::toNativePath(file_name_); + fp_ = _wfopen(wFileName.c_str(), L"r+"); +#else + fp_ = fopen(file_name_.c_str(), "r+"); +#endif + + if (!fp_) { +// If file doesn't exist, create it +#ifdef _WIN32 + fp_ = _wfopen(wFileName.c_str(), L"w+"); +#else + fp_ = fopen(file_name_.c_str(), "w+"); +#endif + + if (!fp_) { + std::cerr << "Error opening file: " << strerror(errno) << std::endl; + } + } +} +void FileLogger::CircularLogFile::LoadExistingLines() { + if (!fp_) + return; + + // Move to the beginning of the file + fseek(fp_, 0, SEEK_SET); + + lineBuffer_.clear(); + + std::string line; + char buffer[4096]; + while (fgets(buffer, sizeof(buffer), fp_) != nullptr) { + line = buffer; + if (!line.empty() && line.back() == '\n') { + line.pop_back(); // Remove trailing newline + } + if (lineBuffer_.size() >= max_lines_) { + lineBuffer_.pop_front(); + } + lineBuffer_.push_back(line); + } + + // Move back to the end of the file for appending + fseek(fp_, 0, SEEK_END); +} +void FileLogger::CircularLogFile::AppendToFile(const std::string& line) { + if (fp_) { + fwrite(line.c_str(), 1, line.length(), fp_); + fflush(fp_); + } +} + +void FileLogger::CircularLogFile::CloseFile() { + if (fp_) { + fclose(fp_); + fp_ = nullptr; + } +} \ No newline at end of file diff --git a/engine/utils/file_logger.h b/engine/utils/file_logger.h new file mode 100644 index 000000000..58b719019 --- /dev/null +++ b/engine/utils/file_logger.h @@ -0,0 +1,72 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace trantor { + +class TRANTOR_EXPORT FileLogger : public AsyncFileLogger { + public: + FileLogger(); + ~FileLogger(); + + /** + * @brief Set the maximum number of lines 
to keep in the log file. + * + * @param maxLines + */ + void setMaxLines(uint64_t maxLines) { max_lines_ = maxLines; } + + /** + * @brief Set the log file name. + * + * @param fileName The full name of the log file. + */ + void setFileName(const std::string& fileName) { + filePath_ = "./"; + fileBaseName_ = fileName; + fileExtName_ = ""; + } + void output_(const char* msg, const uint64_t len); + + protected: + class CircularLogFile { + public: + CircularLogFile(const std::string& fileName, uint64_t maxLines); + ~CircularLogFile(); + + void writeLog(const char* logLine, const uint64_t len); + void flush(); + uint64_t getLength() const { return lineBuffer_.size(); } + + private: + FILE* fp_{nullptr}; + uint64_t max_lines_; + std::string file_name_; + std::deque lineBuffer_; + std::atomic linesWrittenSinceLastTruncate_{0}; + static const uint64_t TRUNCATE_CHECK_INTERVAL = 1000; + mutable std::mutex mutex_; + + void LoadExistingLines(); + void TruncateFileIfNeeded(); + void AppendToFile(const std::string& line); + void OpenFile(); + void CloseFile(); + }; + std::unique_ptr circular_log_file_ptr_; + uint64_t max_lines_{100000}; // Default to 100000 lines +}; + +} // namespace trantor \ No newline at end of file diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h index d20a39f13..b4d7ab07a 100644 --- a/engine/utils/file_manager_utils.h +++ b/engine/utils/file_manager_utils.h @@ -135,9 +135,11 @@ inline void CreateConfigFileIfNotExist() { CTL_INF("Default data folder path: " + defaultDataFolderPath.string()); auto config = config_yaml_utils::CortexConfig{ + .logFolderPath = defaultDataFolderPath.string(), .dataFolderPath = defaultDataFolderPath.string(), - .host = config_yaml_utils::kDefaultHost, - .port = config_yaml_utils::kDefaultPort, + .maxLogLines = config_yaml_utils::kDefaultMaxLines, + .apiServerHost = config_yaml_utils::kDefaultHost, + .apiServerPort = config_yaml_utils::kDefaultPort, }; DumpYamlConfig(config, config_path.string()); 
} @@ -169,6 +171,27 @@ inline std::filesystem::path GetCortexDataPath() { return data_folder_path; } +inline std::filesystem::path GetCortexLogPath() { + // TODO: We will need to support user to move the data folder to other place. + // TODO: get the variant of cortex. As discussed, we will have: prod, beta, nightly + // currently we will store cortex data at ~/cortexcpp + auto config = GetCortexConfig(); + std::filesystem::path log_folder_path; + if (!config.logFolderPath.empty()) { + log_folder_path = std::filesystem::path(config.logFolderPath); + } else { + auto home_path = GetHomeDirectoryPath(); + log_folder_path = home_path / config_yaml_utils::kCortexFolderName; + } + + if (!std::filesystem::exists(log_folder_path)) { + CTL_INF("Cortex log folder not found. Create one: " + + log_folder_path.string()); + std::filesystem::create_directory(log_folder_path); + } + return log_folder_path; +} + inline std::filesystem::path GetModelsContainerPath() { auto cortex_path = GetCortexDataPath(); auto models_container_path = cortex_path / "models"; diff --git a/engine/utils/http_util.h b/engine/utils/http_util.h index 471ef3b27..422d2c543 100644 --- a/engine/utils/http_util.h +++ b/engine/utils/http_util.h @@ -1,6 +1,7 @@ #pragma once #include +#include "utils/cortex_utils.h" using namespace drogon; @@ -21,4 +22,4 @@ inline bool HasFieldInReq(const HttpRequestPtr& req, return true; } -} // namespace http_util \ No newline at end of file +} // namespace http_util diff --git a/engine/utils/logging_utils.h b/engine/utils/logging_utils.h index 77311c13e..fcaa3f4bb 100644 --- a/engine/utils/logging_utils.h +++ b/engine/utils/logging_utils.h @@ -30,4 +30,3 @@ inline bool log_verbose = false; } else { \ std::cout << msg << std::endl; \ } - diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index ede7915d2..9cdcc8f05 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -79,91 +79,6 @@ inline SystemInfo 
GetSystemInfo() { return SystemInfo{os.str(), arch.str()}; } -constexpr auto vulkan_sample_output = R"( -========== -VULKANINFO -========== - -Vulkan Instance Version: 1.3.280 - - -Instance Extensions: count = 19 -------------------------------- -VK_EXT_debug_report : extension revision 10 -VK_EXT_debug_utils : extension revision 2 -VK_EXT_direct_mode_display : extension revision 1 -VK_EXT_surface_maintenance1 : extension revision 1 -VK_EXT_swapchain_colorspace : extension revision 4 -VK_KHR_device_group_creation : extension revision 1 -VK_KHR_display : extension revision 23 -VK_KHR_external_fence_capabilities : extension revision 1 -VK_KHR_external_memory_capabilities : extension revision 1 -VK_KHR_external_semaphore_capabilities : extension revision 1 -VK_KHR_get_display_properties2 : extension revision 1 -VK_KHR_get_physical_device_properties2 : extension revision 2 -VK_KHR_get_surface_capabilities2 : extension revision 1 -VK_KHR_portability_enumeration : extension revision 1 -VK_KHR_surface : extension revision 25 -VK_KHR_surface_protected_capabilities : extension revision 1 -VK_KHR_win32_surface : extension revision 6 -VK_LUNARG_direct_driver_loading : extension revision 1 -VK_NV_external_memory_capabilities : extension revision 1 - -Instance Layers: count = 1 --------------------------- -VK_LAYER_NV_optimus NVIDIA Optimus layer 1.3.280 version 1 - -Devices: -======== -GPU0: - apiVersion = 1.3.280 - driverVersion = 560.70.0.0 - vendorID = 0x10de - deviceID = 0x2684 - deviceType = PHYSICAL_DEVICE_TYPE_DISCRETE_GPU - deviceName = NVIDIA GeForce RTX 4090 - driverID = DRIVER_ID_NVIDIA_PROPRIETARY - driverName = NVIDIA - driverInfo = 560.70 - conformanceVersion = 1.3.8.2 - deviceUUID = 11deafdf-9f15-e857-2a87-8acc153fc9f7 - driverUUID = 10f251d9-d3c0-5001-bf67-24bb06423040 -)"; - -constexpr auto gpu_query_list_sample_output = R"( -0, 46068, NVIDIA RTX A6000, 8.6 -1, 46068, NVIDIA RTX A6000, 8.6 -)"; - -constexpr auto nvidia_smi_sample_output = R"( -Sun Aug 25 
22:29:25 2024 -+-----------------------------------------------------------------------------------------+ -| NVIDIA-SMI 560.70 Driver Version: 560.70 CUDA Version: 12.6 | -|-----------------------------------------+------------------------+----------------------+ -| GPU Name Driver-Model | Bus-Id Disp.A | Volatile Uncorr. ECC | -| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | -| | | MIG M. | -|=========================================+========================+======================| -| 0 NVIDIA GeForce RTX 4090 WDDM | 00000000:01:00.0 Off | Off | -| 0% 24C P8 10W / 500W | 395MiB / 24564MiB | 19% Default | -| | | N/A | -+-----------------------------------------+------------------------+----------------------+ - -+-----------------------------------------------------------------------------------------+ -| Processes: | -| GPU GI CI PID Type Process name GPU Memory | -| ID ID Usage | -|=========================================================================================| -| 0 N/A N/A 3984 C+G ...5n1h2txyewy\ShellExperienceHost.exe N/A | -| 0 N/A N/A 7904 C+G ...ekyb3d8bbwe\PhoneExperienceHost.exe N/A | -| 0 N/A N/A 8240 C+G ...__8wekyb3d8bbwe\WindowsTerminal.exe N/A | -| 0 N/A N/A 8904 C+G C:\Windows\explorer.exe N/A | -| 0 N/A N/A 9304 C+G ...siveControlPanel\SystemSettings.exe N/A | -| 0 N/A N/A 9944 C+G ...nt.CBS_cw5n1h2txyewy\SearchHost.exe N/A | -| 0 N/A N/A 11140 C+G ...2txyewy\StartMenuExperienceHost.exe N/A | -+-----------------------------------------------------------------------------------------+ -)"; - inline bool IsNvidiaSmiAvailable() { #ifdef _WIN32 // Check if nvidia-smi.exe exists in the PATH on Windows diff --git a/engine/vcpkg.json b/engine/vcpkg.json index 74f89965a..fe4783ec8 100644 --- a/engine/vcpkg.json +++ b/engine/vcpkg.json @@ -1,5 +1,6 @@ { "dependencies": [ + "gtest", "cli11", { "name": "cpp-httplib", @@ -16,4 +17,4 @@ "libarchive", "tabulate" ] - } \ No newline at end of file +}