From 287c75065767fdc270db24c421196a07a5b0c425 Mon Sep 17 00:00:00 2001
From: James
Date: Tue, 27 Aug 2024 14:35:47 +0700
Subject: [PATCH] feat: add engine init cli

---
 engine/commands/engine_init_cmd.cc        |  47 +++-
 engine/commands/engine_init_cmd.h         |   4 +-
 engine/controllers/command_line_parser.cc |  41 ++--
 engine/controllers/command_line_parser.h  |   2 +
 engine/controllers/engines.cc             | 111 +++++++++
 engine/controllers/engines.h              |  21 ++
 engine/main.cc                            |   5 +-
 engine/utils/command_executor.h           |  49 ++++
 engine/utils/engine_matcher_utils.h       | 180 ++++++++++++++
 engine/utils/system_info_utils.h          | 273 +++++++++++++++++++++-
 10 files changed, 696 insertions(+), 37 deletions(-)
 create mode 100644 engine/controllers/engines.cc
 create mode 100644 engine/controllers/engines.h
 create mode 100644 engine/utils/command_executor.h
 create mode 100644 engine/utils/engine_matcher_utils.h

diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc
index 430433e4d..b4f8fe064 100644
--- a/engine/commands/engine_init_cmd.cc
+++ b/engine/commands/engine_init_cmd.cc
@@ -7,6 +7,7 @@
 #include "utils/archive_utils.h"
 #include "utils/system_info_utils.h"
 // clang-format on
+#include "utils/engine_matcher_utils.h"
 
 namespace commands {
 
@@ -27,6 +28,7 @@ void EngineInitCmd::Exec() const {
               << system_info.arch;
     return;
   }
+  LOG_INFO << "OS: " << system_info.os << ", Arch: " << system_info.arch;
 
   // check if engine is supported
   if (std::find(supportedEngines_.begin(), supportedEngines_.end(),
@@ -36,11 +38,11 @@ void EngineInitCmd::Exec() const {
   }
 
   constexpr auto gitHubHost = "https://api.github.com";
-
+  std::string version = version_.empty() ? "latest" : version_;
   std::ostringstream engineReleasePath;
   engineReleasePath << "/repos/janhq/" << engineName_ << "/releases/"
-                    << version_;
-
+                    << version;
+  LOG_INFO << "Engine release path: " << gitHubHost << engineReleasePath.str();
   using namespace nlohmann;
 
   httplib::Client cli(gitHubHost);
@@ -51,9 +53,37 @@ void EngineInitCmd::Exec() const {
         auto assets = jsonResponse["assets"];
         auto os_arch{system_info.os + "-" + system_info.arch};
 
+        std::vector<std::string> variants;
+        for (auto& asset : assets) {
+          auto asset_name = asset["name"].get<std::string>();
+          variants.push_back(asset_name);
+        }
+
+        auto cuda_version = system_info_utils::GetCudaVersion();
+        LOG_INFO << "engineName_: " << engineName_;
+        LOG_INFO << "CUDA version: " << cuda_version;
+        std::string matched_variant = "";
+        if (engineName_ == "cortex.tensorrt-llm") {
+          matched_variant = engine_matcher_utils::ValidateTensorrtLlm(
+              variants, system_info.os, cuda_version);
+        } else if (engineName_ == "cortex.onnx") {
+          matched_variant = engine_matcher_utils::ValidateOnnx(
+              variants, system_info.os, system_info.arch);
+        } else if (engineName_ == "cortex.llamacpp") {
+          auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant();
+          matched_variant = engine_matcher_utils::Validate(
+              variants, system_info.os, system_info.arch, suitable_avx,
+              cuda_version);
+        }
+        LOG_INFO << "Matched variant: " << matched_variant;
+        if (matched_variant.empty()) {
+          LOG_ERROR << "No variant found for " << os_arch;
+          return;
+        }
+
         for (auto& asset : assets) {
           auto assetName = asset["name"].get<std::string>();
-          if (assetName.find(os_arch) != std::string::npos) {
+          if (assetName == matched_variant) {
             std::string host{"https://github.com"};
 
             auto full_url = asset["browser_download_url"].get<std::string>();
@@ -74,8 +104,7 @@ void EngineInitCmd::Exec() const {
                 }}};
 
             DownloadService().AddDownloadTask(
-                downloadTask,
-                [&downloadTask](const std::string& absolute_path) {
+                downloadTask, [](const std::string& absolute_path) {
                   // try to unzip the downloaded file
                   std::filesystem::path downloadedEnginePath{absolute_path};
                   LOG_INFO << "Downloaded engine path: "
@@ -95,15 +124,15 @@ void EngineInitCmd::Exec() const {
             return;
           }
         }
-        LOG_ERROR << "No asset found for " << os_arch;
       } catch (const json::parse_error& e) {
         std::cerr << "JSON parse error: " << e.what() << std::endl;
       }
+    } else {
+      LOG_ERROR << "HTTP error: " << res->status;
     }
   } else {
     auto err = res.error();
    LOG_ERROR << "HTTP error: " << httplib::to_string(err);
   }
 }
-
-};  // namespace commands
\ No newline at end of file
+};  // namespace commands
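
For reference, the reworked Exec() flow above is driven like so — a minimal sketch, assuming a caller that already knows the engine name (the values shown are illustrative, not part of this patch):

    // Downloads and extracts the best-matching release asset for this host.
    // An empty version string resolves to "latest" inside Exec().
    commands::EngineInitCmd eic("cortex.llamacpp", "");
    eic.Exec();
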
diff --git a/engine/commands/engine_init_cmd.h b/engine/commands/engine_init_cmd.h
index 09b908e37..dc75d5cf6 100644
--- a/engine/commands/engine_init_cmd.h
+++ b/engine/commands/engine_init_cmd.h
@@ -15,7 +15,7 @@ class EngineInitCmd {
   std::string engineName_;
   std::string version_;
 
-  static constexpr std::array<const char*, 1> supportedEngines_ = {
-      "cortex.llamacpp"};
+  static constexpr std::array<const char*, 3> supportedEngines_ = {
+      "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"};
 };
 }  // namespace commands
\ No newline at end of file
diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc
index d58760433..e409ab90a 100644
--- a/engine/controllers/command_line_parser.cc
+++ b/engine/controllers/command_line_parser.cc
@@ -1,7 +1,7 @@
 #include "command_line_parser.h"
 #include "commands/engine_init_cmd.h"
-#include "commands/model_pull_cmd.h"
 #include "commands/model_list_cmd.h"
+#include "commands/model_pull_cmd.h"
 #include "commands/start_model_cmd.h"
 #include "commands/stop_model_cmd.h"
 #include "commands/stop_server_cmd.h"
@@ -44,7 +44,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
 
     auto list_models_cmd =
         models_cmd->add_subcommand("list", "List all models locally");
-    list_models_cmd->callback([](){
+    list_models_cmd->callback([]() {
       commands::ModelListCmd command;
       command.Exec();
     });
@@ -74,27 +74,15 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
   auto embeddings_cmd = app_.add_subcommand(
       "embeddings", "Creates an embedding vector representing the input text");
 
-  // engines group commands
-  {
+  {  // engines group commands
     auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines");
     auto list_engines_cmd =
         engines_cmd->add_subcommand("list", "List all cortex engines");
     auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine");
 
-    {  // Engine init command
-      auto init_cmd = engines_cmd->add_subcommand("init", "Initialize engine");
-      std::string engine_name;
-      std::string version = "latest";
-
-      init_cmd->add_option("-n,--name", engine_name,
-                           "Engine name. E.g: cortex.llamacpp");
-      init_cmd->add_option("-v,--version", version,
-                           "Engine version. Default will be latest");
-      init_cmd->callback([&engine_name, &version]() {
-        commands::EngineInitCmd eic(engine_name, version);
-        eic.Exec();
-      });
-    }
+    EngineInstall(engines_cmd, "cortex.llamacpp");
+    EngineInstall(engines_cmd, "cortex.onnx");
+    EngineInstall(engines_cmd, "cortex.tensorrt-llm");
   }
 
   auto run_cmd =
@@ -110,4 +98,21 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
 
   CLI11_PARSE(app_, argc, argv);
   return true;
+}
+
+void CommandLineParser::EngineInstall(CLI::App* parent,
+                                      const std::string& engine_name) {
+  auto engine_cmd =
+      parent->add_subcommand(engine_name, "Manage " + engine_name + " engine");
+
+  // Default version is latest. Static storage so the string outlives this
+  // helper: CLI11 keeps a pointer to it and the callback only runs later,
+  // inside CLI11_PARSE.
+  static std::string version{"latest"};
+  auto install_cmd = engine_cmd->add_subcommand(
+      "install", "Install " + engine_name + " engine");
+  install_cmd->add_option("-v,--version", version,
+                          "Engine version. Default will be latest");
+  // Capture engine_name by value; the caller's argument is a temporary.
+  install_cmd->callback([engine_name, &version] {
+    commands::EngineInitCmd eic(engine_name, version);
+    eic.Exec();
+  });
 }
\ No newline at end of file
diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h
index 3324d45e0..e48ed31b0 100644
--- a/engine/controllers/command_line_parser.h
+++ b/engine/controllers/command_line_parser.h
@@ -9,5 +9,7 @@ class CommandLineParser {
   bool SetupCommand(int argc, char** argv);
 
  private:
+  void EngineInstall(CLI::App* parent, const std::string& engine_name);
+
   CLI::App app_;
 };
\ No newline at end of file
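
With EngineInstall() registering one subcommand per engine, the resulting CLI surface looks roughly like the following (the `cortex` binary name and the version values are assumptions for illustration, not taken from this patch):

    cortex engines cortex.llamacpp install
    cortex engines cortex.onnx install -v 0.1.7
    cortex engines cortex.tensorrt-llm install --version latest
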
diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc
new file mode 100644
index 000000000..12bea809d
--- /dev/null
+++ b/engine/controllers/engines.cc
@@ -0,0 +1,111 @@
+#include "engines.h"
+#include "utils/archive_utils.h"
+#include "utils/file_manager_utils.h"
+#include "utils/system_info_utils.h"
+
+void Engines::InitEngine(const HttpRequestPtr& req,
+                         std::function<void(const HttpResponsePtr&)>&& callback,
+                         const std::string& engine) const {
+  LOG_DEBUG << "InitEngine, Engine: " << engine;
+  if (engine.empty()) {
+    Json::Value res;
+    res["message"] = "Engine name is required";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
+    resp->setStatusCode(k409Conflict);
+    callback(resp);
+    LOG_WARN << "No engine field in path param";
+    return;
+  }
+
+  auto system_info = system_info_utils::GetSystemInfo();
+  if (system_info.arch == system_info_utils::kUnsupported ||
+      system_info.os == system_info_utils::kUnsupported) {
+    Json::Value res;
+    res["message"] = "Unsupported OS or architecture";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
+    resp->setStatusCode(k409Conflict);
+    callback(resp);
+    LOG_ERROR << "Unsupported OS or architecture: " << system_info.os << ", "
+              << system_info.arch;
+    return;
+  }
+
+  auto version{"latest"};
+  constexpr auto gitHubHost = "https://api.github.com";
+
+  std::ostringstream engineReleasePath;
+  engineReleasePath << "/repos/janhq/" << engine << "/releases/" << version;
+
+  httplib::Client cli(gitHubHost);
+  using namespace nlohmann;
+  if (auto res = cli.Get(engineReleasePath.str())) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      try {
+        auto jsonResponse = json::parse(res->body);
+        auto assets = jsonResponse["assets"];
+
+        auto os_arch{system_info.os + "-" + system_info.arch};
+        for (auto& asset : assets) {
+          auto assetName = asset["name"].get<std::string>();
+          if (assetName.find(os_arch) != std::string::npos) {
+            std::string host{"https://github.com"};
+
+            auto full_url = asset["browser_download_url"].get<std::string>();
+            std::string path = full_url.substr(host.length());
+
+            auto fileName = asset["name"].get<std::string>();
+            LOG_INFO << "URL: " << full_url;
+
+            auto downloadTask = DownloadTask{.id = engine,
+                                             .type = DownloadType::Engine,
+                                             .error = std::nullopt,
+                                             .items = {DownloadItem{
+                                                 .id = engine,
+                                                 .host = host,
+                                                 .fileName = fileName,
+                                                 .type = DownloadType::Engine,
+                                                 .path = path,
+                                             }}};
+
+            DownloadService().AddAsyncDownloadTask(
+                downloadTask, [](const std::string& absolute_path) {
+                  // try to unzip the downloaded file
+                  std::filesystem::path downloadedEnginePath{absolute_path};
+                  LOG_INFO << "Downloaded engine path: "
+                           << downloadedEnginePath.string();
+
+                  archive_utils::ExtractArchive(
+                      downloadedEnginePath.string(),
+                      downloadedEnginePath.parent_path()
+                          .parent_path()
+                          .string());
+
+                  // remove the downloaded file
+                  std::filesystem::remove(absolute_path);
+                  LOG_INFO << "Finished!";
+                });
+
+            Json::Value res;
+            res["message"] = "Engine download started";
+            res["result"] = "OK";
+            auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
+            resp->setStatusCode(k200OK);
+            callback(resp);
+            return;
+          }
+        }
+        Json::Value res;
+        res["message"] = "Engine not found";
+        res["result"] = "Error";
+        auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
+        resp->setStatusCode(k404NotFound);
+        callback(resp);
+      } catch (const json::parse_error& e) {
+        std::cerr << "JSON parse error: " << e.what() << std::endl;
+      }
+    }
+  } else {
+    auto err = res.error();
+    LOG_ERROR << "HTTP error: " << httplib::to_string(err);
+  }
+}
\ No newline at end of file
diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h
new file mode 100644
index 000000000..282e79402
--- /dev/null
+++ b/engine/controllers/engines.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <drogon/HttpController.h>
+#include <trantor/utils/Logger.h>
+#include "services/download_service.h"
+#include "utils/cortex_utils.h"
+#include "utils/cortexso_parser.h"
+#include "utils/http_util.h"
+
+using namespace drogon;
+
+class Engines : public drogon::HttpController<Engines> {
+ public:
+  METHOD_LIST_BEGIN
+  METHOD_ADD(Engines::InitEngine, "/{1}/init", Post);
+  METHOD_LIST_END
+
+  void InitEngine(const HttpRequestPtr& req,
+                  std::function<void(const HttpResponsePtr&)>&& callback,
+                  const std::string& engine) const;
+};
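
The controller above exposes the same flow over HTTP. Assuming the cortex server is listening on its default port (3928 here is an assumption, not stated in this patch), initialization could be triggered with:

    curl -X POST http://127.0.0.1:3928/engines/cortex.llamacpp/init
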
diff --git a/engine/main.cc b/engine/main.cc
index 27591d48a..e8701dd7b 100644
--- a/engine/main.cc
+++ b/engine/main.cc
@@ -1,12 +1,11 @@
 #include
 #include
 #include <climits>  // for PATH_MAX
-#include
 #include "controllers/command_line_parser.h"
 #include "cortex-common/cortexpythoni.h"
+#include "utils/archive_utils.h"
 #include "utils/cortex_utils.h"
 #include "utils/dylib.h"
-#include "utils/archive_utils.h"
 
 #if defined(__APPLE__) && defined(__MACH__)
 #include <libgen.h>  // for dirname()
@@ -98,4 +97,4 @@ int main(int argc, char* argv[]) {
 
   drogon::app().run();
   return 0;
-}
\ No newline at end of file
+}
diff --git a/engine/utils/command_executor.h b/engine/utils/command_executor.h
new file mode 100644
index 000000000..9ba13025a
--- /dev/null
+++ b/engine/utils/command_executor.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include <array>
+#include <cstdio>
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+#include <string>
+
+#ifdef _WIN32
+#define POPEN _popen
+#define PCLOSE _pclose
+#else
+#define POPEN popen
+#define PCLOSE pclose
+#endif
+
+class CommandExecutor {
+ public:
+  CommandExecutor(const std::string& command) {
+    FILE* pipe = POPEN(command.c_str(), "r");
+    if (!pipe) {
+      throw std::runtime_error("popen() failed!");
+    }
+    m_pipe = std::unique_ptr<FILE, decltype(&PCLOSE)>(pipe, PCLOSE);
+  }
+
+  CommandExecutor(const CommandExecutor&) = delete;
+  CommandExecutor& operator=(const CommandExecutor&) = delete;
+  CommandExecutor(CommandExecutor&&) = default;
+  CommandExecutor& operator=(CommandExecutor&&) = default;
+  ~CommandExecutor() = default;
+
+  std::string execute() {
+    if (!m_pipe) {
+      throw std::runtime_error("Command not initialized!");
+    }
+
+    std::array<char, 128> buffer;
+    std::string result;
+
+    while (fgets(buffer.data(), buffer.size(), m_pipe.get()) != nullptr) {
+      result += buffer.data();
+    }
+
+    return result;
+  }
+
+ private:
+  std::unique_ptr<FILE, decltype(&PCLOSE)> m_pipe{nullptr, PCLOSE};
+};
\ No newline at end of file
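
A minimal usage sketch for the CommandExecutor helper added above (the echoed command is illustrative; any command readable via popen works):

    #include <iostream>
    #include "utils/command_executor.h"

    int main() {
      try {
        CommandExecutor cmd("echo hello");  // runs via popen/_popen
        std::cout << cmd.execute();         // prints the command's stdout
      } catch (const std::runtime_error& e) {
        std::cerr << e.what() << '\n';
        return 1;
      }
      return 0;
    }
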
diff --git a/engine/utils/engine_matcher_utils.h b/engine/utils/engine_matcher_utils.h
new file mode 100644
index 000000000..23c93c1a6
--- /dev/null
+++ b/engine/utils/engine_matcher_utils.h
@@ -0,0 +1,180 @@
+#pragma once
+
+#include <algorithm>
+#include <cstdio>
+#include <iterator>
+#include <regex>
+#include <string>
+#include <vector>
+#include "utils/cpuid/cpu_info.h"
+
+namespace engine_matcher_utils {
+// for testing purposes
+const std::vector<std::string> cortex_llamacpp_variants{
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-11-7.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-12-0.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-11-7.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-12-0.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-11-7.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-12-0.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-11-7.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-12-0.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-vulkan.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-mac-arm64.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-11-7.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-12-0.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-11-7.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-11-7.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-12-0.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-11-7.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-12-0.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx.tar.gz",
+    "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-vulkan.tar.gz",
+};
+const std::vector<std::string> cortex_onnx_variants{
+    "cortex.onnx-0.1.7-windows-amd64.tar.gz"};
+
+const std::vector<std::string> cortex_tensorrt_variants{
+    "cortex.tensorrt-llm-0.0.9-linux-cuda-12-4.tar.gz",
+    "cortex.tensorrt-llm-0.0.9-windows-cuda-12-4.tar.gz"};
+
+inline std::string GetSuitableAvxVariant() {
+  cortex::cpuid::CpuInfo cpu_info;
+
+  LOG_INFO << "GetSuitableAvxVariant:" << "\n" << cpu_info.to_string();
+
+  if (cpu_info.has_avx512_f())
+    return "avx512";
+  if (cpu_info.has_avx2())
+    return "avx2";
+  if (cpu_info.has_avx())
+    return "avx";
+  return "noavx";
+}
+
+inline std::string GetSuitableCudaVariant(
+    const std::vector<std::string>& variants, const std::string& cuda_version) {
+  std::regex cuda_reg("cuda-(\\d+)-(\\d+)");
+  std::smatch match;
+
+  int requestedMajor = 0;
+  int requestedMinor = 0;
+
+  if (!cuda_version.empty()) {
+    // Split the provided CUDA version into major and minor parts
+    sscanf(cuda_version.c_str(), "%d.%d", &requestedMajor, &requestedMinor);
+  }
+
+  std::string selectedVariant;
+  int bestMatchMajor = -1;
+  int bestMatchMinor = -1;
+
+  for (const auto& variant : variants) {
+    if (std::regex_search(variant, match, cuda_reg)) {
+      // Found a CUDA version in the variant
+      int variantMajor = std::stoi(match[1]);
+      int variantMinor = std::stoi(match[2]);
+
+      if (requestedMajor == variantMajor) {
+        // If the major versions match, prefer the closest minor version
+        if (requestedMinor >= variantMinor &&
+            (variantMajor > bestMatchMajor ||
+             (variantMajor == bestMatchMajor &&
+              variantMinor > bestMatchMinor))) {
+          selectedVariant = variant;
+          bestMatchMajor = variantMajor;
+          bestMatchMinor = variantMinor;
+        }
+      }
+    } else if (cuda_version.empty() && selectedVariant.empty()) {
+      // If no CUDA version is provided, select the variant without any CUDA in the name
+      selectedVariant = variant;
+    }
+  }
+
+  return selectedVariant;
+}
+
+inline std::string ValidateTensorrtLlm(const std::vector<std::string>& variants,
+                                       const std::string& os,
+                                       const std::string& cuda_version) {
+  std::vector<std::string> os_compatible_list;
+  std::copy_if(variants.begin(), variants.end(),
+               std::back_inserter(os_compatible_list),
+               [&os](const std::string& variant) {
+                 auto os_match = "-" + os;
+                 return variant.find(os_match) != std::string::npos;
+               });
+  auto cuda_compatible =
+      GetSuitableCudaVariant(os_compatible_list, cuda_version);
+  return cuda_compatible;
+}
+
+inline std::string ValidateOnnx(const std::vector<std::string>& variants,
+                                const std::string& os,
+                                const std::string& cpu_arch) {
+
+  std::vector<std::string> os_and_arch_compatible_list;
+  std::copy_if(variants.begin(), variants.end(),
+               std::back_inserter(os_and_arch_compatible_list),
+               [&os, &cpu_arch](const std::string& variant) {
+                 auto os_match = "-" + os;
+                 auto cpu_arch_match = "-" + cpu_arch;
+
+                 return variant.find(os_match) != std::string::npos &&
+                        variant.find(cpu_arch_match) != std::string::npos;
+               });
+  if (!os_and_arch_compatible_list.empty())
+    return os_and_arch_compatible_list[0];
+  return "";
+}
+
+inline std::string Validate(const std::vector<std::string>& variants,
+                            const std::string& os, const std::string& cpu_arch,
+                            const std::string& suitable_avx,
+                            const std::string& cuda_version) {
+
+  // Early return if the OS is unsupported
+  if (os != "mac" && os != "windows" && os != "linux") {
+    // TODO: throw is better
+    return "";
+  }
+
+  std::vector<std::string> os_and_arch_compatible_list;
+  std::copy_if(variants.begin(), variants.end(),
+               std::back_inserter(os_and_arch_compatible_list),
+               [&os, &cpu_arch](const std::string& variant) {
+                 auto os_match = "-" + os;
+                 auto cpu_arch_match = "-" + cpu_arch;
+
+                 return variant.find(os_match) != std::string::npos &&
+                        variant.find(cpu_arch_match) != std::string::npos;
+               });
+
+  if (os == "mac" && !os_and_arch_compatible_list.empty())
+    return os_and_arch_compatible_list[0];
+
+  std::vector<std::string> avx_compatible_list;
+
+  std::copy_if(os_and_arch_compatible_list.begin(),
+               os_and_arch_compatible_list.end(),
+               std::back_inserter(avx_compatible_list),
+               [&suitable_avx](const std::string& variant) {
+                 auto suitable_avx_match = "-" + suitable_avx;
+
+                 return variant.find(suitable_avx_match) != std::string::npos;
+               });
+
+  auto cuda_compatible =
+      GetSuitableCudaVariant(avx_compatible_list, cuda_version);
+
+  return cuda_compatible;
+}
+}  // namespace engine_matcher_utils
\ No newline at end of file
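
A sketch of exercising the matcher against the test fixtures bundled in the header above; the OS/arch/AVX/CUDA values are hard-coded here for illustration, whereas the real flow derives them from the host:

    #include <iostream>
    #include "utils/engine_matcher_utils.h"

    int main() {
      // Pretend the host is linux/amd64 with AVX2 and CUDA 12.0.
      auto matched = engine_matcher_utils::Validate(
          engine_matcher_utils::cortex_llamacpp_variants, "linux", "amd64",
          "avx2", "12.0");
      // Expect: cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-12-0.tar.gz
      std::cout << matched << '\n';
      return 0;
    }
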
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
index 184428751..d13935295 100644
--- a/engine/utils/system_info_utils.h
+++ b/engine/utils/system_info_utils.h
@@ -1,22 +1,64 @@
 #pragma once
-#include <sstream>
+#include <regex>
+#include <sstream>
+#include <vector>
+#include "utils/command_executor.h"
+#ifdef _WIN32
+#include <windows.h>
+#endif
 
 namespace system_info_utils {
+
+constexpr static auto kUnsupported{"Unsupported"};
+constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"};
+constexpr static auto kGpuQueryCommand{
+    "nvidia-smi --query-gpu=index,memory.total,name,compute_cap "
+    "--format=csv,noheader,nounits"};
+constexpr static auto kGpuInfoRegex{
+    R"((\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+))"};
+
 struct SystemInfo {
   std::string os;
   std::string arch;
 };
-constexpr static auto kUnsupported{"Unsupported"};
+
+/**
+ * @brief Get the GPU arch. Currently we only support Ampere and Ada.
+ * Might need to come up with a better way to detect the GPU architecture.
+ *
+ * @param gpuName E.g. NVIDIA GeForce RTX 4090
+ * @return corresponding GPU arch. E.g. ampere, ada.
+ */
+inline std::string GetGpuArch(const std::string& gpuName) {
+  std::string lowerGpuName = gpuName;
+  std::transform(lowerGpuName.begin(), lowerGpuName.end(),
+                 lowerGpuName.begin(), ::tolower);
+
+  if (lowerGpuName.find("nvidia") == std::string::npos) {
+    return "unknown";
+  }
+
+  if (gpuName.find("30") != std::string::npos) {
+    return "ampere";
+  } else if (gpuName.find("40") != std::string::npos) {
+    return "ada";
+  } else {
+    return "unknown";
+  }
+}
 
 inline SystemInfo GetSystemInfo() {
   std::ostringstream arch;
   std::ostringstream os;
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) || \
+    defined(__amd64) || defined(__x86_64) || defined(_M_AMD64)
   arch << "amd64";
-#elif defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
+#elif defined(__arm__) || defined(__arm) || defined(__arm64__) ||    \
+    defined(__aarch64__) || defined(__thumb__) ||                    \
+    defined(__TARGET_ARCH_ARM) || defined(__TARGET_ARCH_THUMB) ||    \
+    defined(_ARM) || defined(_M_ARM) || defined(_M_ARMT)
   arch << "arm64";
 #else
   arch << kUnsupported;
@@ -33,4 +75,225 @@ inline SystemInfo GetSystemInfo() {
 #endif
   return SystemInfo{os.str(), arch.str()};
 }
-}  // namespace system_info_utils
\ No newline at end of file
+
+constexpr auto vulkan_sample_output = R"(
+==========
+VULKANINFO
+==========
+
+Vulkan Instance Version: 1.3.280
+
+
+Instance Extensions: count = 19
+-------------------------------
+VK_EXT_debug_report                    : extension revision 10
+VK_EXT_debug_utils                     : extension revision 2
+VK_EXT_direct_mode_display             : extension revision 1
+VK_EXT_surface_maintenance1            : extension revision 1
+VK_EXT_swapchain_colorspace            : extension revision 4
+VK_KHR_device_group_creation           : extension revision 1
+VK_KHR_display                         : extension revision 23
+VK_KHR_external_fence_capabilities     : extension revision 1
+VK_KHR_external_memory_capabilities    : extension revision 1
+VK_KHR_external_semaphore_capabilities : extension revision 1
+VK_KHR_get_display_properties2         : extension revision 1
+VK_KHR_get_physical_device_properties2 : extension revision 2
+VK_KHR_get_surface_capabilities2       : extension revision 1
+VK_KHR_portability_enumeration         : extension revision 1
+VK_KHR_surface                         : extension revision 25
+VK_KHR_surface_protected_capabilities  : extension revision 1
+VK_KHR_win32_surface                   : extension revision 6
+VK_LUNARG_direct_driver_loading        : extension revision 1
+VK_NV_external_memory_capabilities     : extension revision 1
+
+Instance Layers: count = 1
+--------------------------
+VK_LAYER_NV_optimus NVIDIA Optimus layer 1.3.280  version 1
+
+Devices:
+========
+GPU0:
+    apiVersion         = 1.3.280
+    driverVersion      = 560.70.0.0
+    vendorID           = 0x10de
+    deviceID           = 0x2684
+    deviceType         = PHYSICAL_DEVICE_TYPE_DISCRETE_GPU
+    deviceName         = NVIDIA GeForce RTX 4090
+    driverID           = DRIVER_ID_NVIDIA_PROPRIETARY
+    driverName         = NVIDIA
+    driverInfo         = 560.70
+    conformanceVersion = 1.3.8.2
+    deviceUUID         = 11deafdf-9f15-e857-2a87-8acc153fc9f7
+    driverUUID         = 10f251d9-d3c0-5001-bf67-24bb06423040
+)";
+
+constexpr auto gpu_query_list_sample_output = R"(
+0, 46068, NVIDIA RTX A6000, 8.6
+1, 46068, NVIDIA RTX A6000, 8.6
+)";
+
+constexpr auto nvidia_smi_sample_output = R"(
+Sun Aug 25 22:29:25 2024
++-----------------------------------------------------------------------------------------+
+| NVIDIA-SMI 560.70                 Driver Version: 560.70         CUDA Version: 12.6     |
+|-----------------------------------------+------------------------+----------------------+
+| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
+| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
+|                                         |                        |               MIG M. |
+|=========================================+========================+======================|
+|   0  NVIDIA GeForce RTX 4090      WDDM  |   00000000:01:00.0 Off |                  Off |
+|  0%   24C    P8             10W /  500W |     395MiB /  24564MiB |     19%      Default |
+|                                         |                        |                  N/A |
++-----------------------------------------+------------------------+----------------------+
+
++-----------------------------------------------------------------------------------------+
+| Processes:                                                                              |
+|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |
+|        ID   ID                                                               Usage      |
+|=========================================================================================|
+|    0   N/A  N/A      3984    C+G   ...5n1h2txyewy\ShellExperienceHost.exe      N/A      |
+|    0   N/A  N/A      7904    C+G   ...ekyb3d8bbwe\PhoneExperienceHost.exe      N/A      |
+|    0   N/A  N/A      8240    C+G   ...__8wekyb3d8bbwe\WindowsTerminal.exe      N/A      |
+|    0   N/A  N/A      8904    C+G   C:\Windows\explorer.exe                     N/A      |
+|    0   N/A  N/A      9304    C+G   ...siveControlPanel\SystemSettings.exe      N/A      |
+|    0   N/A  N/A      9944    C+G   ...nt.CBS_cw5n1h2txyewy\SearchHost.exe      N/A      |
+|    0   N/A  N/A     11140    C+G   ...2txyewy\StartMenuExperienceHost.exe      N/A      |
++-----------------------------------------------------------------------------------------+
+)";
+
+inline bool IsNvidiaSmiAvailable() {
+#ifdef _WIN32
+  // Check if nvidia-smi.exe exists in the PATH on Windows
+  char buffer[MAX_PATH];
+  if (SearchPath(NULL, "nvidia-smi.exe", NULL, MAX_PATH, buffer, NULL) != 0) {
+    return true;
+  } else {
+    return false;
+  }
+#else
+  // Check if nvidia-smi is available on Unix-like systems
+  int result = std::system("which nvidia-smi > /dev/null 2>&1");
+  return result == 0;
+#endif
+}
+
+inline std::string GetCudaVersion() {
+  if (!IsNvidiaSmiAvailable()) {
+    LOG_INFO << "nvidia-smi is not available!";
+    return "";
+  }
+  try {
+    CommandExecutor cmd("nvidia-smi");
+    auto output = cmd.execute();
+
+    const std::regex cuda_version_reg(kCudaVersionRegex);
+    std::smatch match;
+
+    if (std::regex_search(output, match, cuda_version_reg)) {
+      LOG_INFO << "CUDA Version: " << match[1].str();
+      return match[1].str();
+    } else {
+      LOG_ERROR << "CUDA Version not found!";
+      return "";
+    }
+  } catch (const std::exception& e) {
+    LOG_ERROR << "Error: " << e.what();
+    return "";
+  }
+}
+
+struct GpuInfo {
+  std::string id;
+  std::string vram;
+  std::string name;
+  std::string arch;
+  std::optional<std::string> compute_cap;
+};
+
+inline std::vector<GpuInfo> GetGpuInfoListVulkan() {
+  std::vector<GpuInfo> gpuInfoList;
+
+  try {
+    // NOTE: currently we don't have logic to download vulkaninfoSDK
+#ifdef _WIN32
+    CommandExecutor cmd("vulkaninfoSDK.exe --summary");
+#else
+    CommandExecutor cmd("vulkaninfoSDK --summary");
+#endif
+    auto output = cmd.execute();
+
+    // Regular expression patterns to match each field
+    std::regex gpu_block_reg(R"(GPU(\d+):)");
+    std::regex field_pattern(R"(\s*(\w+)\s*=\s*(.*))");
+
+    std::sregex_iterator iter(output.begin(), output.end(), gpu_block_reg);
+    std::sregex_iterator end;
+
+    while (iter != end) {
+      GpuInfo gpuInfo;
+
+      // Extract GPU ID from the GPU block pattern (e.g., GPU0 -> id = "0")
+      gpuInfo.id = (*iter)[1].str();
+
+      auto gpu_start_pos = iter->position(0) + iter->length(0);
+      auto gpu_end_pos = std::next(iter) != end ? std::next(iter)->position(0)
+                                                : std::string::npos;
+      std::string gpu_block =
+          output.substr(gpu_start_pos, gpu_end_pos - gpu_start_pos);
+
+      std::sregex_iterator field_iter(gpu_block.begin(), gpu_block.end(),
+                                      field_pattern);
+
+      while (field_iter != end) {
+        std::string key = (*field_iter)[1].str();
+        std::string value = (*field_iter)[2].str();
+
+        if (key == "deviceName")
+          gpuInfo.name = value;
+        else if (key == "apiVersion")
+          gpuInfo.compute_cap = value;
+
+        ++field_iter;
+      }
+
+      gpuInfo.vram = "";  // not available from vulkaninfo
+      gpuInfo.arch = GetGpuArch(gpuInfo.name);
+
+      gpuInfoList.push_back(gpuInfo);
+      ++iter;
+    }
+  } catch (const std::exception& e) {
+    LOG_ERROR << "Error: " << e.what();
+  }
+
+  return gpuInfoList;
+}
+
+inline std::vector<GpuInfo> GetGpuInfoList() {
+  std::vector<GpuInfo> gpuInfoList;
+
+  try {
+    CommandExecutor cmd(kGpuQueryCommand);
+    auto output = cmd.execute();
+
+    const std::regex gpu_info_reg(kGpuInfoRegex);
+    std::smatch match;
+    std::string::const_iterator search_start(output.cbegin());
+
+    while (
+        std::regex_search(search_start, output.cend(), match, gpu_info_reg)) {
+      GpuInfo gpuInfo = {
+          match[1].str(),              // id
+          match[2].str(),              // vram
+          match[3].str(),              // name
+          GetGpuArch(match[3].str()),  // arch
+          match[4].str()               // compute_cap
+      };
+      gpuInfoList.push_back(gpuInfo);
+      search_start = match.suffix().first;
+    }
+  } catch (const std::exception& e) {
+    std::cerr << "Error: " << e.what() << std::endl;
+  }
+
+  return gpuInfoList;
+}
+}  // namespace system_info_utils
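
To see the CUDA-version parsing in action, the bundled nvidia_smi_sample_output fixture can be run through the same regex that GetCudaVersion() applies to live nvidia-smi output — a small self-contained sketch:

    #include <iostream>
    #include <regex>
    #include <string>
    #include "utils/system_info_utils.h"

    int main() {
      std::string output = system_info_utils::nvidia_smi_sample_output;
      const std::regex reg(system_info_utils::kCudaVersionRegex);
      std::smatch match;
      if (std::regex_search(output, match, reg)) {
        std::cout << "CUDA Version: " << match[1].str() << '\n';  // 12.6
      }
      return 0;
    }
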