diff --git a/engine/commands/chat_cmd.cc b/engine/commands/chat_cmd.cc
index 185dd60fe..2c00053c9 100644
--- a/engine/commands/chat_cmd.cc
+++ b/engine/commands/chat_cmd.cc
@@ -57,7 +57,7 @@ void ChatCmd::Exec(std::string msg) {
     }
   }
   // Some instruction for user here
-  std::cout << "Inorder to exit, type exit()" << std::endl;
+  std::cout << "In order to exit, type `exit()`" << std::endl;
   // Model is loaded, start to chat
   {
     while (true) {
diff --git a/engine/commands/cmd_info.cc b/engine/commands/cmd_info.cc
new file mode 100644
index 000000000..3d419906d
--- /dev/null
+++ b/engine/commands/cmd_info.cc
@@ -0,0 +1,54 @@
+#include "cmd_info.h"
+#include <vector>
+#include "trantor/utils/Logger.h"
+
+namespace commands {
+namespace {
+constexpr const char* kDelimiter = ":";
+
+std::vector<std::string> split(std::string& s, const std::string& delimiter) {
+  std::vector<std::string> tokens;
+  size_t pos = 0;
+  std::string token;
+  while ((pos = s.find(delimiter)) != std::string::npos) {
+    token = s.substr(0, pos);
+    tokens.push_back(token);
+    s.erase(0, pos + delimiter.length());
+  }
+  tokens.push_back(s);
+
+  return tokens;
+}
+}  // namespace
+
+CmdInfo::CmdInfo(std::string model_id) {
+  Parse(std::move(model_id));
+}
+
+void CmdInfo::Parse(std::string model_id) {
+  if (model_id.find(kDelimiter) == std::string::npos) {
+    engine = "cortex.llamacpp";
+    name = std::move(model_id);
+    branch = "main";
+  } else {
+    auto res = split(model_id, kDelimiter);
+    if (res.size() != 2) {
+      LOG_ERROR << "model_id is not valid";
+      return;
+    } else {
+      name = std::move(res[0]);
+      branch = std::move(res[1]);
+      if (branch.find("onnx") != std::string::npos) {
+        engine = "cortex.onnx";
+      } else if (branch.find("tensorrt") != std::string::npos) {
+        engine = "cortex.tensorrt-llm";
+      } else if (branch.find("gguf") != std::string::npos) {
+        engine = "cortex.llamacpp";
+      } else {
+        LOG_ERROR << "Not a valid branch name " << branch;
+      }
+    }
+  }
+}
+
+}  // namespace commands
\ No newline at end of file
diff --git a/engine/commands/cmd_info.h b/engine/commands/cmd_info.h
new file mode 100644
index 000000000..d952c3085
--- /dev/null
+++ b/engine/commands/cmd_info.h
@@ -0,0 +1,14 @@
+#pragma once
+#include <string>
+namespace commands {
+struct CmdInfo {
+  explicit CmdInfo(std::string model_id);
+
+  std::string engine;
+  std::string name;
+  std::string branch;
+
+ private:
+  void Parse(std::string model_id);
+};
+}  // namespace commands
\ No newline at end of file
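Reviewer sketch, not part of the patch, assuming only cmd_info.h above: a model_id of the form "name:branch" is split on ":" and the engine is inferred from the branch substring; with no delimiter, the branch defaults to "main" on cortex.llamacpp.

    #include <iostream>
    #include "commands/cmd_info.h"

    int main() {
      commands::CmdInfo ci("tinyllama:gguf");
      // ci.name == "tinyllama", ci.branch == "gguf", ci.engine == "cortex.llamacpp"
      std::cout << ci.name << " " << ci.branch << " " << ci.engine << "\n";

      commands::CmdInfo plain("tinyllama");
      // No delimiter: branch defaults to "main", engine to "cortex.llamacpp"
      std::cout << plain.name << " " << plain.branch << " " << plain.engine << "\n";
    }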
diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc
index b4f8fe064..863f29ce5 100644
--- a/engine/commands/engine_init_cmd.cc
+++ b/engine/commands/engine_init_cmd.cc
@@ -14,10 +14,10 @@ namespace commands {
 EngineInitCmd::EngineInitCmd(std::string engineName, std::string version)
     : engineName_(std::move(engineName)), version_(std::move(version)) {}
 
-void EngineInitCmd::Exec() const {
+bool EngineInitCmd::Exec() const {
   if (engineName_.empty()) {
     LOG_ERROR << "Engine name is required";
-    return;
+    return false;
   }
 
   // Check if the architecture and OS are supported
@@ -26,7 +26,7 @@ void EngineInitCmd::Exec() const {
       system_info.os == system_info_utils::kUnsupported) {
     LOG_ERROR << "Unsupported OS or architecture: " << system_info.os << ", "
               << system_info.arch;
-    return;
+    return false;
   }
   LOG_INFO << "OS: " << system_info.os << ", Arch: " << system_info.arch;
 
@@ -34,7 +34,7 @@ void EngineInitCmd::Exec() const {
   if (std::find(supportedEngines_.begin(), supportedEngines_.end(),
                 engineName_) == supportedEngines_.end()) {
     LOG_ERROR << "Engine not supported";
-    return;
+    return false;
   }
 
   constexpr auto gitHubHost = "https://api.github.com";
@@ -78,7 +78,7 @@ void EngineInitCmd::Exec() const {
         LOG_INFO << "Matched variant: " << matched_variant;
         if (matched_variant.empty()) {
           LOG_ERROR << "No variant found for " << os_arch;
-          return;
+          return false;
         }
 
         for (auto& asset : assets) {
@@ -103,36 +103,45 @@ void EngineInitCmd::Exec() const {
                 .path = path,
             }}};
 
-            DownloadService().AddDownloadTask(
-                downloadTask, [](const std::string& absolute_path) {
-                  // try to unzip the downloaded file
-                  std::filesystem::path downloadedEnginePath{absolute_path};
-                  LOG_INFO << "Downloaded engine path: "
-                           << downloadedEnginePath.string();
-
-                  archive_utils::ExtractArchive(
-                      downloadedEnginePath.string(),
-                      downloadedEnginePath.parent_path()
-                          .parent_path()
-                          .string());
-
-                  // remove the downloaded file
-                  std::filesystem::remove(absolute_path);
-                  LOG_INFO << "Finished!";
-                });
-
-            return;
+            DownloadService().AddDownloadTask(downloadTask, [](const std::string&
+                                                                   absolute_path,
+                                                               bool unused) {
+              // try to unzip the downloaded file
+              std::filesystem::path downloadedEnginePath{absolute_path};
+              LOG_INFO << "Downloaded engine path: "
+                       << downloadedEnginePath.string();
+
+              archive_utils::ExtractArchive(
+                  downloadedEnginePath.string(),
+                  downloadedEnginePath.parent_path().parent_path().string());
+
+              // remove the downloaded file
+              // TODO(any) Could not delete the file on Windows because it is
+              // currently held by httplib(?). Not sure about other platforms.
+              try {
+                std::filesystem::remove(absolute_path);
+              } catch (const std::exception& e) {
+                LOG_ERROR << "Could not delete file: " << e.what();
+              }
+              LOG_INFO << "Finished!";
+            });
+
+            return false;
          }
        }
      } catch (const json::parse_error& e) {
        std::cerr << "JSON parse error: " << e.what() << std::endl;
+       return false;
      }
   } else {
     LOG_ERROR << "HTTP error: " << res->status;
+    return false;
   }
   } else {
     auto err = res.error();
     LOG_ERROR << "HTTP error: " << httplib::to_string(err);
+    return false;
   }
+  return true;
 }
 };  // namespace commands
diff --git a/engine/commands/engine_init_cmd.h b/engine/commands/engine_init_cmd.h
index dc75d5cf6..8de74034e 100644
--- a/engine/commands/engine_init_cmd.h
+++ b/engine/commands/engine_init_cmd.h
@@ -9,7 +9,7 @@ class EngineInitCmd {
  public:
   EngineInitCmd(std::string engineName, std::string version);
 
-  void Exec() const;
+  bool Exec() const;
 
  private:
   std::string engineName_;
diff --git a/engine/commands/model_pull_cmd.cc b/engine/commands/model_pull_cmd.cc
index 9dcd8c4ef..f8e3a7947 100644
--- a/engine/commands/model_pull_cmd.cc
+++ b/engine/commands/model_pull_cmd.cc
@@ -6,18 +6,20 @@
 #include "utils/model_callback_utils.h"
 
 namespace commands {
-ModelPullCmd::ModelPullCmd(std::string modelHandle)
-    : modelHandle_(std::move(modelHandle)) {}
+ModelPullCmd::ModelPullCmd(std::string model_handle, std::string branch)
+    : model_handle_(std::move(model_handle)), branch_(std::move(branch)) {}
 
-void ModelPullCmd::Exec() {
-  auto downloadTask = cortexso_parser::getDownloadTask(modelHandle_);
+bool ModelPullCmd::Exec() {
+  auto downloadTask = cortexso_parser::getDownloadTask(model_handle_, branch_);
   if (downloadTask.has_value()) {
     DownloadService downloadService;
     downloadService.AddDownloadTask(downloadTask.value(),
                                     model_callback_utils::DownloadModelCb);
     std::cout << "Download finished" << std::endl;
+    return true;
   } else {
     std::cout << "Model not found" << std::endl;
+    return false;
   }
 }
diff --git a/engine/commands/model_pull_cmd.h b/engine/commands/model_pull_cmd.h
index 2c5f658f2..da5713bdf 100644
--- a/engine/commands/model_pull_cmd.h
+++ b/engine/commands/model_pull_cmd.h
@@ -6,10 +6,11 @@ namespace commands {
 
 class ModelPullCmd {
  public:
-  ModelPullCmd(std::string modelHandle);
-  void Exec();
+  explicit ModelPullCmd(std::string model_handle, std::string branch);
+  bool Exec();
 
  private:
-  std::string modelHandle_;
+  std::string model_handle_;
+  std::string branch_;
 };
 }  // namespace commands
\ No newline at end of file
diff --git a/engine/commands/start_model_cmd.cc b/engine/commands/model_start_cmd.cc
similarity index 84%
rename from engine/commands/start_model_cmd.cc
rename to engine/commands/model_start_cmd.cc
index 341ba2f9d..0342c3d35 100644
--- a/engine/commands/start_model_cmd.cc
+++ b/engine/commands/model_start_cmd.cc
@@ -1,14 +1,14 @@
-#include "start_model_cmd.h"
+#include "model_start_cmd.h"
 #include "httplib.h"
 #include "nlohmann/json.hpp"
 #include "trantor/utils/Logger.h"
 
 namespace commands {
-StartModelCmd::StartModelCmd(std::string host, int port,
+ModelStartCmd::ModelStartCmd(std::string host, int port,
                              const config::ModelConfig& mc)
     : host_(std::move(host)), port_(port), mc_(mc) {}
 
-void StartModelCmd::Exec() {
+bool ModelStartCmd::Exec() {
   httplib::Client cli(host_ + ":" + std::to_string(port_));
   nlohmann::json json_data;
   if (mc_.files.size() > 0) {
@@ -16,7 +16,7 @@ void StartModelCmd::Exec() {
     json_data["model_path"] = mc_.files[0];
   } else {
     LOG_WARN << "model_path is empty";
-    return;
+    return false;
   }
   json_data["model"] = mc_.name;
   json_data["system_prompt"] = mc_.system_template;
@@ -27,7 +27,7 @@ void StartModelCmd::Exec() {
   json_data["engine"] = mc_.engine;
 
   auto data_str = json_data.dump();
-
+  cli.set_read_timeout(std::chrono::seconds(60));
   auto res = cli.Post("/inferences/server/loadmodel", httplib::Headers(),
                       data_str.data(), data_str.size(), "application/json");
   if (res) {
@@ -37,7 +37,9 @@ void StartModelCmd::Exec() {
   } else {
     auto err = res.error();
     LOG_WARN << "HTTP error: " << httplib::to_string(err);
+    return false;
   }
+  return true;
 }
 };  // namespace commands
\ No newline at end of file
diff --git a/engine/commands/start_model_cmd.h b/engine/commands/model_start_cmd.h
similarity index 64%
rename from engine/commands/start_model_cmd.h
rename to engine/commands/model_start_cmd.h
index 27cfc59e6..809f71c83 100644
--- a/engine/commands/start_model_cmd.h
+++ b/engine/commands/model_start_cmd.h
@@ -5,10 +5,10 @@
 
 namespace commands {
 
-class StartModelCmd{
+class ModelStartCmd {
  public:
-  StartModelCmd(std::string host, int port, const config::ModelConfig& mc);
-  void Exec();
+  explicit ModelStartCmd(std::string host, int port, const config::ModelConfig& mc);
+  bool Exec();
 
  private:
   std::string host_;
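For context, ModelStartCmd::Exec() now sets a 60-second read timeout before POSTing, since loading a large model can exceed httplib's default. A standalone sketch of the same call, with the endpoint and field names taken from the hunks above and a hypothetical model path and name:

    #include <chrono>
    #include <string>
    #include "httplib.h"
    #include "nlohmann/json.hpp"

    // Sketch mirroring ModelStartCmd::Exec(); "/path/to/model.gguf" and
    // "tinyllama" are placeholders, not values from the patch.
    bool LoadModel(const std::string& host, int port) {
      httplib::Client cli(host + ":" + std::to_string(port));
      cli.set_read_timeout(std::chrono::seconds(60));  // model load can be slow

      nlohmann::json json_data;
      json_data["model_path"] = "/path/to/model.gguf";
      json_data["model"] = "tinyllama";
      json_data["engine"] = "cortex.llamacpp";

      auto data_str = json_data.dump();
      auto res = cli.Post("/inferences/server/loadmodel", httplib::Headers(),
                          data_str.data(), data_str.size(), "application/json");
      return res && res->status == 200;
    }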
diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc
new file mode 100644
index 000000000..8ec608eb8
--- /dev/null
+++ b/engine/commands/run_cmd.cc
@@ -0,0 +1,97 @@
+#include "run_cmd.h"
+#include "chat_cmd.h"
+#include "cmd_info.h"
+#include "config/yaml_config.h"
+#include "engine_init_cmd.h"
+#include "httplib.h"
+#include "model_pull_cmd.h"
+#include "model_start_cmd.h"
+#include "trantor/utils/Logger.h"
+#include "utils/cortex_utils.h"
+
+namespace commands {
+
+RunCmd::RunCmd(std::string host, int port, std::string model_id)
+    : host_(std::move(host)), port_(port), model_id_(std::move(model_id)) {}
+
+void RunCmd::Exec() {
+  auto address = host_ + ":" + std::to_string(port_);
+  CmdInfo ci(model_id_);
+  std::string model_file =
+      ci.branch == "main" ? ci.name : ci.name + "-" + ci.branch;
+  // TODO: should we clean up all resources if something fails?
+  // Check if the model exists. If not, download it.
+  {
+    if (!IsModelExisted(model_file)) {
+      ModelPullCmd model_pull_cmd(ci.name, ci.branch);
+      if (!model_pull_cmd.Exec()) {
+        return;
+      }
+    }
+  }
+
+  // Check if the engine exists. If not, download it.
+  {
+    if (!IsEngineExisted(ci.engine)) {
+      EngineInitCmd eic(ci.engine, "");
+      if (!eic.Exec())
+        return;
+    }
+  }
+
+  // Start model
+  config::YamlHandler yaml_handler;
+  yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() + "/models/" +
+                                   model_file + ".yaml");
+  {
+    ModelStartCmd msc(host_, port_, yaml_handler.GetModelConfig());
+    if (!msc.Exec()) {
+      return;
+    }
+  }
+
+  // Chat
+  {
+    ChatCmd cc(host_, port_, yaml_handler.GetModelConfig());
+    cc.Exec("");
+  }
+}
+
+bool RunCmd::IsModelExisted(const std::string& model_id) {
+  if (std::filesystem::exists(cortex_utils::GetCurrentPath() + "/" +
+                              cortex_utils::models_folder) &&
+      std::filesystem::is_directory(cortex_utils::GetCurrentPath() + "/" +
+                                    cortex_utils::models_folder)) {
+    // Iterate through directory
+    for (const auto& entry : std::filesystem::directory_iterator(
+             cortex_utils::GetCurrentPath() + "/" +
+             cortex_utils::models_folder)) {
+      if (entry.is_regular_file() && entry.path().extension() == ".yaml") {
+        try {
+          config::YamlHandler handler;
+          handler.ModelConfigFromFile(entry.path().string());
+          std::cout << entry.path().stem().string() << std::endl;
+          if (entry.path().stem().string() == model_id) {
+            return true;
+          }
+        } catch (const std::exception& e) {
+          LOG_ERROR << "Error reading yaml file '" << entry.path().string()
+                    << "': " << e.what();
+        }
+      }
+    }
+  }
+  return false;
+}
+
+bool RunCmd::IsEngineExisted(const std::string& e) {
+  if (std::filesystem::exists(cortex_utils::GetCurrentPath() + "/" +
+                              "engines") &&
+      std::filesystem::exists(cortex_utils::GetCurrentPath() + "/" +
+                              "engines/" + e)) {
+    return true;
+  }
+  return false;
+}
+
+};  // namespace commands
\ No newline at end of file
diff --git a/engine/commands/run_cmd.h b/engine/commands/run_cmd.h
new file mode 100644
index 000000000..ca44b9d24
--- /dev/null
+++ b/engine/commands/run_cmd.h
@@ -0,0 +1,22 @@
+#pragma once
+#include <string>
+#include <vector>
+#include "config/model_config.h"
+#include "nlohmann/json.hpp"
+
+namespace commands {
+class RunCmd {
+ public:
+  explicit RunCmd(std::string host, int port, std::string model_id);
+  void Exec();
+
+ private:
+  bool IsModelExisted(const std::string& model_id);
+  bool IsEngineExisted(const std::string& e);
+
+ private:
+  std::string host_;
+  int port_;
+  std::string model_id_;
+};
+}  // namespace commands
\ No newline at end of file
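RunCmd composes the commands above: pull the model if missing, install the engine if missing, start the model, then drop into the chat loop. A minimal driver, equivalent to `cortex run tinyllama:gguf` with the host and port hard-coded by the CLI wiring below:

    #include "commands/run_cmd.h"

    int main() {
      // Pulls model/engine on first use, loads the model, then chats.
      commands::RunCmd rc("127.0.0.1", 3928, "tinyllama:gguf");
      rc.Exec();
    }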
diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc
index 42a5f8731..88d79f4e7 100644
--- a/engine/controllers/command_line_parser.cc
+++ b/engine/controllers/command_line_parser.cc
@@ -1,13 +1,14 @@
 #include "command_line_parser.h"
+#include "commands/chat_cmd.h"
 #include "commands/engine_init_cmd.h"
 #include "commands/model_list_cmd.h"
 #include "commands/model_get_cmd.h"
-
+#include "commands/cmd_info.h"
 #include "commands/model_pull_cmd.h"
-#include "commands/start_model_cmd.h"
+#include "commands/model_start_cmd.h"
+#include "commands/run_cmd.h"
 #include "commands/stop_model_cmd.h"
 #include "commands/stop_server_cmd.h"
-#include "commands/chat_cmd.h"
 #include "config/yaml_config.h"
 #include "utils/cortex_utils.h"
 
@@ -15,7 +16,7 @@ CommandLineParser::CommandLineParser() : app_("Cortex.cpp CLI") {}
 
 bool CommandLineParser::SetupCommand(int argc, char** argv) {
   std::string model_id;
-
+
   // Models group commands
   {
     auto models_cmd =
@@ -28,9 +29,9 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
       config::YamlHandler yaml_handler;
       yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() +
                                        "/models/" + model_id + "/model.yml");
-      commands::StartModelCmd smc("127.0.0.1", 3928,
+      commands::ModelStartCmd msc("127.0.0.1", 3928,
                                   yaml_handler.GetModelConfig());
-      smc.Exec();
+      msc.Exec();
     });
 
     auto stop_model_cmd =
@@ -67,8 +68,10 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
         "HuggingFace repositories. For available models, "
         "please visit https://huggingface.co/cortexso");
     model_pull_cmd->add_option("model_id", model_id, "");
+
     model_pull_cmd->callback([&model_id]() {
-      commands::ModelPullCmd command(model_id);
+      commands::CmdInfo ci(model_id);
+      commands::ModelPullCmd command(ci.name, ci.branch);
       command.Exec();
     });
 
@@ -82,10 +85,9 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
   {
     auto chat_cmd =
         app_.add_subcommand("chat", "Send a chat request to a model");
-
+
     chat_cmd->add_option("model_id", model_id, "");
-    chat_cmd->add_option("-m,--message", msg,
-                         "Message to chat with model");
+    chat_cmd->add_option("-m,--message", msg, "Message to chat with model");
 
     chat_cmd->callback([&model_id, &msg] {
       // TODO(sang) switch to .yaml when implement model manager
@@ -116,8 +118,16 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
     EngineInstall(engines_cmd, "cortex.tensorrt-llm", version);
   }
 
-  auto run_cmd =
-      app_.add_subcommand("run", "Shortcut to start a model and chat");
+  {
+    // cortex run tinyllama:gguf
+    // Bind the option and callback to the outer model_id; a block-local
+    // string here would be destroyed before CLI11 runs the callback at
+    // parse time, leaving a dangling reference.
+    auto run_cmd =
+        app_.add_subcommand("run", "Shortcut to start a model and chat");
+    run_cmd->add_option("model_id", model_id, "");
+    run_cmd->callback([&model_id] {
+      commands::RunCmd rc("127.0.0.1", 3928, model_id);
+      rc.Exec();
+    });
+  }
 
   auto stop_cmd = app_.add_subcommand("stop", "Stop the API server");
 
@@ -132,7 +143,8 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
 }
 
 void CommandLineParser::EngineInstall(CLI::App* parent,
-                                      const std::string& engine_name, std::string& version) {
+                                      const std::string& engine_name,
+                                      std::string& version) {
   auto engine_cmd =
       parent->add_subcommand(engine_name, "Manage " + engine_name + " engine");
 
diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc
index 12bea809d..b10a6b758 100644
--- a/engine/controllers/engines.cc
+++ b/engine/controllers/engines.cc
@@ -68,7 +68,7 @@ void Engines::InitEngine(const HttpRequestPtr& req,
       }}};
 
   DownloadService().AddAsyncDownloadTask(
-      downloadTask, [](const std::string& absolute_path) {
+      downloadTask, [](const std::string& absolute_path, bool unused) {
         // try to unzip the downloaded file
         std::filesystem::path downloadedEnginePath{absolute_path};
         LOG_INFO << "Downloaded engine path: "
diff --git a/engine/main.cc b/engine/main.cc
index 143cb94e6..754c7c73a 100644
--- a/engine/main.cc
+++ b/engine/main.cc
@@ -20,7 +20,7 @@
 #error "Unsupported platform!"
 #endif
 
-int main(int argc, char* argv[]) {
+int main(int argc, char* argv[]) {
   // Check if this process is for python execution
   if (argc > 1) {
diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc
index 4a60a42a8..97c16d650 100644
--- a/engine/services/download_service.cc
+++ b/engine/services/download_service.cc
@@ -72,8 +72,8 @@ void DownloadService::StartDownloadItem(
         outputFile.write(data, data_length);
         return true;
       },
-      [&last, &outputFile, &callback, outputFilePath, this](uint64_t current,
-                                                            uint64_t total) {
+      [&item, &last, &outputFile, &callback, outputFilePath, this](
+          uint64_t current, uint64_t total) {
        if (current - last > kUpdateProgressThreshold) {
          last = current;
          LOG_INFO << "Downloading: " << current << " / " << total;
@@ -83,7 +83,9 @@ void DownloadService::StartDownloadItem(
           LOG_INFO << "Done download: "
                    << static_cast<double>(total) / 1024 / 1024 << " MiB";
           if (callback.has_value()) {
-            callback.value()(outputFilePath.string());
+            auto need_parse_gguf =
+                item.path.find("cortexso") == std::string::npos;
+            callback.value()(outputFilePath.string(), need_parse_gguf);
           }
           return false;
         }
diff --git a/engine/services/download_service.h b/engine/services/download_service.h
index 86aefeb52..4efe653bf 100644
--- a/engine/services/download_service.h
+++ b/engine/services/download_service.h
@@ -41,7 +41,7 @@ class DownloadService {
    *
    * @param task
    */
-  using DownloadItemCb = std::function<void(const std::string&)>;
+  using DownloadItemCb = std::function<void(const std::string&, bool)>;
   void AddDownloadTask(const DownloadTask& task,
                        std::optional<DownloadItemCb> callback = std::nullopt);
 
diff --git a/engine/utils/cortexso_parser.h b/engine/utils/cortexso_parser.h
index 04f6e7fa5..2d51ec145 100644
--- a/engine/utils/cortexso_parser.h
+++ b/engine/utils/cortexso_parser.h
@@ -44,8 +44,8 @@ inline std::optional<DownloadTask> getDownloadTask(
       downloadItems.push_back(downloadItem);
     }
 
-    DownloadTask downloadTask{};
-    downloadTask.id = modelId;
+    DownloadTask downloadTask{};
+    downloadTask.id = branch == "main" ? modelId : modelId + "-" + branch;
     downloadTask.type = DownloadType::Model;
     downloadTask.error = std::nullopt;
     downloadTask.items = downloadItems;
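The DownloadItemCb change threads a second flag through every consumer: true means the finished file still needs its GGUF metadata parsed, false means it came from a cortexso-hosted repo that already ships ready-made config files. A sketch of a conforming callback, assuming DownloadItemCb is std::function<void(const std::string&, bool)>, which is what the lambdas in this patch suggest:

    #include <string>

    // Signature matches DownloadItemCb: (absolute_path, need_parse_gguf).
    void OnDownloadedItem(const std::string& absolute_path, bool need_parse_gguf) {
      if (!need_parse_gguf) {
        return;  // cortexso repos already include their model config
      }
      // ... parse GGUF metadata from absolute_path and emit a .yaml config ...
    }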
diff --git a/engine/utils/model_callback_utils.h b/engine/utils/model_callback_utils.h
index 753fdb205..781693fef 100644
--- a/engine/utils/model_callback_utils.h
+++ b/engine/utils/model_callback_utils.h
@@ -10,7 +10,7 @@
 #include "utils/file_manager_utils.h"
 
 namespace model_callback_utils {
-inline void DownloadModelCb(const std::string& path) {
+inline void DownloadModelCb(const std::string& path, bool need_parse_gguf) {
   std::filesystem::path path_obj(path);
   std::string filename(path_obj.filename().string());
 
@@ -29,7 +29,7 @@ inline void DownloadModelCb(const std::string& path) {
   // currently, only handle downloaded model with only 1 .gguf file
   // TODO: handle multipart gguf file or different model in 1 repo.
   else if (path_obj.extension().string().compare(".gguf") == 0) {
-
+    if (!need_parse_gguf) return;
     config::GGUFHandler gguf_handler;
     config::YamlHandler yaml_handler;
     gguf_handler.Parse(path);
@@ -40,10 +40,11 @@ inline void DownloadModelCb(const std::string& path) {
     std::string yml_path(path_obj.parent_path().parent_path().string() +
                          "/" + model_config.id + ".yaml");
     std::string yaml_path(path_obj.parent_path().string() + "/model.yml");
-    if (!std::filesystem::exists(yml_path)) {  // if model.yml doesn't exsited
+    if (!std::filesystem::exists(yml_path)) {  // if <id>.yaml doesn't exist yet
       yaml_handler.WriteYamlFile(yml_path);
     }
-    if (!std::filesystem::exists(yaml_path)) {// if .yaml doesn't exsited
+    if (!std::filesystem::exists(
+            yaml_path)) {  // if model.yml doesn't exist yet
       yaml_handler.WriteYamlFile(yaml_path);
     }
   }