diff --git a/cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.cc b/cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.cc
index 882bae4d1..ac628109f 100644
--- a/cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.cc
+++ b/cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.cc
@@ -27,6 +27,7 @@ constexpr const int k200OK = 200;
 constexpr const int k400BadRequest = 400;
 constexpr const int k409Conflict = 409;
 constexpr const int k500InternalServerError = 500;
+constexpr const int kFileLoggerOption = 0;
 
 // '<', '|', 'im', '_', 'end', '|', '>', '</s>', '<|im_end|>'
 const std::list<std::vector<int32_t>> kOpenhermesStopWords = {
@@ -111,11 +112,28 @@ void RemoveSpecialTokens(std::vector& v, ModelType model_type) {
     }
   }
 } // namespace
+TensorrtllmEngine::TensorrtllmEngine(int log_option) {
+  trantor::Logger::setLogLevel(trantor::Logger::kError);
+  if (log_option == kFileLoggerOption) {
+    std::filesystem::create_directories(log_folder);
+    asynce_file_logger_ = std::make_unique<trantor::AsyncFileLogger>();
+    asynce_file_logger_->setFileName(log_base_name);
+    asynce_file_logger_->startLogging();
+    trantor::Logger::setOutputFunction(
+        [&](const char* msg, const uint64_t len) {
+          asynce_file_logger_->output(msg, len);
+        },
+        [&]() { asynce_file_logger_->flush(); });
+    asynce_file_logger_->setFileSizeLimit(max_log_file_size);
+  }
+}
+
 TensorrtllmEngine::~TensorrtllmEngine() {
   model_loaded_ = false;
   if (res_thread_ && res_thread_->joinable()) {
     res_thread_->join();
   }
+  asynce_file_logger_.reset();
 }
 
 void RemoveId(std::vector& vec, int id) {
@@ -364,9 +382,51 @@ void TensorrtllmEngine::HandleChatCompletion(
   return;
 };
 
+void TensorrtllmEngine::SetLoggerOption(const Json::Value& json_body) {
+  if (!json_body["log_option"].isNull()) {
+    int log_option = json_body["log_option"].asInt();
+    if (log_option != kFileLoggerOption) {
+      // Revert to default trantor logger output function
+      trantor::Logger::setOutputFunction(
+          [](const char* msg, const uint64_t len) {
+            fwrite(msg, 1, static_cast<size_t>(len), stdout);
+          },
+          []() { fflush(stdout); });
+    }
+  }
+  logger_ = std::make_shared<TllmFileLogger>();
+  if (!json_body["log_level"].isNull()) {
+    std::string log_level = json_body["log_level"].asString();
+    if (log_level == "trace") {
+      logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
+      trantor::Logger::setLogLevel(trantor::Logger::kTrace);
+    } else if (log_level == "debug") {
+      trantor::Logger::setLogLevel(trantor::Logger::kDebug);
+      logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
+    } else if (log_level == "info") {
+      trantor::Logger::setLogLevel(trantor::Logger::kInfo);
+      logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
+    } else if (log_level == "warn") {
+      trantor::Logger::setLogLevel(trantor::Logger::kWarn);
+      logger_->setLevel(nvinfer1::ILogger::Severity::kWARNING);
+    } else if (log_level == "fatal") {
+      trantor::Logger::setLogLevel(trantor::Logger::kFatal);
+      logger_->setLevel(nvinfer1::ILogger::Severity::kWARNING);
+    } else {
+      trantor::Logger::setLogLevel(trantor::Logger::kError);
+      logger_->setLevel(nvinfer1::ILogger::Severity::kERROR);
+    }
+  } else {
+    logger_->setLevel(nvinfer1::ILogger::Severity::kWARNING);
+  }
+}
+
 void TensorrtllmEngine::LoadModel(
     std::shared_ptr<Json::Value> json_body,
     std::function<void(Json::Value&&, Json::Value&&)>&& callback) {
+  SetLoggerOption(*json_body);
   model::LoadModelRequest request = model::fromJson(json_body);
   if (model_loaded_ && model_type_ == GetModelType(request.model_path)) {
     LOG_INFO << "Model already loaded";
@@ -398,8 +458,6 @@ void TensorrtllmEngine::LoadModel(
   }
 
   model_id_ = GetModelId(*json_body);
-  logger_ = std::make_shared<TllmLogger>();
-  logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
   initTrtLlmPlugins(logger_.get());
 
   std::filesystem::path tokenizer_model_name = model_dir / "tokenizer.model";
diff --git a/cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.h b/cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.h
index 71950ab89..6bed4628d 100644
--- a/cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.h
+++ b/cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.h
@@ -16,6 +16,7 @@
 #include "models/chat_completion_request.h"
 #include "models/load_model_request.h"
 #include "sentencepiece_processor.h"
+#include "tensorrt_llm/common/logger.h"
 #include "tensorrt_llm/executor/executor.h"
 #include "tensorrt_llm/plugins/api/tllmPlugin.h"
 #include "tensorrt_llm/runtime/generationInput.h"
@@ -27,6 +28,7 @@
 #include "tensorrt_llm/runtime/tllmLogger.h"
 #include "trantor/utils/ConcurrentTaskQueue.h"
 #include "trantor/utils/Logger.h"
+#include
 
 using namespace tensorrt_llm::runtime;
 
@@ -34,6 +36,75 @@
 
 namespace tle = tensorrt_llm::executor;
 
 namespace fs = std::filesystem;
+namespace tc = tensorrt_llm::common;
+
+constexpr char log_base_name[] = "logs/cortex";
+constexpr char log_folder[] = "logs";
+constexpr size_t max_log_file_size = 20000000;  // ~20mb
+
+// This class is inspired by https://github.com/NVIDIA/TensorRT-LLM/blob/main/cpp/tensorrt_llm/runtime/tllmLogger.cpp
+class TllmFileLogger : public nvinfer1::ILogger {
+ public:
+  void log(Severity severity,
+           nvinfer1::AsciiChar const* msg) noexcept override {
+    switch (severity) {
+      case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+        LOG_ERROR << "[TensorRT-LLM][ERROR] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kERROR:
+        LOG_ERROR << "[TensorRT-LLM][ERROR] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kWARNING:
+        LOG_WARN << "[TensorRT-LLM][WARN] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kINFO:
+        LOG_INFO << "[TensorRT-LLM][INFO] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kVERBOSE:
+        LOG_DEBUG << "[TensorRT-LLM][DEBUG] " << msg;
+        break;
+      default:
+        LOG_TRACE << "[TensorRT-LLM][TRACE] " << msg;
+        break;
+    }
+  }
+
+  Severity getLevel() {
+    auto* const logger = tc::Logger::getLogger();
+    switch (logger->getLevel()) {
+      case tc::Logger::Level::ERROR: return nvinfer1::ILogger::Severity::kERROR;
+      case tc::Logger::Level::WARNING: return nvinfer1::ILogger::Severity::kWARNING;
+      case tc::Logger::Level::INFO: return nvinfer1::ILogger::Severity::kINFO;
+      case tc::Logger::Level::DEBUG:
+      case tc::Logger::Level::TRACE: return nvinfer1::ILogger::Severity::kVERBOSE;
+      default: return nvinfer1::ILogger::Severity::kINTERNAL_ERROR;
+    }
+  };
+
+  void setLevel(Severity level) {
+    auto* const logger = tc::Logger::getLogger();
+    switch (level) {
+      case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+        logger->setLevel(tc::Logger::Level::ERROR);
+        break;
+      case nvinfer1::ILogger::Severity::kERROR:
+        logger->setLevel(tc::Logger::Level::ERROR);
+        break;
+      case nvinfer1::ILogger::Severity::kWARNING:
+        logger->setLevel(tc::Logger::Level::WARNING);
+        break;
+      case nvinfer1::ILogger::Severity::kINFO:
+        logger->setLevel(tc::Logger::Level::INFO);
+        break;
+      case nvinfer1::ILogger::Severity::kVERBOSE:
+        logger->setLevel(tc::Logger::Level::TRACE);
+        break;
+      default:
+        TLLM_THROW("Unsupported severity");
+    }
+  };
+};
+
 struct RuntimeOptions {
   std::string trtEnginePath;
@@ -187,7 +258,7 @@ struct InferenceState {
   std::string WaitAndPop() {
     std::unique_lock<std::mutex> l(m);
-    cv.wait(l, [this](){return !texts_to_stream.empty();});
+    cv.wait(l, [this]() { return !texts_to_stream.empty(); });
     auto s = texts_to_stream.front();
     texts_to_stream.pop();
     return s;
   }
@@ -228,6 +299,7 @@ namespace tensorrtllm {
 
 class TensorrtllmEngine : public EngineI {
  public:
+  TensorrtllmEngine(int log_option = 0);
   ~TensorrtllmEngine() final;
   // ### Interface ###
   void HandleChatCompletion(
@@ -252,7 +324,7 @@ class TensorrtllmEngine : public EngineI {
   void GetModels(
       std::shared_ptr<Json::Value> json_body,
       std::function<void(Json::Value&&, Json::Value&&)>&& callback) final;
-
+  void SetLoggerOption(const Json::Value& json_body);
  private:
   bool CheckModelLoaded(
       std::function<void(Json::Value&&, Json::Value&&)>& callback);
@@ -288,7 +360,7 @@ class TensorrtllmEngine : public EngineI {
   std::unique_ptr cortex_tokenizer_;
   RuntimeOptions runtime_opts_;
   std::unique_ptr<tle::Executor> executor_;
-  std::shared_ptr<TllmLogger> logger_;
+  std::shared_ptr<TllmFileLogger> logger_;
   std::string user_prompt_;
   std::string ai_prompt_;
   std::string system_prompt_;
@@ -300,6 +372,7 @@ class TensorrtllmEngine : public EngineI {
   std::unique_ptr<trantor::ConcurrentTaskQueue> q_;
   ModelType model_type_ = ModelType::kOpenHermes;
   int n_parallel_ = 1;
+  std::unique_ptr<trantor::AsyncFileLogger> asynce_file_logger_;
 };
 
 } // namespace tensorrtllm
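
For reviewers, a minimal usage sketch (not part of the patch) of how a host process might drive the new logging options. It assumes jsoncpp's <json/json.h> header and an illustrative "model_path" field; only the constructor's log_option argument and the "log_option"/"log_level" JSON keys are defined by this change.

#include <json/json.h>

#include <memory>

#include "tensorrt-llm_engine.h"

int main() {
  // log_option = 0 selects kFileLoggerOption: trantor output is redirected to
  // a trantor::AsyncFileLogger writing under logs/ (base name "logs/cortex").
  tensorrtllm::TensorrtllmEngine engine(/*log_option=*/0);

  auto body = std::make_shared<Json::Value>();
  (*body)["model_path"] = "/models/openhermes";  // hypothetical field/value
  // Levels handled by SetLoggerOption: trace, debug, info, warn, fatal;
  // anything else falls back to error (trantor) / kERROR (TensorRT-LLM).
  (*body)["log_level"] = "debug";

  // SetLoggerOption(*json_body) runs at the top of LoadModel, so the level is
  // applied before initTrtLlmPlugins() and engine setup emit their logs.
  engine.LoadModel(body, [](Json::Value&& status, Json::Value&& result) {
    // Load status is reported through this callback.
  });
  return 0;
}

Because the trace/debug/info levels all map to nvinfer1::ILogger::Severity::kINFO, finer granularity on the TensorRT-LLM side comes from the trantor level, while the file-vs-stdout choice is fixed once per engine instance by log_option.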