Skip to content

Commit

Permalink
Redirect log (#72)
Browse files Browse the repository at this point in the history
* Redirect log

* new logger option

---------

Co-authored-by: nguyenhoangthuan99 <=>
  • Loading branch information
nguyenhoangthuan99 authored Sep 4, 2024
1 parent c8b3ae2 commit 3064523
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 5 deletions.
62 changes: 60 additions & 2 deletions cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ constexpr const int k200OK = 200;
constexpr const int k400BadRequest = 400;
constexpr const int k409Conflict = 409;
constexpr const int k500InternalServerError = 500;
constexpr const int kFileLoggerOption = 0;

// '<', '|', 'im', '_', 'end', '|', '>', '</s>', '<|im_end|>'
const std::list<std::vector<int32_t>> kOpenhermesStopWords = {
Expand Down Expand Up @@ -111,11 +112,28 @@ void RemoveSpecialTokens(std::vector<int32_t>& v, ModelType model_type) {
}
}
} // namespace
// Constructs the engine. The trantor log level is always set to kError; when
// `log_option` equals kFileLoggerOption, trantor output is additionally
// redirected into an AsyncFileLogger owned by this engine.
TensorrtllmEngine::TensorrtllmEngine(int log_option) {
  trantor::Logger::setLogLevel(trantor::Logger::kError);
  if (log_option != kFileLoggerOption) {
    return;  // Keep whatever output function trantor currently has.
  }

  // Make sure the target directory exists before the logger opens files in it.
  std::filesystem::create_directories(log_folder);

  asynce_file_logger_ = std::make_unique<trantor::AsyncFileLogger>();
  asynce_file_logger_->setFileName(log_base_name);
  asynce_file_logger_->startLogging();

  // Both callbacks reach the file logger through `this`, so they are only
  // valid while this engine (and its asynce_file_logger_) is alive.
  auto write_to_file = [this](const char* msg, const uint64_t len) {
    asynce_file_logger_->output(msg, len);
  };
  auto flush_file = [this]() { asynce_file_logger_->flush(); };
  trantor::Logger::setOutputFunction(write_to_file, flush_file);

  asynce_file_logger_->setFileSizeLimit(max_log_file_size);
}

// Tears the engine down: marks the model as unloaded, waits for the response
// thread (if one is joinable), and detaches/destroys the file logger.
TensorrtllmEngine::~TensorrtllmEngine() {
  model_loaded_ = false;
  if (res_thread_ && res_thread_->joinable()) {
    res_thread_->join();
  }
  if (asynce_file_logger_) {
    // The constructor registers trantor output callbacks that dereference
    // this object's asynce_file_logger_. Put the stdout sink back before the
    // logger goes away so later LOG_* calls never touch a destroyed engine.
    trantor::Logger::setOutputFunction(
        [](const char* msg, const uint64_t len) {
          fwrite(msg, 1, static_cast<size_t>(len), stdout);
        },
        []() { fflush(stdout); });
    asynce_file_logger_.reset();
  }
}

void RemoveId(std::vector<int>& vec, int id) {
Expand Down Expand Up @@ -364,9 +382,51 @@ void TensorrtllmEngine::HandleChatCompletion(
return;
};

// Applies the optional logging fields of a request body.
//
// "log_option": when present and different from kFileLoggerOption, trantor
//               output is switched back to stdout.
// "log_level":  one of "trace", "debug", "info", "warn", "fatal"; any other
//               value is treated as "error". When the field is absent only
//               the TensorRT-LLM logger is adjusted (to kWARNING) and the
//               trantor level is left untouched.
//
// A fresh TllmFileLogger is always installed in logger_.
void TensorrtllmEngine::SetLoggerOption(const Json::Value& json_body) {
  const Json::Value& option_field = json_body["log_option"];
  // asInt() throws on values that cannot be converted (e.g. arbitrary
  // strings, arrays); such values are ignored instead of aborting the request.
  if (!option_field.isNull() && option_field.isConvertibleTo(Json::intValue) &&
      option_field.asInt() != kFileLoggerOption) {
    // Revert to default trantor logger output function
    trantor::Logger::setOutputFunction(
        [](const char* msg, const uint64_t len) {
          fwrite(msg, 1, static_cast<size_t>(len), stdout);
        },
        []() { fflush(stdout); });
  }

  logger_ = std::make_shared<TllmFileLogger>();

  const Json::Value& level_field = json_body["log_level"];
  if (level_field.isNull()) {
    logger_->setLevel(nvinfer1::ILogger::Severity::kWARNING);
    return;
  }

  // asString() throws for arrays/objects; fall through to the "error"
  // default in that case, like any other unrecognised level.
  const std::string log_level =
      level_field.isConvertibleTo(Json::stringValue) ? level_field.asString()
                                                     : std::string();

  if (log_level == "trace") {
    trantor::Logger::setLogLevel(trantor::Logger::kTrace);
    logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
  } else if (log_level == "debug") {
    trantor::Logger::setLogLevel(trantor::Logger::kDebug);
    logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
  } else if (log_level == "info") {
    trantor::Logger::setLogLevel(trantor::Logger::kInfo);
    logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
  } else if (log_level == "warn") {
    trantor::Logger::setLogLevel(trantor::Logger::kWarn);
    logger_->setLevel(nvinfer1::ILogger::Severity::kWARNING);
  } else if (log_level == "fatal") {
    trantor::Logger::setLogLevel(trantor::Logger::kFatal);
    // "fatal" must not be more verbose than "error": use the strictest
    // severity this logger supports rather than kWARNING.
    logger_->setLevel(nvinfer1::ILogger::Severity::kERROR);
  } else {
    trantor::Logger::setLogLevel(trantor::Logger::kError);
    logger_->setLevel(nvinfer1::ILogger::Severity::kERROR);
  }
}

void TensorrtllmEngine::LoadModel(
std::shared_ptr<Json::Value> json_body,
std::function<void(Json::Value&&, Json::Value&&)>&& callback) {
SetLoggerOption(*json_body);
model::LoadModelRequest request = model::fromJson(json_body);
if (model_loaded_ && model_type_ == GetModelType(request.model_path)) {
LOG_INFO << "Model already loaded";
Expand Down Expand Up @@ -398,8 +458,6 @@ void TensorrtllmEngine::LoadModel(
}
model_id_ = GetModelId(*json_body);

logger_ = std::make_shared<TllmLogger>();
logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
initTrtLlmPlugins(logger_.get());

std::filesystem::path tokenizer_model_name = model_dir / "tokenizer.model";
Expand Down
79 changes: 76 additions & 3 deletions cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "models/chat_completion_request.h"
#include "models/load_model_request.h"
#include "sentencepiece_processor.h"
#include "tensorrt_llm/common/logger.h"
#include "tensorrt_llm/executor/executor.h"
#include "tensorrt_llm/plugins/api/tllmPlugin.h"
#include "tensorrt_llm/runtime/generationInput.h"
Expand All @@ -27,13 +28,83 @@
#include "tensorrt_llm/runtime/tllmLogger.h"
#include "trantor/utils/ConcurrentTaskQueue.h"
#include "trantor/utils/Logger.h"
#include <trantor/utils/AsyncFileLogger.h>

using namespace tensorrt_llm::runtime;

namespace tle = tensorrt_llm::executor;

namespace fs = std::filesystem;

namespace tc = tensorrt_llm::common;

// Base name (folder + file-name prefix) given to
// trantor::AsyncFileLogger::setFileName when file logging is enabled.
constexpr char log_base_name[] = "logs/cortex";
// Folder created with std::filesystem::create_directories before the file
// logger is started.
constexpr char log_folder[] = "logs";
// Value passed to trantor::AsyncFileLogger::setFileSizeLimit.
constexpr size_t max_log_file_size = 20000000; // 20,000,000 (~20 MB, presumably bytes)

// This class is inspired by https://github.com/NVIDIA/TensorRT-LLM/blob/main/cpp/tensorrt_llm/runtime/tllmLogger.cpp
//
// nvinfer1::ILogger implementation that forwards every message to the trantor
// LOG_* macros (tagged with a "[TensorRT-LLM][<LEVEL>]" prefix) and maps
// severity levels to/from the tensorrt_llm::common::Logger singleton.
class TllmFileLogger : public nvinfer1::ILogger {
 public:
  // Emits `msg` through the trantor macro matching `severity`.
  void log(Severity severity,
           nvinfer1::AsciiChar const* msg) noexcept override {
    switch (severity) {
      case Severity::kINTERNAL_ERROR:
      case Severity::kERROR:
        // Internal errors and plain errors share the same output format.
        LOG_ERROR << "[TensorRT-LLM][ERROR] " << msg;
        break;
      case Severity::kWARNING:
        LOG_WARN << "[TensorRT-LLM][WARN] " << msg;
        break;
      case Severity::kINFO:
        LOG_INFO << "[TensorRT-LLM][INFO] " << msg;
        break;
      case Severity::kVERBOSE:
        LOG_DEBUG << "[TensorRT-LLM][DEBUG] " << msg;
        break;
      default:
        LOG_TRACE << "[TensorRT-LLM][TRACE] " << msg;
        break;
    }
  }

  // Returns the tc::Logger singleton's current level expressed as a TensorRT
  // severity; levels without a counterpart yield kINTERNAL_ERROR.
  Severity getLevel() {
    auto const current = tc::Logger::getLogger()->getLevel();
    if (current == tc::Logger::Level::ERROR) {
      return Severity::kERROR;
    }
    if (current == tc::Logger::Level::WARNING) {
      return Severity::kWARNING;
    }
    if (current == tc::Logger::Level::INFO) {
      return Severity::kINFO;
    }
    if (current == tc::Logger::Level::DEBUG ||
        current == tc::Logger::Level::TRACE) {
      return Severity::kVERBOSE;
    }
    return Severity::kINTERNAL_ERROR;
  }

  // Sets the tc::Logger singleton's level from a TensorRT severity.
  // Unknown severities raise via TLLM_THROW.
  void setLevel(Severity level) {
    auto* const tllm_logger = tc::Logger::getLogger();
    auto mapped = tc::Logger::Level::ERROR;
    switch (level) {
      case Severity::kINTERNAL_ERROR:
      case Severity::kERROR:
        mapped = tc::Logger::Level::ERROR;
        break;
      case Severity::kWARNING:
        mapped = tc::Logger::Level::WARNING;
        break;
      case Severity::kINFO:
        mapped = tc::Logger::Level::INFO;
        break;
      case Severity::kVERBOSE:
        mapped = tc::Logger::Level::TRACE;
        break;
      default:
        TLLM_THROW("Unsupported severity");
    }
    tllm_logger->setLevel(mapped);
  }
};

struct RuntimeOptions {
std::string trtEnginePath;

Expand Down Expand Up @@ -187,7 +258,7 @@ struct InferenceState {

std::string WaitAndPop() {
std::unique_lock<std::mutex> l(m);
cv.wait(l, [this](){return !texts_to_stream.empty();});
cv.wait(l, [this]() { return !texts_to_stream.empty(); });
auto s = texts_to_stream.front();
texts_to_stream.pop();
return s;
Expand Down Expand Up @@ -228,6 +299,7 @@ namespace tensorrtllm {

class TensorrtllmEngine : public EngineI {
public:
TensorrtllmEngine(int log_option = 0);
~TensorrtllmEngine() final;
// ### Interface ###
void HandleChatCompletion(
Expand All @@ -252,7 +324,7 @@ class TensorrtllmEngine : public EngineI {
void GetModels(
std::shared_ptr<Json::Value> json_body,
std::function<void(Json::Value&&, Json::Value&&)>&& callback) final;

void SetLoggerOption(const Json::Value& json_body);
private:
bool CheckModelLoaded(
std::function<void(Json::Value&&, Json::Value&&)>& callback);
Expand Down Expand Up @@ -288,7 +360,7 @@ class TensorrtllmEngine : public EngineI {
std::unique_ptr<Tokenizer> cortex_tokenizer_;
RuntimeOptions runtime_opts_;
std::unique_ptr<tle::Executor> executor_;
std::shared_ptr<TllmLogger> logger_;
std::shared_ptr<TllmFileLogger> logger_;
std::string user_prompt_;
std::string ai_prompt_;
std::string system_prompt_;
Expand All @@ -300,6 +372,7 @@ class TensorrtllmEngine : public EngineI {
std::unique_ptr<trantor::ConcurrentTaskQueue> q_;
ModelType model_type_ = ModelType::kOpenHermes;
int n_parallel_ = 1;
std::unique_ptr<trantor::AsyncFileLogger> asynce_file_logger_;
};

} // namespace tensorrtllm

0 comments on commit 3064523

Please sign in to comment.