Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
Fix warmup prompt length and add ns_log_level control (#39)
Browse files: browse the repository at this point in the history
  • Loading branch information
luoyu-intel authored Jan 10, 2024
1 parent 227e89f commit 070b6b9
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions neural_speed/application/main_run.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -404,10 +404,11 @@ int main(int argc, char** argv) { // NOLINT
const float mirostat_eta = params.mirostat_eta;
const bool penalize_nl = params.penalize_nl;
model_token id = 0;

if (params.warmup) {
if (ns_log_level() >= 0 && params.warmup) {
// Warmup phase is used to generate static objects(e.g. JIT kernels)
int constexpr WarmUpPromptLen = 32;
{
const std::vector<model_token> tmp(32, ctx->vocab.bos_token_id);
const std::vector<model_token> tmp(WarmUpPromptLen, ctx->vocab.bos_token_id);
std::vector<model_input> inputs = {model_input{
/*.tokens =*/tmp.data(),
/*.n_tokens =*/(uint32_t)tmp.size(),
Expand All @@ -430,8 +431,8 @@ int main(int argc, char** argv) { // NOLINT
/*.tokens =*/tmp.data(),
/*.n_tokens =*/(uint32_t)tmp.size(),
/*.n_prompt_tokens =*/0,
/*.n_past =*/(uint32_t)(params.n_predict - 1),
/*.n_total =*/(uint32_t)(params.n_predict - 1),
/*.n_past =*/WarmUpPromptLen,
/*.n_total =*/WarmUpPromptLen,
/*.request_idx =*/0,
/*.beam_idx =*/0,
/*.padding_side =*/0,
Expand Down

0 comments on commit 070b6b9

Please sign in to comment.