Merge pull request #357 from janhq/215-epic-queue-system
215 epic queue system
tikikun authored Jan 17, 2024
2 parents 18575c3 + 8edf8ae commit 33c9540
Showing 2 changed files with 25 additions and 3 deletions.
24 changes: 22 additions & 2 deletions controllers/llamaCPP.cc
@@ -293,20 +293,38 @@ void llamaCPP::chatCompletion(
LOG_INFO << "Current completion text";
LOG_INFO << formatted_output;
#endif
const int task_id = llama.request_completion(data, false, false, -1);
int task_id;

if (llama.params.n_parallel == 1) {
while (true) {
if (!single_queue_is_busy) {
task_id = llama.request_completion(data, false, false, -1);
single_queue_is_busy = true;
break;
} else {
std::this_thread::sleep_for(
std::chrono::milliseconds(500)); // Sleep for 500 milliseconds
}
}
} else {
task_id = llama.request_completion(data, false, false, -1);
}

LOG_INFO << "Resolved request for task_id:" << task_id;

if (is_streamed) {
auto state = createState(task_id, this);

auto chunked_content_provider =
[state](char *pBuffer, std::size_t nBuffSize) -> std::size_t {
[this, state](char *pBuffer, std::size_t nBuffSize) -> std::size_t {
if (!pBuffer) {
LOG_INFO << "Connection closed or buffer is null. Reset context";
state->instance->llama.request_cancel(state->task_id);
single_queue_is_busy = false;
return 0;
}
if (state->isStopped) {
single_queue_is_busy = false;
return 0;
}

@@ -339,8 +357,10 @@ void llamaCPP::chatCompletion(
}
return nRead;
} else {
single_queue_is_busy = false;
return 0;
}
single_queue_is_busy = false;
return 0;
};
auto resp = nitro_utils::nitroStreamResponse(chunked_content_provider,
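
For readers skimming the diff, the core of the change is a single-slot gate: when the server runs with n_parallel == 1, an incoming chat completion spins until single_queue_is_busy is clear, claims the slot, and releases it on every exit path of the streaming callback. Below is a minimal, standalone sketch of that pattern, not the actual nitro controller: the names single_queue_is_busy and request_completion are taken from the diff, everything else (the stubs, main) is illustrative, and the claim is made with compare_exchange_strong in one atomic step, whereas the diff checks and then sets the flag separately.

```cpp
// Minimal, self-contained sketch of the single-slot gating pattern above.
// Not the nitro controller: the flag name and request_completion() mirror
// the diff; the rest is illustrative scaffolding.
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>
#include <vector>

std::atomic<bool> single_queue_is_busy{false}; // slot starts out free
std::atomic<int> next_task_id{0};

// Stand-in for llama.request_completion(data, false, false, -1).
int request_completion() { return next_task_id++; }

// Stand-in for streaming one completion; the flag is released when the
// stream ends, mirroring the chunked_content_provider exit paths.
void stream_completion(int task_id) {
  std::this_thread::sleep_for(std::chrono::milliseconds(200));
  std::cout << "finished task " << task_id << std::endl;
  single_queue_is_busy = false;
}

void chat_completion(int n_parallel) {
  int task_id;
  if (n_parallel == 1) {
    // Spin until the single slot can be claimed. compare_exchange_strong
    // makes the check-and-claim one atomic step; the diff performs the check
    // and the set separately and relies on the 500 ms poll in between.
    bool expected = false;
    while (!single_queue_is_busy.compare_exchange_strong(expected, true)) {
      expected = false;
      std::this_thread::sleep_for(std::chrono::milliseconds(500));
    }
    task_id = request_completion();
  } else {
    task_id = request_completion();
  }
  stream_completion(task_id);
}

int main() {
  // Three concurrent requests against a single-parallel server: they are
  // served one at a time because each must claim the slot first.
  std::vector<std::thread> workers;
  for (int i = 0; i < 3; ++i) workers.emplace_back(chat_completion, 1);
  for (auto &t : workers) t.join();
  return 0;
}
```

The 500 ms polling interval trades a little latency for simplicity; waiting requests hold their HTTP connections open and re-check the flag each tick until the active stream releases it.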
4 changes: 3 additions & 1 deletion controllers/llamaCPP.h
@@ -2560,7 +2560,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {

private:
llama_server_context llama;
//std::atomic<bool> model_loaded = false;
// std::atomic<bool> model_loaded = false;
size_t sent_count = 0;
size_t sent_token_probs_index = 0;
std::thread backgroundThread;
@@ -2572,5 +2572,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
bool caching_enabled;
std::atomic<int> no_of_chats = 0;
int clean_cache_threshold;
std::atomic<bool> single_queue_is_busy; // This value is only used when
                                        // n_parallel is 1
};
}; // namespace inferences
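
One hedged aside on the new member: the diff declares single_queue_is_busy without an initializer, and before C++20 a default-constructed std::atomic<bool> holds an indeterminate value, so a declaration along the following lines would make the initial "slot free" state explicit. This is illustrative only; the surrounding class is elided.

```cpp
// Illustrative: brace-initialize the flag so the single slot starts out free.
std::atomic<bool> single_queue_is_busy{false}; // only meaningful when n_parallel == 1
```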
