janhq · vansangpfiev · Jan 21, 2025 · Jan 21, 2025 · Jan 21, 2025
diff --git a/src/llama_engine.cc b/src/llama_engine.cc
@@ -66,6 +66,13 @@ bool AreAllElementsInt32(const Json::Value& arr) {
   return true;
 }
 
+std::string ToLower(const std::string& s) {  // Returns a lowercased copy of `s`; the argument is left unchanged.
+  std::string data = s;  // Work on a copy so the result can be transformed in place.
+  std::transform(data.begin(), data.end(), data.begin(),
+                 [](unsigned char c) { return std::tolower(c); });  // unsigned char keeps the std::tolower argument non-negative, as <cctype> requires.
+  return data;  // Each char has been passed through std::tolower, which follows the current C locale.
+}
+
 struct InferenceState {
   int task_id;
   LlamaServerContext& llama;
@@ -726,7 +733,9 @@ bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> json_body) {
   // For model like nomic-embed-text-v1.5.f16.gguf, etc, we don't need to warm up model.
   // So we use this variable to differentiate with other models
   if (server_map_[model_id].ctx.model_type == ModelType::kLlm) {
-    WarmUpModel(model_id);
+    if (ToLower(model_id).find("deepseek-r1") == std::string::npos) {
+      WarmUpModel(model_id);
+    }
   }
   return true;
 }
@@ -846,6 +855,11 @@ void LlamaEngine::HandleInferenceImpl(
 
         if (auto content = get_message(message["content"]); !content.empty()) {
           formatted_output += role + content;
+          if (input_role == "assistant" &&
+              (ToLower(completion.model_id).find("deepseek-r1") !=
+               std::string::npos)) {
+            formatted_output += "<｜end▁of▁sentence｜>";
+          }
         }
       }
       formatted_output += si.ai_prompt;