From 8cfadd38997c9b3f0c1778bab87ca40eae2d07fe Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Fri, 3 Jan 2025 13:13:17 +0700
Subject: [PATCH 1/3] fix: add ctx_shift parameter (#357)

* fix: add ctx_shift parameter

* chore: readme

---------

Co-authored-by: vansangpfiev
---
 README.md           |  1 +
 src/llama_engine.cc | 25 ++++++++++---------------
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index ce67774..284ce52 100644
--- a/README.md
+++ b/README.md
@@ -148,3 +148,4 @@ Table of parameters
 |`flash_attn` | Boolean| To enable Flash Attention, default is true|
 |`cache_type` | String| KV cache type: f16, q8_0, q4_0, default is f16|
 |`use_mmap` | Boolean| To enable mmap, default is true|
+|`ctx_shift` | Boolean| To enable context shift, default is true|
diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index 5560645..762d7e7 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -270,24 +270,18 @@ std::string CreateReturnJson(const std::string& id, const std::string& model,
 }
 
 const std::vector<ggml_type> kv_cache_types = {
-    GGML_TYPE_F32,
-    GGML_TYPE_F16,
-    GGML_TYPE_BF16,
-    GGML_TYPE_Q8_0,
-    GGML_TYPE_Q4_0,
-    GGML_TYPE_Q4_1,
-    GGML_TYPE_IQ4_NL,
-    GGML_TYPE_Q5_0,
-    GGML_TYPE_Q5_1,
+    GGML_TYPE_F32,    GGML_TYPE_F16,  GGML_TYPE_BF16,
+    GGML_TYPE_Q8_0,   GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
+    GGML_TYPE_IQ4_NL, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
 };
 
-ggml_type kv_cache_type_from_str(const std::string & s) {
-    for (const auto & type : kv_cache_types) {
-        if (ggml_type_name(type) == s) {
-            return type;
-        }
+ggml_type kv_cache_type_from_str(const std::string& s) {
+  for (const auto& type : kv_cache_types) {
+    if (ggml_type_name(type) == s) {
+      return type;
     }
-    throw std::runtime_error("Unsupported cache type: " + s);
+  }
+  throw std::runtime_error("Unsupported cache type: " + s);
 }
 
 }  // namespace
@@ -611,6 +605,7 @@ bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> json_body) {
     }
   }
 
+  params.ctx_shift = json_body->get("ctx_shift", true).asBool();
   params.n_gpu_layers =
       json_body->get("ngl", 300)
           .asInt();  // change from 100 -> 300 since llama 3.1 has 292 gpu layers

From 234143024c04aaf76545b41c8850ac695b5e8a77 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Fri, 3 Jan 2025 13:22:55 +0700
Subject: [PATCH 2/3] chore: down log level (#358)

Co-authored-by: vansangpfiev
---
 src/llama_engine.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index 762d7e7..b967b71 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -287,7 +287,7 @@ ggml_type kv_cache_type_from_str(const std::string& s) {
 }  // namespace
 
 void LlamaEngine::Load(EngineLoadOption opts) {
-  LOG_INFO << "Loading engine..";
+  LOG_DEBUG << "Loading engine..";
 
   LOG_DEBUG << "Is custom engine path: " << opts.is_custom_engine_path;
   LOG_DEBUG << "Engine path: " << opts.engine_path.string();

From 44412ee83a7d017353db41e0baeda03f4226235f Mon Sep 17 00:00:00 2001
From: jan-service-account <136811300+jan-service-account@users.noreply.github.com>
Date: Fri, 3 Jan 2025 14:12:00 +0700
Subject: [PATCH 3/3] Update submodule to latest release b4406 (#356)

Co-authored-by: github-actions[bot]
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 0827b2c..0da5d86 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit 0827b2c1da299805288abbd556d869318f2b121e
+Subproject commit 0da5d860266c6928b8c9408efbd264ae59fedda6
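
Note on PATCH 1/3: the new `ctx_shift` field is read from the JSON request body with a default of `true`, matching the README row added above. Below is a minimal, self-contained sketch of that parsing pattern; it assumes jsoncpp is available, and the request body and `main()` wrapper are hypothetical illustrations, not part of the engine.

```cpp
// Sketch only: parses a hypothetical load-model request the way the patch does,
// using jsoncpp's Json::Value::get(key, default) so missing keys fall back to defaults.
#include <json/json.h>

#include <iostream>
#include <sstream>
#include <string>

int main() {
  // Hypothetical request body; only ctx_shift is new in this patch series.
  const std::string request = R"({
    "ngl": 300,
    "cache_type": "q8_0",
    "ctx_shift": false
  })";

  Json::CharReaderBuilder builder;
  Json::Value body;
  std::string errs;
  std::istringstream iss(request);
  if (!Json::parseFromStream(builder, iss, &body, &errs)) {
    std::cerr << "parse error: " << errs << '\n';
    return 1;
  }

  // Same defaulting pattern as LoadModelImpl in the patch: absent keys take the
  // documented defaults (ctx_shift=true, ngl=300, cache_type="f16").
  const bool ctx_shift = body.get("ctx_shift", true).asBool();
  const int ngl = body.get("ngl", 300).asInt();
  const std::string cache_type = body.get("cache_type", "f16").asString();

  std::cout << "ctx_shift=" << std::boolalpha << ctx_shift << " ngl=" << ngl
            << " cache_type=" << cache_type << '\n';
  return 0;
}
```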
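
For the KV-cache portion of the same patch, `kv_cache_type_from_str` is only reformatted: it still scans the supported ggml types and throws on anything else. The sketch below illustrates that lookup pattern with plain strings standing in for the ggml enum so it compiles without ggml; the real function returns a `ggml_type` and compares against `ggml_type_name`, as shown in the diff.

```cpp
// Sketch of the lookup pattern: linear scan over the supported cache-type names,
// throw on anything unsupported. Plain strings stand in for ggml_type here.
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

namespace {
const std::vector<std::string> kSupportedCacheTypes = {
    "f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "iq4_nl", "q5_0", "q5_1",
};

std::string CacheTypeFromStr(const std::string& s) {
  for (const auto& type : kSupportedCacheTypes) {
    if (type == s) {
      return type;
    }
  }
  throw std::runtime_error("Unsupported cache type: " + s);
}
}  // namespace

int main() {
  std::cout << CacheTypeFromStr("q8_0") << '\n';  // prints q8_0
  try {
    CacheTypeFromStr("q2_k");  // not in the list above
  } catch (const std::runtime_error& e) {
    std::cout << e.what() << '\n';  // Unsupported cache type: q2_k
  }
  return 0;
}
```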