diff --git a/README.md b/README.md
index ce67774..284ce52 100644
--- a/README.md
+++ b/README.md
@@ -148,3 +148,4 @@ Table of parameters
 |`flash_attn` | Boolean| To enable Flash Attention, default is true|
 |`cache_type` | String| KV cache type: f16, q8_0, q4_0, default is f16|
 |`use_mmap` | Boolean| To enable mmap, default is true|
+|`ctx_shift` | Boolean| To enable context shift, default is true|
diff --git a/llama.cpp b/llama.cpp
index 0827b2c..0da5d86 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit 0827b2c1da299805288abbd556d869318f2b121e
+Subproject commit 0da5d860266c6928b8c9408efbd264ae59fedda6
diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index 05393eb..43d6fe3 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -712,6 +712,7 @@ bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> json_body) {
     }
   }
 
+  params.ctx_shift = json_body->get("ctx_shift", true).asBool();
   params.n_gpu_layers =
       json_body->get("ngl", 300)
           .asInt();  // change from 100 -> 300 since llama 3.1 has 292 gpu layers
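
For context, here is a minimal standalone sketch (not part of the diff above) of how the new `ctx_shift` flag behaves when read with jsoncpp's `get`-with-default, as `LoadModelImpl` does: if the request body omits the field, context shift stays enabled (default `true`), and a client has to send `"ctx_shift": false` to opt out. The `main` wrapper and printed output are illustrative only.

```cpp
// Sketch only: mirrors the jsoncpp parsing used in LoadModelImpl for "ctx_shift".
#include <json/json.h>
#include <iostream>

int main() {
  Json::Value body;  // request body with "ctx_shift" omitted
  bool ctx_shift = body.get("ctx_shift", true).asBool();
  std::cout << ctx_shift << "\n";  // prints 1: context shift defaults to enabled

  body["ctx_shift"] = false;  // client explicitly disables context shift
  ctx_shift = body.get("ctx_shift", true).asBool();
  std::cout << ctx_shift << "\n";  // prints 0: context shift disabled
  return 0;
}
```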