diff --git a/extensions/llamacpp-extension/settings.json b/extensions/llamacpp-extension/settings.json
index 8d013fee1..f75cd5dd8 100644
--- a/extensions/llamacpp-extension/settings.json
+++ b/extensions/llamacpp-extension/settings.json
@@ -71,6 +71,15 @@
       "textAlign": "right"
     }
   },
+  {
+    "key": "ctx_shift",
+    "title": "Context Shift",
+    "description": "Allow the model to discard the oldest text in its context window to make room for new tokens",
+    "controllerType": "checkbox",
+    "controllerProps": {
+      "value": false
+    }
+  },
   {
     "key": "n_predict",
     "title": "Max Tokens to Predict",
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index ff03d36d2..c71e43651 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -57,6 +57,7 @@ type LlamacppConfig = {
   rope_freq_base: number
   rope_freq_scale: number
   reasoning_budget: number
+  ctx_shift: boolean
 }
 
 interface DownloadItem {
@@ -807,6 +808,8 @@ export default class llamacpp_extension extends AIEngine {
     args.push('--main-gpu', String(cfg.main_gpu))
 
     // Boolean flags
+    if (!cfg.ctx_shift)
+      args.push('--no-context-shift')
     if (cfg.flash_attn) args.push('--flash-attn')
     if (cfg.cont_batching) args.push('--cont-batching')
     args.push('--no-mmap')
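
For reviewers, a minimal self-contained sketch of the flag mapping this diff introduces (Config and buildServerFlags are illustrative names, not the extension's real API). llama-server enables context shift by default, so the extension emits a flag only to opt out:

// Hypothetical sketch: how the ctx_shift setting maps onto llama-server CLI flags.
type Config = {
  ctx_shift: boolean
  flash_attn: boolean
  cont_batching: boolean
}

function buildServerFlags(cfg: Config): string[] {
  const args: string[] = []
  // llama.cpp shifts context by default; pass --no-context-shift to disable it
  if (!cfg.ctx_shift) args.push('--no-context-shift')
  if (cfg.flash_attn) args.push('--flash-attn')
  if (cfg.cont_batching) args.push('--cont-batching')
  args.push('--no-mmap')
  return args
}

// With the setting's default (false), the server starts with shifting disabled:
// buildServerFlags({ ctx_shift: false, flash_attn: true, cont_batching: false })
//   -> ['--no-context-shift', '--flash-attn', '--no-mmap']

Note the design consequence of defaulting the checkbox to false: the extension previously passed neither flag, so llama-server ran with context shift enabled; after this change the default launch includes --no-context-shift.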