diff --git a/extensions/llamacpp-extension/settings.json b/extensions/llamacpp-extension/settings.json
index 8d013fee1..f75cd5dd8 100644
--- a/extensions/llamacpp-extension/settings.json
+++ b/extensions/llamacpp-extension/settings.json
@@ -71,6 +71,15 @@
       "textAlign": "right"
     }
   },
+  {
+    "key": "ctx_shift",
+    "title": "Context Shift",
+    "description": "Allow the model to discard the oldest text in its context window to make room for new tokens",
+    "controllerType": "checkbox",
+    "controllerProps": {
+      "value": false
+    }
+  },
   {
     "key": "n_predict",
     "title": "Max Tokens to Predict",
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index ff03d36d2..c71e43651 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -57,6 +57,7 @@ type LlamacppConfig = {
   rope_freq_base: number
   rope_freq_scale: number
   reasoning_budget: number
+  ctx_shift: boolean
 }
 
 interface DownloadItem {
@@ -807,6 +808,8 @@ export default class llamacpp_extension extends AIEngine {
     args.push('--main-gpu', String(cfg.main_gpu))
 
     // Boolean flags
+    if (!cfg.ctx_shift)
+      args.push('--no-context-shift')
     if (cfg.flash_attn) args.push('--flash-attn')
     if (cfg.cont_batching) args.push('--cont-batching')
     args.push('--no-mmap')
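
For reviewers, a minimal self-contained sketch of the flag mapping this diff introduces (Config and buildServerFlags are illustrative names, not the extension's real API). llama-server enables context shift by default, so the extension emits a flag only to opt out:

// Hypothetical sketch: how the ctx_shift setting maps onto llama-server CLI flags.
type Config = {
  ctx_shift: boolean
  flash_attn: boolean
  cont_batching: boolean
}

function buildServerFlags(cfg: Config): string[] {
  const args: string[] = []
  // llama.cpp shifts context by default; pass --no-context-shift to disable it
  if (!cfg.ctx_shift) args.push('--no-context-shift')
  if (cfg.flash_attn) args.push('--flash-attn')
  if (cfg.cont_batching) args.push('--cont-batching')
  args.push('--no-mmap')
  return args
}

// With the setting's default (false), the server starts with shifting disabled:
// buildServerFlags({ ctx_shift: false, flash_attn: true, cont_batching: false })
//   -> ['--no-context-shift', '--flash-attn', '--no-mmap']

Note the design consequence of defaulting the checkbox to false: the extension previously passed neither flag, so llama-server ran with context shift enabled; after this change the default launch includes --no-context-shift.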