From 654e566dcbc228da94fb59b2fcf46aeaa0ea127d Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Fri, 12 Sep 2025 13:43:31 +0530
Subject: [PATCH] fix: correct context shift flag handling in LlamaCPP
 extension (#6404) (#6431)

* fix: correct context shift flag handling in LlamaCPP extension

The previous implementation added the `--no-context-shift` flag when
`cfg.ctx_shift` was disabled, which conflicted with the llama.cpp CLI,
where the presence of `--context-shift` enables the feature. The logic is
updated to push `--context-shift` only when `cfg.ctx_shift` is true,
ensuring the extension passes the correct argument and behaves as
expected.

* feat: detect model out of context during generation

---------

Co-authored-by: Dinh Long Nguyen
---
 extensions/llamacpp-extension/src/index.ts | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index a086b74db..1d98d4213 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -42,6 +42,9 @@ import {
 } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
 
+// Error message constant - matches web-app/src/utils/error.ts
+const OUT_OF_CONTEXT_SIZE = 'the request exceeds the available context size.'
+
 type LlamacppConfig = {
   version_backend: string
   auto_update_engine: boolean
@@ -1541,7 +1544,7 @@ export default class llamacpp_extension extends AIEngine {
       args.push('--main-gpu', String(cfg.main_gpu))
 
     // Boolean flags
-    if (!cfg.ctx_shift) args.push('--no-context-shift')
+    if (cfg.ctx_shift) args.push('--context-shift')
     if (Number(version.replace(/^b/, '')) >= 6325) {
       if (!cfg.flash_attn) args.push('--flash-attn', 'off') //default: auto = ON when supported
     } else {
@@ -1739,6 +1742,13 @@ export default class llamacpp_extension extends AIEngine {
             try {
               const data = JSON.parse(jsonStr)
               const chunk = data as chatCompletionChunk
+
+              // Check for out-of-context error conditions
+              if (chunk.choices?.[0]?.finish_reason === 'length') {
+                // finish_reason 'length' indicates context limit was hit
+                throw new Error(OUT_OF_CONTEXT_SIZE)
+              }
+
               yield chunk
             } catch (e) {
               logger.error('Error parsing JSON from stream or server error:', e)
@@ -1817,7 +1827,15 @@ export default class llamacpp_extension extends AIEngine {
       )
     }
 
-    return (await response.json()) as chatCompletion
+    const completionResponse = (await response.json()) as chatCompletion
+
+    // Check for out-of-context error conditions
+    if (completionResponse.choices?.[0]?.finish_reason === 'length') {
+      // finish_reason 'length' indicates context limit was hit
+      throw new Error(OUT_OF_CONTEXT_SIZE)
+    }
+
+    return completionResponse
   }
 
   override async delete(modelId: string): Promise<void> {
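
For reference, the corrected presence-means-on behavior reduces to the
following minimal TypeScript sketch. The contextShiftArgs helper is
hypothetical (illustration only, not the extension's real argument
builder); only the --context-shift flag and the cfg.ctx_shift setting
come from this patch:

    // Hypothetical helper: llama.cpp treats the mere presence of
    // --context-shift as "enabled", so no disable flag is ever emitted.
    function contextShiftArgs(ctxShift: boolean): string[] {
      return ctxShift ? ['--context-shift'] : []
    }

    // Example: building server args with context shift enabled
    const args: string[] = ['--port', '8080', ...contextShiftArgs(true)]
    // args is now ['--port', '8080', '--context-shift']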
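
On the consumer side, the new out-of-context signal can be told apart from
other failures by matching the thrown message. A minimal sketch, assuming a
hypothetical consumeChunks caller; only the error string itself comes from
this patch (it mirrors web-app/src/utils/error.ts):

    const OUT_OF_CONTEXT_SIZE = 'the request exceeds the available context size.'

    // Hypothetical consumer: drains a chat-completion stream and reacts
    // specifically to the out-of-context error thrown by the extension.
    async function consumeChunks(stream: AsyncIterable<unknown>): Promise<void> {
      try {
        for await (const chunk of stream) {
          // render or accumulate the chunk here
          void chunk
        }
      } catch (e) {
        if (e instanceof Error && e.message === OUT_OF_CONTEXT_SIZE) {
          // e.g. suggest increasing ctx_len or starting a new thread
          console.warn('Model ran out of context during generation.')
        } else {
          throw e
        }
      }
    }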