From 654e566dcbc228da94fb59b2fcf46aeaa0ea127d Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Fri, 12 Sep 2025 13:43:31 +0530
Subject: [PATCH] fix: correct context shift flag handling in LlamaCPP
 extension (#6404) (#6431)

* fix: correct context shift flag handling in LlamaCPP extension

The previous implementation added the `--no-context-shift` flag when
`cfg.ctx_shift` was disabled, which conflicted with the llama.cpp CLI,
where the presence of `--context-shift` enables the feature. The logic is
updated to push `--context-shift` only when `cfg.ctx_shift` is true,
ensuring the extension passes the correct argument and behaves as
expected.

* feat: detect model out of context during generation

---------

Co-authored-by: Dinh Long Nguyen
---
 extensions/llamacpp-extension/src/index.ts | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index a086b74db..1d98d4213 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -42,6 +42,9 @@ import {
 } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
 
+// Error message constant - matches web-app/src/utils/error.ts
+const OUT_OF_CONTEXT_SIZE = 'the request exceeds the available context size.'
+
 type LlamacppConfig = {
   version_backend: string
   auto_update_engine: boolean
@@ -1541,7 +1544,7 @@ export default class llamacpp_extension extends AIEngine {
       args.push('--main-gpu', String(cfg.main_gpu))
 
     // Boolean flags
-    if (!cfg.ctx_shift) args.push('--no-context-shift')
+    if (cfg.ctx_shift) args.push('--context-shift')
     if (Number(version.replace(/^b/, '')) >= 6325) {
       if (!cfg.flash_attn) args.push('--flash-attn', 'off') //default: auto = ON when supported
     } else {
@@ -1739,6 +1742,13 @@ export default class llamacpp_extension extends AIEngine {
             try {
               const data = JSON.parse(jsonStr)
               const chunk = data as chatCompletionChunk
+
+              // Check for out-of-context error conditions
+              if (chunk.choices?.[0]?.finish_reason === 'length') {
+                // finish_reason 'length' indicates context limit was hit
+                throw new Error(OUT_OF_CONTEXT_SIZE)
+              }
+
               yield chunk
             } catch (e) {
               logger.error('Error parsing JSON from stream or server error:', e)
@@ -1817,7 +1827,15 @@ export default class llamacpp_extension extends AIEngine {
       )
     }
 
-    return (await response.json()) as chatCompletion
+    const completionResponse = (await response.json()) as chatCompletion
+
+    // Check for out-of-context error conditions
+    if (completionResponse.choices?.[0]?.finish_reason === 'length') {
+      // finish_reason 'length' indicates context limit was hit
+      throw new Error(OUT_OF_CONTEXT_SIZE)
+    }
+
+    return completionResponse
   }
 
   override async delete(modelId: string): Promise<void> {
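
For reference, the corrected presence-means-on behavior reduces to the
following minimal TypeScript sketch. The contextShiftArgs helper is
hypothetical (illustration only, not the extension's real argument
builder); only the --context-shift flag and the cfg.ctx_shift setting
come from this patch:

    // Hypothetical helper: llama.cpp treats the mere presence of
    // --context-shift as "enabled", so no disable flag is ever emitted.
    function contextShiftArgs(ctxShift: boolean): string[] {
      return ctxShift ? ['--context-shift'] : []
    }

    // Example: building server args with context shift enabled
    const args: string[] = ['--port', '8080', ...contextShiftArgs(true)]
    // args is now ['--port', '8080', '--context-shift']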
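
On the consumer side, the new out-of-context signal can be told apart from
other failures by matching the thrown message. A minimal sketch, assuming a
hypothetical consumeChunks caller; only the error string itself comes from
this patch (it mirrors web-app/src/utils/error.ts):

    const OUT_OF_CONTEXT_SIZE = 'the request exceeds the available context size.'

    // Hypothetical consumer: drains a chat-completion stream and reacts
    // specifically to the out-of-context error thrown by the extension.
    async function consumeChunks(stream: AsyncIterable<unknown>): Promise<void> {
      try {
        for await (const chunk of stream) {
          // render or accumulate the chunk here
          void chunk
        }
      } catch (e) {
        if (e instanceof Error && e.message === OUT_OF_CONTEXT_SIZE) {
          // e.g. suggest increasing ctx_len or starting a new thread
          console.warn('Model ran out of context during generation.')
        } else {
          throw e
        }
      }
    }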