From 5382e9666eacf77778086f3a52612ac5333140f4 Mon Sep 17 00:00:00 2001
From: Faisal Amir
Date: Fri, 3 Oct 2025 19:01:19 +0700
Subject: [PATCH 1/3] fix: prompt token

---
 extensions/llamacpp-extension/src/index.ts | 57 ++++++++++++++--------
 1 file changed, 36 insertions(+), 21 deletions(-)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 07d49cd53..ca8e7791f 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -332,12 +332,14 @@ export default class llamacpp_extension extends AIEngine {
         )
         // Clear the invalid stored preference
         this.clearStoredBackendType()
-        bestAvailableBackendString =
-          await this.determineBestBackend(version_backends)
+        bestAvailableBackendString = await this.determineBestBackend(
+          version_backends
+        )
       }
     } else {
-      bestAvailableBackendString =
-        await this.determineBestBackend(version_backends)
+      bestAvailableBackendString = await this.determineBestBackend(
+        version_backends
+      )
     }
 
     let settings = structuredClone(SETTINGS)
@@ -2151,7 +2153,12 @@ export default class llamacpp_extension extends AIEngine {
     if (mmprojPath && !this.isAbsolutePath(mmprojPath))
       mmprojPath = await joinPath([await getJanDataFolderPath(), path])
     try {
-      const result = await planModelLoadInternal(path, this.memoryMode, mmprojPath, requestedCtx)
+      const result = await planModelLoadInternal(
+        path,
+        this.memoryMode,
+        mmprojPath,
+        requestedCtx
+      )
       return result
     } catch (e) {
       throw new Error(String(e))
@@ -2279,30 +2286,38 @@ export default class llamacpp_extension extends AIEngine {
     }
 
     // Calculate text tokens
-    const messages = JSON.stringify({ messages: opts.messages })
+    // Use a direct approach: convert messages to text and tokenize directly
+    // This avoids issues with enable_thinking and assistant prefills
+    let textToTokenize = ''
 
-    let parseResponse = await fetch(`${baseUrl}/apply-template`, {
-      method: 'POST',
-      headers: headers,
-      body: messages,
-    })
+    for (const msg of opts.messages) {
+      const rolePrefix =
+        msg.role === 'user'
+          ? 'User: '
+          : msg.role === 'assistant'
+            ? 'Assistant: '
+            : msg.role === 'system'
+              ? 'System: '
+              : ''
 
-    if (!parseResponse.ok) {
-      const errorData = await parseResponse.json().catch(() => null)
-      throw new Error(
-        `API request failed with status ${
-          parseResponse.status
-        }: ${JSON.stringify(errorData)}`
-      )
+      if (typeof msg.content === 'string') {
+        textToTokenize += `${rolePrefix}${msg.content}\n`
+      } else if (Array.isArray(msg.content)) {
+        for (const part of msg.content) {
+          if (part.type === 'text' && part.text) {
+            textToTokenize += part.text
+          }
+          // Skip image tokens as they're calculated separately
+        }
+        textToTokenize += '\n'
+      }
     }
 
-    const parsedPrompt = await parseResponse.json()
-
     const response = await fetch(`${baseUrl}/tokenize`, {
       method: 'POST',
       headers: headers,
       body: JSON.stringify({
-        content: parsedPrompt.prompt,
+        content: textToTokenize,
       }),
     })
 
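Note: below is a minimal standalone sketch (a hypothetical helper, not code from the patch) of the direct-tokenization approach patch 1 takes, assuming a llama.cpp-style server whose POST /tokenize endpoint accepts { content: string } and returns { tokens: number[] }; the function name, baseUrl parameter, and message shape are illustrative. The trade-off is that role prefixes like "User: " only approximate the model's real chat template, so the count can drift from the actual prompt; patch 2 below returns to /apply-template instead.

type ChatMessage = {
  role: 'system' | 'user' | 'assistant'
  content: string | Array<{ type: string; text?: string }>
}

// Sketch: flatten chat messages to a role-prefixed transcript, then ask the
// server's /tokenize endpoint for token ids and count them.
// countPromptTokens is a hypothetical helper, not code from the patch.
async function countPromptTokens(
  baseUrl: string,
  messages: ChatMessage[]
): Promise<number> {
  let text = ''
  for (const msg of messages) {
    const prefix =
      msg.role === 'user'
        ? 'User: '
        : msg.role === 'assistant'
          ? 'Assistant: '
          : 'System: '
    if (typeof msg.content === 'string') {
      text += `${prefix}${msg.content}\n`
    } else {
      // Multimodal content: only text parts are tokenized here; image token
      // costs are estimated separately, as in the patch.
      for (const part of msg.content) {
        if (part.type === 'text' && part.text) text += part.text
      }
      text += '\n'
    }
  }
  const res = await fetch(`${baseUrl}/tokenize`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ content: text }),
  })
  if (!res.ok) throw new Error(`tokenize failed: ${res.status}`)
  const { tokens } = (await res.json()) as { tokens: number[] }
  return tokens.length
}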
From 40c5953fea37445b1a95405e46e3009619102f53 Mon Sep 17 00:00:00 2001
From: Faisal Amir
Date: Fri, 3 Oct 2025 19:06:01 +0700
Subject: [PATCH 2/3] fix: exclude enable thinking from FE

---
 extensions/llamacpp-extension/src/index.ts | 50 +++++++++++-----------
 web-app/src/services/models/default.ts     |  6 +++
 2 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index ca8e7791f..8590891b6 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -2286,38 +2286,36 @@ export default class llamacpp_extension extends AIEngine {
     }
 
     // Calculate text tokens
-    // Use a direct approach: convert messages to text and tokenize directly
-    // This avoids issues with enable_thinking and assistant prefills
-    let textToTokenize = ''
-
-    for (const msg of opts.messages) {
-      const rolePrefix =
-        msg.role === 'user'
-          ? 'User: '
-          : msg.role === 'assistant'
-            ? 'Assistant: '
-            : msg.role === 'system'
-              ? 'System: '
-              : ''
-
-      if (typeof msg.content === 'string') {
-        textToTokenize += `${rolePrefix}${msg.content}\n`
-      } else if (Array.isArray(msg.content)) {
-        for (const part of msg.content) {
-          if (part.type === 'text' && part.text) {
-            textToTokenize += part.text
-          }
-          // Skip image tokens as they're calculated separately
-        }
-        textToTokenize += '\n'
-      }
+    // Use chat_template_kwargs from opts if provided, otherwise default to disable enable_thinking
+    const tokenizeRequest = {
+      messages: opts.messages,
+      chat_template_kwargs: opts.chat_template_kwargs || {
+        enable_thinking: false,
+      },
     }
 
+    let parseResponse = await fetch(`${baseUrl}/apply-template`, {
+      method: 'POST',
+      headers: headers,
+      body: JSON.stringify(tokenizeRequest),
+    })
+
+    if (!parseResponse.ok) {
+      const errorData = await parseResponse.json().catch(() => null)
+      throw new Error(
+        `API request failed with status ${
+          parseResponse.status
+        }: ${JSON.stringify(errorData)}`
+      )
+    }
+
+    const parsedPrompt = await parseResponse.json()
+
     const response = await fetch(`${baseUrl}/tokenize`, {
       method: 'POST',
       headers: headers,
       body: JSON.stringify({
-        content: textToTokenize,
+        content: parsedPrompt.prompt,
       }),
     })
 
diff --git a/web-app/src/services/models/default.ts b/web-app/src/services/models/default.ts
index 746f869d1..203ab5ccd 100644
--- a/web-app/src/services/models/default.ts
+++ b/web-app/src/services/models/default.ts
@@ -578,6 +578,9 @@ export class DefaultModelsService implements ModelsService {
         }
       }>
     }>
+    chat_template_kwargs?: {
+      enable_thinking: boolean
+    }
   }) => Promise<number>
 }
 
@@ -654,6 +657,9 @@
       return await engine.getTokensCount({
         model: modelId,
         messages: transformedMessages,
+        chat_template_kwargs: {
+          enable_thinking: false,
+        },
       })
     }
 
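Note: a minimal sketch (hypothetical helper, not patch code) of the template-aware counting flow patch 2 restores: render the prompt with the server's own chat template via POST /apply-template, passing chat_template_kwargs with enable_thinking: false as the patch does, then tokenize the rendered prompt. The response shapes ({ prompt }, { tokens }) follow llama.cpp's server API; headers and error handling are simplified here.

// Sketch: render the prompt with the server's chat template, with thinking
// disabled so the count matches what non-reasoning requests send, then
// tokenize the rendered prompt. countTemplatedTokens is hypothetical.
async function countTemplatedTokens(
  baseUrl: string,
  messages: Array<{ role: string; content: unknown }>,
  headers: Record<string, string> = { 'Content-Type': 'application/json' }
): Promise<number> {
  const templated = await fetch(`${baseUrl}/apply-template`, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      messages,
      // Passed through to the Jinja chat template; enable_thinking: false
      // keeps reasoning models (e.g. Qwen3-style templates) from injecting
      // thinking scaffolding into the rendered prompt.
      chat_template_kwargs: { enable_thinking: false },
    }),
  })
  if (!templated.ok) {
    throw new Error(`apply-template failed: ${templated.status}`)
  }
  const { prompt } = (await templated.json()) as { prompt: string }

  const tokenized = await fetch(`${baseUrl}/tokenize`, {
    method: 'POST',
    headers,
    body: JSON.stringify({ content: prompt }),
  })
  if (!tokenized.ok) {
    throw new Error(`tokenize failed: ${tokenized.status}`)
  }
  const { tokens } = (await tokenized.json()) as { tokens: number[] }
  return tokens.length
}

Unlike the role-prefix approximation of patch 1, this counts the exact token sequence the server would see, including the template's own control tokens.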
From fdf239352a772fc15994af2832ff4fbb6e111690 Mon Sep 17 00:00:00 2001
From: Faisal Amir
Date: Fri, 3 Oct 2025 19:52:15 +0700
Subject: [PATCH 3/3] chore: check model running

---
 web-app/src/containers/ChatInput.tsx | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx
index c96141876..9a9901800 100644
--- a/web-app/src/containers/ChatInput.tsx
+++ b/web-app/src/containers/ChatInput.tsx
@@ -129,7 +129,10 @@ const ChatInput = ({
         const activeModels = await serviceHub
           .models()
           .getActiveModels('llamacpp')
-        setHasActiveModels(activeModels.length > 0)
+        const hasMatchingActiveModel = activeModels.some(
+          (model) => String(model) === selectedModel?.id
+        )
+        setHasActiveModels(activeModels.length > 0 && hasMatchingActiveModel)
       } catch (error) {
         console.error('Failed to get active models:', error)
         setHasActiveModels(false)
@@ -142,7 +145,7 @@ const ChatInput = ({
     const intervalId = setInterval(checkActiveModels, 3000)
 
     return () => clearInterval(intervalId)
-  }, [serviceHub])
+  }, [serviceHub, selectedModel?.id])
 
   // Check for mmproj existence or vision capability when model changes
   useEffect(() => {
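Note: patch 3 tightens the check so "active" means the currently selected model is among the loaded ones, and keys the effect on the selection so the poll restarts when the user switches models. A minimal standalone sketch of that pattern as a React hook, with a stubbed getActiveModels standing in for serviceHub.models().getActiveModels:

import { useEffect, useState } from 'react'

// Stand-in for serviceHub.models().getActiveModels('llamacpp'); replace with
// the real call. Resolves to the ids of currently loaded models.
async function getActiveModels(_provider: string): Promise<string[]> {
  return [] // placeholder
}

// Sketch: poll the backend and only report "active" when the selected model
// is among the loaded ones, re-arming the poll when the selection changes.
function useSelectedModelIsActive(selectedModelId?: string): boolean {
  const [isActive, setIsActive] = useState(false)

  useEffect(() => {
    let cancelled = false

    const check = async () => {
      try {
        const active = await getActiveModels('llamacpp')
        if (!cancelled) {
          setIsActive(active.some((id) => String(id) === selectedModelId))
        }
      } catch {
        if (!cancelled) setIsActive(false)
      }
    }

    check()
    const intervalId = setInterval(check, 3000)

    return () => {
      cancelled = true
      clearInterval(intervalId)
    }
    // Keying the effect on the selected model id, as the patch does with
    // [serviceHub, selectedModel?.id], restarts the poll on model switch.
  }, [selectedModelId])

  return isActive
}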