From 61c3fd4b5ae4d7bd9db9c984793bf107fcc41583 Mon Sep 17 00:00:00 2001
From: Faisal Amir
Date: Fri, 3 Oct 2025 20:12:12 +0700
Subject: [PATCH] Merge pull request #6727 from menloresearch/fix/prompt-token

fix: prompt token
---
 extensions/llamacpp-extension/src/index.ts | 27 ++++++++++++++++------
 web-app/src/containers/ChatInput.tsx       |  7 ++++--
 web-app/src/services/models/default.ts     |  6 +++++
 3 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 07d49cd53..8590891b6 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -332,12 +332,14 @@ export default class llamacpp_extension extends AIEngine {
         )
         // Clear the invalid stored preference
         this.clearStoredBackendType()
-        bestAvailableBackendString =
-          await this.determineBestBackend(version_backends)
+        bestAvailableBackendString = await this.determineBestBackend(
+          version_backends
+        )
       }
     } else {
-      bestAvailableBackendString =
-        await this.determineBestBackend(version_backends)
+      bestAvailableBackendString = await this.determineBestBackend(
+        version_backends
+      )
     }
 
     let settings = structuredClone(SETTINGS)
@@ -2151,7 +2153,12 @@ export default class llamacpp_extension extends AIEngine {
     if (mmprojPath && !this.isAbsolutePath(mmprojPath))
       mmprojPath = await joinPath([await getJanDataFolderPath(), path])
     try {
-      const result = await planModelLoadInternal(path, this.memoryMode, mmprojPath, requestedCtx)
+      const result = await planModelLoadInternal(
+        path,
+        this.memoryMode,
+        mmprojPath,
+        requestedCtx
+      )
       return result
     } catch (e) {
       throw new Error(String(e))
@@ -2279,12 +2286,18 @@ export default class llamacpp_extension extends AIEngine {
     }
 
     // Calculate text tokens
-    const messages = JSON.stringify({ messages: opts.messages })
+    // Use chat_template_kwargs from opts if provided, otherwise default to disable enable_thinking
+    const tokenizeRequest = {
+      messages: opts.messages,
+      chat_template_kwargs: opts.chat_template_kwargs || {
+        enable_thinking: false,
+      },
+    }
 
     let parseResponse = await fetch(`${baseUrl}/apply-template`, {
       method: 'POST',
      headers: headers,
-      body: messages,
+      body: JSON.stringify(tokenizeRequest),
     })
 
     if (!parseResponse.ok) {
diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx
index 95bdc9b39..3026a30f2 100644
--- a/web-app/src/containers/ChatInput.tsx
+++ b/web-app/src/containers/ChatInput.tsx
@@ -132,7 +132,10 @@ const ChatInput = ({
         const activeModels = await serviceHub
           .models()
           .getActiveModels('llamacpp')
-        setHasActiveModels(activeModels.length > 0)
+        const hasMatchingActiveModel = activeModels.some(
+          (model) => String(model) === selectedModel?.id
+        )
+        setHasActiveModels(activeModels.length > 0 && hasMatchingActiveModel)
       } catch (error) {
         console.error('Failed to get active models:', error)
         setHasActiveModels(false)
@@ -145,7 +148,7 @@ const ChatInput = ({
     const intervalId = setInterval(checkActiveModels, 3000)
 
     return () => clearInterval(intervalId)
-  }, [serviceHub])
+  }, [serviceHub, selectedModel?.id])
 
   // Check for mmproj existence or vision capability when model changes
   useEffect(() => {
diff --git a/web-app/src/services/models/default.ts b/web-app/src/services/models/default.ts
index 746f869d1..203ab5ccd 100644
--- a/web-app/src/services/models/default.ts
+++ b/web-app/src/services/models/default.ts
@@ -578,6 +578,9 @@ export class DefaultModelsService implements ModelsService {
           }
         }>
       }>
+      chat_template_kwargs?: {
+        enable_thinking: boolean
+      }
     }) => Promise<number>
   }
 
@@ -654,6 +657,9 @@ export class DefaultModelsService implements ModelsService {
     return await engine.getTokensCount({
       model: modelId,
       messages: transformedMessages,
+      chat_template_kwargs: {
+        enable_thinking: false,
+      },
     })
   }
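
For reference, below is a minimal sketch of the prompt-token-counting flow this patch adjusts: the body sent to the llama.cpp server's /apply-template endpoint now carries chat_template_kwargs, falling back to enable_thinking: false when the caller does not pass one, so the counted prompt matches the template the model will actually see. The helper names (buildApplyTemplateBody, countPromptTokens), the ChatMessage/TokenCountOpts shapes, and the follow-up /tokenize call with its { content } request and { tokens } response are illustrative assumptions, not the extension's actual code.

// Sketch only: mirrors the patched request construction in getTokensCount.
interface ChatMessage {
  role: string
  content: string
}

interface TokenCountOpts {
  messages: ChatMessage[]
  chat_template_kwargs?: { enable_thinking: boolean }
}

// Build the /apply-template request body, defaulting to enable_thinking: false
// when the caller does not supply chat_template_kwargs (as the patch does).
function buildApplyTemplateBody(opts: TokenCountOpts): string {
  return JSON.stringify({
    messages: opts.messages,
    chat_template_kwargs: opts.chat_template_kwargs ?? { enable_thinking: false },
  })
}

// Count prompt tokens against a local llama.cpp server. The /tokenize call and
// its { content } / { tokens } shapes are assumed here for illustration.
async function countPromptTokens(
  baseUrl: string,
  opts: TokenCountOpts
): Promise<number> {
  const templateRes = await fetch(`${baseUrl}/apply-template`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: buildApplyTemplateBody(opts),
  })
  if (!templateRes.ok) {
    throw new Error(`apply-template failed: ${templateRes.status}`)
  }
  const { prompt } = await templateRes.json()

  const tokenizeRes = await fetch(`${baseUrl}/tokenize`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ content: prompt }),
  })
  if (!tokenizeRes.ok) {
    throw new Error(`tokenize failed: ${tokenizeRes.status}`)
  }
  const { tokens } = await tokenizeRes.json()
  return tokens.length
}

// Example usage:
//   const count = await countPromptTokens('http://127.0.0.1:8080', {
//     messages: [{ role: 'user', content: 'Hello' }],
//   })

Defaulting to enable_thinking: false matches what DefaultModelsService.getTokensCount now passes explicitly; callers that enable thinking can forward their own chat_template_kwargs through the optional field.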