fix: exclude enable thinking from FE

Faisal Amir 2025-10-03 19:06:01 +07:00
parent 5382e9666e
commit 40c5953fea
2 changed files with 30 additions and 26 deletions

View File

@@ -2286,38 +2286,36 @@ export default class llamacpp_extension extends AIEngine {
       }

       // Calculate text tokens
-      // Use a direct approach: convert messages to text and tokenize directly
-      // This avoids issues with enable_thinking and assistant prefills
-      let textToTokenize = ''
-
-      for (const msg of opts.messages) {
-        const rolePrefix =
-          msg.role === 'user'
-            ? 'User: '
-            : msg.role === 'assistant'
-              ? 'Assistant: '
-              : msg.role === 'system'
-                ? 'System: '
-                : ''
-        if (typeof msg.content === 'string') {
-          textToTokenize += `${rolePrefix}${msg.content}\n`
-        } else if (Array.isArray(msg.content)) {
-          for (const part of msg.content) {
-            if (part.type === 'text' && part.text) {
-              textToTokenize += part.text
-            }
-            // Skip image tokens as they're calculated separately
-          }
-          textToTokenize += '\n'
-        }
-      }
+      // Use chat_template_kwargs from opts if provided, otherwise default to disable enable_thinking
+      const tokenizeRequest = {
+        messages: opts.messages,
+        chat_template_kwargs: opts.chat_template_kwargs || {
+          enable_thinking: false,
+        },
+      }
+
+      let parseResponse = await fetch(`${baseUrl}/apply-template`, {
+        method: 'POST',
+        headers: headers,
+        body: JSON.stringify(tokenizeRequest),
+      })
+
+      if (!parseResponse.ok) {
+        const errorData = await parseResponse.json().catch(() => null)
+        throw new Error(
+          `API request failed with status ${
+            parseResponse.status
+          }: ${JSON.stringify(errorData)}`
+        )
+      }
+
+      const parsedPrompt = await parseResponse.json()

       const response = await fetch(`${baseUrl}/tokenize`, {
         method: 'POST',
         headers: headers,
         body: JSON.stringify({
-          content: textToTokenize,
+          content: parsedPrompt.prompt,
         }),
       })
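For context, the new flow asks the server to render the chat template first, then tokenizes the rendered prompt, instead of flattening messages with hand-written role prefixes on the frontend. Below is a minimal standalone sketch of that two-step count, assuming a llama.cpp-style server that exposes POST /apply-template (returning { prompt }) and POST /tokenize (returning { tokens }); the helper name, the simplified message type, and the response shapes beyond what the diff shows are assumptions, not the repo's actual code:

```ts
// Hypothetical helper mirroring the flow in the diff above.
// Assumes a llama.cpp-style server with /apply-template and /tokenize.
type ChatMessage = { role: string; content: unknown }

async function countPromptTokens(
  baseUrl: string,
  headers: Record<string, string>,
  messages: ChatMessage[]
): Promise<number> {
  // Step 1: let the server render the chat template, with thinking
  // disabled so reasoning scaffolding does not inflate the count.
  const templateRes = await fetch(`${baseUrl}/apply-template`, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      messages,
      chat_template_kwargs: { enable_thinking: false },
    }),
  })
  if (!templateRes.ok) {
    throw new Error(`apply-template failed with status ${templateRes.status}`)
  }
  const { prompt } = await templateRes.json()

  // Step 2: tokenize the rendered prompt text.
  const tokenizeRes = await fetch(`${baseUrl}/tokenize`, {
    method: 'POST',
    headers,
    body: JSON.stringify({ content: prompt }),
  })
  if (!tokenizeRes.ok) {
    throw new Error(`tokenize failed with status ${tokenizeRes.status}`)
  }
  // Assumed response shape: { tokens: number[] }.
  const { tokens } = await tokenizeRes.json()
  return tokens.length
}
```

Rendering the template server-side keeps the count aligned with the prompt the model actually receives, which the removed role-prefix flattening could only approximate.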

View File

@@ -578,6 +578,9 @@ export class DefaultModelsService implements ModelsService {
         }
       }>
     }>
+    chat_template_kwargs?: {
+      enable_thinking: boolean
+    }
   }) => Promise<number>
 }
@@ -654,6 +657,9 @@ export class DefaultModelsService implements ModelsService {
     return await engine.getTokensCount({
       model: modelId,
       messages: transformedMessages,
+      chat_template_kwargs: {
+        enable_thinking: false,
+      },
     })
   }
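On the service side, getTokensCount gains an optional chat_template_kwargs field, and the frontend now passes enable_thinking: false unconditionally. A minimal sketch of a caller against that widened signature follows; the interface names and the simplified message shape are illustrative stand-ins, not the repo's actual types:

```ts
// Hypothetical caller, abbreviated from the service code above.
interface TokensCountOpts {
  model: string
  messages: Array<{ role: string; content: string }> // simplified shape
  chat_template_kwargs?: { enable_thinking: boolean }
}

interface TokenCountingEngine {
  getTokensCount(opts: TokensCountOpts): Promise<number>
}

async function estimateContextUsage(
  engine: TokenCountingEngine,
  modelId: string,
  messages: Array<{ role: string; content: string }>
): Promise<number> {
  // Matches the commit's intent: thinking is excluded from the
  // frontend's token estimate.
  return engine.getTokensCount({
    model: modelId,
    messages,
    chat_template_kwargs: { enable_thinking: false },
  })
}
```

Because the field is optional, existing engine implementations that ignore chat_template_kwargs keep compiling; only engines that render chat templates (like the llama.cpp extension above) need to honor it.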