fix: exclude enable_thinking from FE

2025-10-03 19:06:01 +07:00 · 2025-10-03 19:06:01 +07:00 · 40c5953fea
commit 40c5953fea
parent 5382e9666e
2 changed files with 30 additions and 26 deletions
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -2286,38 +2286,36 @@ export default class llamacpp_extension extends AIEngine {
    }
    // Calculate text tokens
-    // Use a direct approach: convert messages to text and tokenize directly
+    // Use chat_template_kwargs from opts if provided; otherwise default to disabling enable_thinking
-    // This avoids issues with enable_thinking and assistant prefills
+    const tokenizeRequest = {
-    let textToTokenize = ''
+      messages: opts.messages,
-
+      chat_template_kwargs: opts.chat_template_kwargs || {
-    for (const msg of opts.messages) {
+        enable_thinking: false,
-      const rolePrefix =
+      },
        msg.role === 'user'
          ? 'User: '
          : msg.role === 'assistant'
          ? 'Assistant: '
          : msg.role === 'system'
          ? 'System: '
          : ''
      if (typeof msg.content === 'string') {
        textToTokenize += `${rolePrefix}${msg.content}\n`
      } else if (Array.isArray(msg.content)) {
        for (const part of msg.content) {
          if (part.type === 'text' && part.text) {
            textToTokenize += part.text
          }
          // Skip image tokens as they're calculated separately
        }
        textToTokenize += '\n'
      }
    }
    let parseResponse = await fetch(`${baseUrl}/apply-template`, {
      method: 'POST',
      headers: headers,
      body: JSON.stringify(tokenizeRequest),
    })
    if (!parseResponse.ok) {
      const errorData = await parseResponse.json().catch(() => null)
      throw new Error(
        `API request failed with status ${
          parseResponse.status
        }: ${JSON.stringify(errorData)}`
      )
    }
    const parsedPrompt = await parseResponse.json()
    const response = await fetch(`${baseUrl}/tokenize`, {
      method: 'POST',
      headers: headers,
      body: JSON.stringify({
-        content: textToTokenize,
+        content: parsedPrompt.prompt,
      }),
    })
--- a/web-app/src/services/models/default.ts
+++ b/web-app/src/services/models/default.ts
@@ -578,6 +578,9 @@ export class DefaultModelsService implements ModelsService {
                  }
                }>
          }>
          chat_template_kwargs?: {
            enable_thinking: boolean
          }
        }) => Promise<number>
      }
@@ -654,6 +657,9 @@ export class DefaultModelsService implements ModelsService {
        return await engine.getTokensCount({
          model: modelId,
          messages: transformedMessages,
          chat_template_kwargs: {
            enable_thinking: false,
          },
        })
      }