fix: exclude enable_thinking from FE

2025-10-03 19:06:01 +07:00 · 2025-10-03 19:06:01 +07:00 · 40c5953fea
commit 40c5953fea
parent 5382e9666e
2 changed files with 30 additions and 26 deletions
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -2286,38 +2286,36 @@ export default class llamacpp_extension extends AIEngine {
    }

    // Calculate text tokens
-    // Use a direct approach: convert messages to text and tokenize directly
-    // This avoids issues with enable_thinking and assistant prefills
-    let textToTokenize = ''
-
-    for (const msg of opts.messages) {
-      const rolePrefix =
-        msg.role === 'user'
-          ? 'User: '
-          : msg.role === 'assistant'
-          ? 'Assistant: '
-          : msg.role === 'system'
-          ? 'System: '
-          : ''
-
-      if (typeof msg.content === 'string') {
-        textToTokenize += `${rolePrefix}${msg.content}\n`
-      } else if (Array.isArray(msg.content)) {
-        for (const part of msg.content) {
-          if (part.type === 'text' && part.text) {
-            textToTokenize += part.text
-          }
-          // Skip image tokens as they're calculated separately
-        }
-        textToTokenize += '\n'
-      }
+    // Use chat_template_kwargs from opts when provided; otherwise default to enable_thinking: false
+    const tokenizeRequest = {
+      messages: opts.messages,
+      chat_template_kwargs: opts.chat_template_kwargs || {
+        enable_thinking: false,
+      },
    }

+    let parseResponse = await fetch(`${baseUrl}/apply-template`, {
+      method: 'POST',
+      headers: headers,
+      body: JSON.stringify(tokenizeRequest),
+    })
+
+    if (!parseResponse.ok) {
+      const errorData = await parseResponse.json().catch(() => null)
+      throw new Error(
+        `API request failed with status ${
+          parseResponse.status
+        }: ${JSON.stringify(errorData)}`
+      )
+    }
+
+    const parsedPrompt = await parseResponse.json()
+
    const response = await fetch(`${baseUrl}/tokenize`, {
      method: 'POST',
      headers: headers,
      body: JSON.stringify({
-        content: textToTokenize,
+        content: parsedPrompt.prompt,
      }),
    })

--- a/web-app/src/services/models/default.ts
+++ b/web-app/src/services/models/default.ts
@@ -578,6 +578,9 @@ export class DefaultModelsService implements ModelsService {
                  }
                }>
          }>
+          chat_template_kwargs?: {
+            enable_thinking: boolean
+          }
        }) => Promise<number>
      }

@ -654,6 +657,9 @@ export class DefaultModelsService implements ModelsService {
        return await engine.getTokensCount({
          model: modelId,
          messages: transformedMessages,
+          chat_template_kwargs: {
+            enable_thinking: false,
+          },
        })
      }