From 75d3dd2de0c531154c757372633241076dd018ec Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 13 May 2025 21:08:16 +0700 Subject: [PATCH] fix: qwen3 - weird token output - reasoning content should not be in completion request (#4983) * fix: qwen3 - weird token output - reasoning content should not be in completion request * chore: bump engine version to llama.cpp b5219 --- .../rolldown.config.mjs | 4 ++-- .../inference-cortex-extension/download.bat | 2 +- .../inference-cortex-extension/download.sh | 2 +- .../rolldown.config.mjs | 2 +- web/utils/messageRequestBuilder.ts | 20 +++++++++++++++++++ 5 files changed, 25 insertions(+), 5 deletions(-) diff --git a/extensions/engine-management-extension/rolldown.config.mjs b/extensions/engine-management-extension/rolldown.config.mjs index 02b84b363..7d6a6c1af 100644 --- a/extensions/engine-management-extension/rolldown.config.mjs +++ b/extensions/engine-management-extension/rolldown.config.mjs @@ -15,7 +15,7 @@ export default defineConfig([ `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? 
'39291'}` ), PLATFORM: JSON.stringify(process.platform), - CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'), DEFAULT_REMOTE_ENGINES: JSON.stringify(engines), DEFAULT_REMOTE_MODELS: JSON.stringify(models), DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify( @@ -38,7 +38,7 @@ export default defineConfig([ file: 'dist/node/index.cjs.js', }, define: { - CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'), }, }, ]) diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index ca2930bdd..220c5528b 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -2,7 +2,7 @@ set BIN_PATH=./bin set SHARED_PATH=./../../electron/shared set /p CORTEX_VERSION=<./bin/version.txt -set ENGINE_VERSION=0.1.55 +set ENGINE_VERSION=0.1.56 @REM Download cortex.llamacpp binaries set DOWNLOAD_URL=https://github.com/menloresearch/cortex.llamacpp/releases/download/v%ENGINE_VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64 diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index 3476708bb..46fe35c48 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -2,7 +2,7 @@ # Read CORTEX_VERSION CORTEX_VERSION=$(cat ./bin/version.txt) -ENGINE_VERSION=0.1.55 +ENGINE_VERSION=0.1.56 CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download" ENGINE_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}" CUDA_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}" diff --git a/extensions/inference-cortex-extension/rolldown.config.mjs b/extensions/inference-cortex-extension/rolldown.config.mjs index 
ef4c56c7b..0e91dfbc1 100644 --- a/extensions/inference-cortex-extension/rolldown.config.mjs +++ b/extensions/inference-cortex-extension/rolldown.config.mjs @@ -19,7 +19,7 @@ export default defineConfig([ CORTEX_SOCKET_URL: JSON.stringify( `ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}` ), - CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'), }, }, { diff --git a/web/utils/messageRequestBuilder.ts b/web/utils/messageRequestBuilder.ts index c3da9cbd8..bcd529f58 100644 --- a/web/utils/messageRequestBuilder.ts +++ b/web/utils/messageRequestBuilder.ts @@ -131,11 +131,31 @@ export class MessageRequestBuilder { return this } + reasoningTagHandle = ( + message: ChatCompletionMessage + ): ChatCompletionMessageContent => { + let content = + typeof message.content === 'string' + ? message.content + : (message.content?.[0]?.text ?? '') + // Reasoning content should not be sent to the model + if (content.includes('<think>')) { + const match = content.match(/<think>([\s\S]*?)<\/think>/) + if (match?.index !== undefined) { + const splitIndex = match.index + match[0].length + content = content.slice(splitIndex).trim() + } + } + return content + } + normalizeMessages = ( messages: ChatCompletionMessage[] ): ChatCompletionMessage[] => { const stack = new Stack() for (const message of messages) { + // Handle message content such as reasoning tags + message.content = this.reasoningTagHandle(message) if (stack.isEmpty()) { stack.push(message) continue