From 75d3dd2de0c531154c757372633241076dd018ec Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 13 May 2025 21:08:16 +0700 Subject: [PATCH] fix: qwen3 - weird token output - reasoning content should not be in completion request (#4983) * fix: qwen3 - weird token output - reasoning content should not be in completion request * chore: bump engine version to llama.cpp b5219 --- .../rolldown.config.mjs | 4 ++-- .../inference-cortex-extension/download.bat | 2 +- .../inference-cortex-extension/download.sh | 2 +- .../rolldown.config.mjs | 2 +- web/utils/messageRequestBuilder.ts | 20 +++++++++++++++++++ 5 files changed, 25 insertions(+), 5 deletions(-) diff --git a/extensions/engine-management-extension/rolldown.config.mjs b/extensions/engine-management-extension/rolldown.config.mjs index 02b84b363..7d6a6c1af 100644 --- a/extensions/engine-management-extension/rolldown.config.mjs +++ b/extensions/engine-management-extension/rolldown.config.mjs @@ -15,7 +15,7 @@ export default defineConfig([ `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? 
'39291'}` ), PLATFORM: JSON.stringify(process.platform), - CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'), DEFAULT_REMOTE_ENGINES: JSON.stringify(engines), DEFAULT_REMOTE_MODELS: JSON.stringify(models), DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify( @@ -38,7 +38,7 @@ export default defineConfig([ file: 'dist/node/index.cjs.js', }, define: { - CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'), }, }, ]) diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index ca2930bdd..220c5528b 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -2,7 +2,7 @@ set BIN_PATH=./bin set SHARED_PATH=./../../electron/shared set /p CORTEX_VERSION=<./bin/version.txt -set ENGINE_VERSION=0.1.55 +set ENGINE_VERSION=0.1.56 @REM Download cortex.llamacpp binaries set DOWNLOAD_URL=https://github.com/menloresearch/cortex.llamacpp/releases/download/v%ENGINE_VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64 diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index 3476708bb..46fe35c48 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -2,7 +2,7 @@ # Read CORTEX_VERSION CORTEX_VERSION=$(cat ./bin/version.txt) -ENGINE_VERSION=0.1.55 +ENGINE_VERSION=0.1.56 CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download" ENGINE_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}" CUDA_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}" diff --git a/extensions/inference-cortex-extension/rolldown.config.mjs b/extensions/inference-cortex-extension/rolldown.config.mjs index 
ef4c56c7b..0e91dfbc1 100644 --- a/extensions/inference-cortex-extension/rolldown.config.mjs +++ b/extensions/inference-cortex-extension/rolldown.config.mjs @@ -19,7 +19,7 @@ export default defineConfig([ CORTEX_SOCKET_URL: JSON.stringify( `ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}` ), - CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'), }, }, { diff --git a/web/utils/messageRequestBuilder.ts b/web/utils/messageRequestBuilder.ts index c3da9cbd8..bcd529f58 100644 --- a/web/utils/messageRequestBuilder.ts +++ b/web/utils/messageRequestBuilder.ts @@ -131,11 +131,31 @@ export class MessageRequestBuilder { return this } + reasoningTagHandle = ( + message: ChatCompletionMessage + ): ChatCompletionMessageContent => { + let content = + typeof message.content === 'string' + ? message.content + : (message.content?.[0]?.text ?? '') + // Reasoning content should not be sent to the model + if (content.includes('<think>')) { + const match = content.match(/<think>([\s\S]*?)<\/think>/) + if (match?.index !== undefined) { + const splitIndex = match.index + match[0].length + content = content.slice(splitIndex).trim() + } + } + return content + } + normalizeMessages = ( messages: ChatCompletionMessage[] ): ChatCompletionMessage[] => { const stack = new Stack() for (const message of messages) { + // Handle message content such as reasoning tags + message.content = this.reasoningTagHandle(message) if (stack.isEmpty()) { stack.push(message) continue