fix: qwen3 - weird token output - reasoning content should not be in completion request (#4983)

* fix: qwen3 - weird token output  - reasoning content should not be in completion request

* chore: bump engine version to llama.cpp b5219
This commit is contained in:
Louis 2025-05-13 21:08:16 +07:00 committed by GitHub
parent 57e0707850
commit 75d3dd2de0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 25 additions and 5 deletions

View File

@ -15,7 +15,7 @@ export default defineConfig([
`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}` `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
), ),
PLATFORM: JSON.stringify(process.platform), PLATFORM: JSON.stringify(process.platform),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'),
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines), DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
DEFAULT_REMOTE_MODELS: JSON.stringify(models), DEFAULT_REMOTE_MODELS: JSON.stringify(models),
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify( DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
@ -38,7 +38,7 @@ export default defineConfig([
file: 'dist/node/index.cjs.js', file: 'dist/node/index.cjs.js',
}, },
define: { define: {
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'),
}, },
}, },
]) ])

View File

@ -2,7 +2,7 @@
set BIN_PATH=./bin set BIN_PATH=./bin
set SHARED_PATH=./../../electron/shared set SHARED_PATH=./../../electron/shared
set /p CORTEX_VERSION=<./bin/version.txt set /p CORTEX_VERSION=<./bin/version.txt
set ENGINE_VERSION=0.1.55 set ENGINE_VERSION=0.1.56
@REM Download cortex.llamacpp binaries @REM Download cortex.llamacpp binaries
set DOWNLOAD_URL=https://github.com/menloresearch/cortex.llamacpp/releases/download/v%ENGINE_VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64 set DOWNLOAD_URL=https://github.com/menloresearch/cortex.llamacpp/releases/download/v%ENGINE_VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64

View File

@ -2,7 +2,7 @@
# Read CORTEX_VERSION # Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt) CORTEX_VERSION=$(cat ./bin/version.txt)
ENGINE_VERSION=0.1.55 ENGINE_VERSION=0.1.56
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download" CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
ENGINE_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}" ENGINE_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}"
CUDA_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}" CUDA_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}"

View File

@ -19,7 +19,7 @@ export default defineConfig([
CORTEX_SOCKET_URL: JSON.stringify( CORTEX_SOCKET_URL: JSON.stringify(
`ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}` `ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
), ),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'),
}, },
}, },
{ {

View File

@ -131,11 +131,31 @@ export class MessageRequestBuilder {
return this return this
} }
/**
 * Removes model reasoning (`<think>…</think>` blocks) from a message's
 * content, because reasoning content should not be sent back to the model.
 *
 * - String content: every closed `<think>…</think>` block is removed and the
 *   result is trimmed. Text that precedes or follows a block is kept.
 * - Multi-part (array) content: the same stripping is applied to each part
 *   that carries a string `text`; all other parts are returned untouched, so
 *   non-text parts are not lost.
 * - Missing content: an empty string is returned.
 *
 * Content without a complete `<think>…</think>` pair is returned unchanged
 * (no trimming), including content with an opening tag that is never closed.
 *
 * @param message - The chat message whose content should be sanitized. The
 *   message itself is not mutated; parts that change are shallow-copied.
 * @returns The content to use in the completion request.
 */
reasoningTagHandle = (
  message: ChatCompletionMessage
): ChatCompletionMessageContent => {
  const stripReasoning = (text: string): string => {
    // Non-greedy body so each block ends at its own closing tag; the global
    // flag removes every block, not only the first one.
    const stripped = text.replace(/<think>[\s\S]*?<\/think>/g, '')
    // Only trim when something was actually removed, so messages without
    // reasoning keep their original whitespace.
    return stripped === text ? text : stripped.trim()
  }

  const { content } = message
  if (typeof content === 'string') return stripReasoning(content)
  if (!Array.isArray(content)) return ''

  // Keep the multi-part structure: rewrite text parts, pass the rest through.
  return content.map((part) =>
    typeof part?.text === 'string'
      ? { ...part, text: stripReasoning(part.text) }
      : part
  )
}
normalizeMessages = ( normalizeMessages = (
messages: ChatCompletionMessage[] messages: ChatCompletionMessage[]
): ChatCompletionMessage[] => { ): ChatCompletionMessage[] => {
const stack = new Stack<ChatCompletionMessage>() const stack = new Stack<ChatCompletionMessage>()
for (const message of messages) { for (const message of messages) {
// Handle message content such as reasoning tags
message.content = this.reasoningTagHandle(message)
if (stack.isEmpty()) { if (stack.isEmpty()) {
stack.push(message) stack.push(message)
continue continue