fix: qwen3 - weird token output - reasoning content should not be in completion request (#4983)
* fix: qwen3 - weird token output - reasoning content should not be in completion request
* chore: bump engine version to llama.cpp b5219
This commit is contained in:
parent
57e0707850
commit
75d3dd2de0
@ -15,7 +15,7 @@ export default defineConfig([
|
|||||||
`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
|
`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
|
||||||
),
|
),
|
||||||
PLATFORM: JSON.stringify(process.platform),
|
PLATFORM: JSON.stringify(process.platform),
|
||||||
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'),
|
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'),
|
||||||
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
|
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
|
||||||
DEFAULT_REMOTE_MODELS: JSON.stringify(models),
|
DEFAULT_REMOTE_MODELS: JSON.stringify(models),
|
||||||
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
|
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
|
||||||
@ -38,7 +38,7 @@ export default defineConfig([
|
|||||||
file: 'dist/node/index.cjs.js',
|
file: 'dist/node/index.cjs.js',
|
||||||
},
|
},
|
||||||
define: {
|
define: {
|
||||||
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'),
|
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
])
|
])
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
set BIN_PATH=./bin
|
set BIN_PATH=./bin
|
||||||
set SHARED_PATH=./../../electron/shared
|
set SHARED_PATH=./../../electron/shared
|
||||||
set /p CORTEX_VERSION=<./bin/version.txt
|
set /p CORTEX_VERSION=<./bin/version.txt
|
||||||
set ENGINE_VERSION=0.1.55
|
set ENGINE_VERSION=0.1.56
|
||||||
|
|
||||||
@REM Download cortex.llamacpp binaries
|
@REM Download cortex.llamacpp binaries
|
||||||
set DOWNLOAD_URL=https://github.com/menloresearch/cortex.llamacpp/releases/download/v%ENGINE_VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64
|
set DOWNLOAD_URL=https://github.com/menloresearch/cortex.llamacpp/releases/download/v%ENGINE_VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# Read CORTEX_VERSION
|
# Read CORTEX_VERSION
|
||||||
CORTEX_VERSION=$(cat ./bin/version.txt)
|
CORTEX_VERSION=$(cat ./bin/version.txt)
|
||||||
ENGINE_VERSION=0.1.55
|
ENGINE_VERSION=0.1.56
|
||||||
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
|
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
|
||||||
ENGINE_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}"
|
ENGINE_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}"
|
||||||
CUDA_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}"
|
CUDA_DOWNLOAD_URL="https://github.com/menloresearch/cortex.llamacpp/releases/download/v${ENGINE_VERSION}"
|
||||||
|
|||||||
@ -19,7 +19,7 @@ export default defineConfig([
|
|||||||
CORTEX_SOCKET_URL: JSON.stringify(
|
CORTEX_SOCKET_URL: JSON.stringify(
|
||||||
`ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
|
`ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
|
||||||
),
|
),
|
||||||
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.55'),
|
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.56'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@ -131,11 +131,31 @@ export class MessageRequestBuilder {
|
|||||||
return this
|
return this
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the text of `message` with every closed `<think>…</think>`
 * reasoning block removed.
 *
 * Reasoning content should not be sent to the model, so only the text
 * outside those blocks is kept. Text that precedes a reasoning block is
 * preserved, and all closed blocks are removed, not only the first one.
 *
 * @param message - Chat message whose content is a plain string or a list
 *   of content parts.
 * @returns The trimmed text outside the reasoning blocks. When no closed
 *   `<think>…</think>` pair is present, the text is returned unchanged
 *   (not trimmed).
 */
reasoningTagHandle = (
  message: ChatCompletionMessage
): ChatCompletionMessageContent => {
  // NOTE(review): for list content only the first part's `text` is read, so
  // any further parts are not carried over — confirm callers never pass
  // multi-part content that must survive this step.
  const content =
    typeof message.content === 'string'
      ? message.content
      : (message.content?.[0]?.text ?? '')

  // Cheap guard: without a closing tag there is no complete block to strip.
  if (!content.includes('</think>')) {
    return content
  }

  // Non-greedy so each block ends at its own closing tag; `g` removes all.
  const stripped = content.replace(/<think>[\s\S]*?<\/think>/g, '')

  // An unchanged string means no complete block matched; return it as-is so
  // messages without reasoning keep their original whitespace.
  return stripped === content ? content : stripped.trim()
}
|
||||||
|
|
||||||
normalizeMessages = (
|
normalizeMessages = (
|
||||||
messages: ChatCompletionMessage[]
|
messages: ChatCompletionMessage[]
|
||||||
): ChatCompletionMessage[] => {
|
): ChatCompletionMessage[] => {
|
||||||
const stack = new Stack<ChatCompletionMessage>()
|
const stack = new Stack<ChatCompletionMessage>()
|
||||||
for (const message of messages) {
|
for (const message of messages) {
|
||||||
|
// Handle message content such as reasoning tags
|
||||||
|
message.content = this.reasoningTagHandle(message)
|
||||||
if (stack.isEmpty()) {
|
if (stack.isEmpty()) {
|
||||||
stack.push(message)
|
stack.push(message)
|
||||||
continue
|
continue
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user