fix: enhance tool use and model provider not persisted issues (#5094)
* chore: enhance the tool use loop
* fix: newly created custom provider is not saved
* chore: bump llama.cpp to b5488
* chore: normalize reasoning in assistant responses
* chore: fix tool call parsing in stream mode
* fix: give tool calls a default generated id
* fix: system instruction should be at the top of the history
* chore: allow users to add parameters
This commit is contained in:
parent
2744e787d1
commit
b8de48c9e9
@ -15,7 +15,7 @@ export default defineConfig([
|
|||||||
`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
|
`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
|
||||||
),
|
),
|
||||||
PLATFORM: JSON.stringify(process.platform),
|
PLATFORM: JSON.stringify(process.platform),
|
||||||
CORTEX_ENGINE_VERSION: JSON.stringify('b5371'),
|
CORTEX_ENGINE_VERSION: JSON.stringify('b5488'),
|
||||||
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
|
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
|
||||||
DEFAULT_REMOTE_MODELS: JSON.stringify(models),
|
DEFAULT_REMOTE_MODELS: JSON.stringify(models),
|
||||||
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
|
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
|
||||||
@ -38,7 +38,7 @@ export default defineConfig([
|
|||||||
file: 'dist/node/index.cjs.js',
|
file: 'dist/node/index.cjs.js',
|
||||||
},
|
},
|
||||||
define: {
|
define: {
|
||||||
CORTEX_ENGINE_VERSION: JSON.stringify('b5371'),
|
CORTEX_ENGINE_VERSION: JSON.stringify('b5488'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
])
|
])
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
set BIN_PATH=./bin
|
set BIN_PATH=./bin
|
||||||
set SHARED_PATH=./../../electron/shared
|
set SHARED_PATH=./../../electron/shared
|
||||||
set /p CORTEX_VERSION=<./bin/version.txt
|
set /p CORTEX_VERSION=<./bin/version.txt
|
||||||
set ENGINE_VERSION=b5371
|
set ENGINE_VERSION=b5488
|
||||||
|
|
||||||
@REM Download llama.cpp binaries
|
@REM Download llama.cpp binaries
|
||||||
set DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
|
set DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# Read CORTEX_VERSION
|
# Read CORTEX_VERSION
|
||||||
CORTEX_VERSION=$(cat ./bin/version.txt)
|
CORTEX_VERSION=$(cat ./bin/version.txt)
|
||||||
ENGINE_VERSION=b5371
|
ENGINE_VERSION=b5488
|
||||||
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
|
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
|
||||||
ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
|
ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
|
||||||
CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
|
CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
|
||||||
|
|||||||
@ -19,7 +19,7 @@ export default defineConfig([
|
|||||||
CORTEX_SOCKET_URL: JSON.stringify(
|
CORTEX_SOCKET_URL: JSON.stringify(
|
||||||
`ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
|
`ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
|
||||||
),
|
),
|
||||||
CORTEX_ENGINE_VERSION: JSON.stringify('b5371'),
|
CORTEX_ENGINE_VERSION: JSON.stringify('b5488'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
@echo off
|
@echo off
|
||||||
|
|
||||||
set CORTEX_VERSION=1.0.13-rc6
|
set CORTEX_VERSION=1.0.13-rc6
|
||||||
set ENGINE_VERSION=b5371
|
set ENGINE_VERSION=b5488
|
||||||
set ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
|
set ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
|
||||||
set ENGINE_DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
|
set ENGINE_DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
|
||||||
set CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%
|
set CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%
|
||||||
|
|||||||
@ -15,7 +15,7 @@ download() {
|
|||||||
|
|
||||||
# Read CORTEX_VERSION
|
# Read CORTEX_VERSION
|
||||||
CORTEX_VERSION=1.0.13-rc6
|
CORTEX_VERSION=1.0.13-rc6
|
||||||
ENGINE_VERSION=b5371
|
ENGINE_VERSION=b5488
|
||||||
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
|
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
|
||||||
ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
|
ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
|
||||||
CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
|
CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
|
||||||
|
|||||||
@ -98,7 +98,6 @@ pub fn get_jan_data_folder_path<R: Runtime>(app_handle: tauri::AppHandle<R>) ->
|
|||||||
}
|
}
|
||||||
|
|
||||||
let app_configurations = get_app_configurations(app_handle);
|
let app_configurations = get_app_configurations(app_handle);
|
||||||
log::debug!("data_folder: {}", app_configurations.data_folder);
|
|
||||||
PathBuf::from(app_configurations.data_folder)
|
PathBuf::from(app_configurations.data_folder)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -177,8 +176,6 @@ pub fn get_configuration_file_path<R: Runtime>(app_handle: tauri::AppHandle<R>)
|
|||||||
.unwrap_or(&app_path.join("../"))
|
.unwrap_or(&app_path.join("../"))
|
||||||
.join(package_name);
|
.join(package_name);
|
||||||
|
|
||||||
log::debug!("old_data_dir: {}", old_data_dir.display());
|
|
||||||
|
|
||||||
if old_data_dir.exists() {
|
if old_data_dir.exists() {
|
||||||
return old_data_dir.join(CONFIGURATION_FILE_NAME);
|
return old_data_dir.join(CONFIGURATION_FILE_NAME);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@ -85,6 +85,7 @@ pub fn run() {
|
|||||||
.setup(|app| {
|
.setup(|app| {
|
||||||
app.handle().plugin(
|
app.handle().plugin(
|
||||||
tauri_plugin_log::Builder::default()
|
tauri_plugin_log::Builder::default()
|
||||||
|
.level(log::LevelFilter::Info)
|
||||||
.targets([
|
.targets([
|
||||||
tauri_plugin_log::Target::new(tauri_plugin_log::TargetKind::Stdout),
|
tauri_plugin_log::Target::new(tauri_plugin_log::TargetKind::Stdout),
|
||||||
tauri_plugin_log::Target::new(tauri_plugin_log::TargetKind::Webview),
|
tauri_plugin_log::Target::new(tauri_plugin_log::TargetKind::Webview),
|
||||||
|
|||||||
@ -38,7 +38,7 @@
|
|||||||
"security": {
|
"security": {
|
||||||
"csp": {
|
"csp": {
|
||||||
"default-src": "'self' customprotocol: asset: http://localhost:* http://127.0.0.1:* ws://localhost:* ws://127.0.0.1:*",
|
"default-src": "'self' customprotocol: asset: http://localhost:* http://127.0.0.1:* ws://localhost:* ws://127.0.0.1:*",
|
||||||
"connect-src": "ipc: http://ipc.localhost http://127.0.0.1:* ws://localhost:* ws://127.0.0.1:* https:",
|
"connect-src": "ipc: http://ipc.localhost http://127.0.0.1:* ws://localhost:* ws://127.0.0.1:* https: http:",
|
||||||
"font-src": [
|
"font-src": [
|
||||||
"https://fonts.gstatic.com blob: data: tauri://localhost http://tauri.localhost"
|
"https://fonts.gstatic.com blob: data: tauri://localhost http://tauri.localhost"
|
||||||
],
|
],
|
||||||
|
|||||||
@ -88,36 +88,35 @@ export const useChat = () => {
|
|||||||
updateLoadingModel(false)
|
updateLoadingModel(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
const builder = new CompletionMessagesBuilder(messages)
|
const builder = new CompletionMessagesBuilder(
|
||||||
if (currentAssistant?.instructions?.length > 0)
|
messages,
|
||||||
builder.addSystemMessage(currentAssistant?.instructions || '')
|
currentAssistant?.instructions
|
||||||
// REMARK: Would it possible to not attach the entire message history to the request?
|
)
|
||||||
// TODO: If not amend messages history here
|
|
||||||
builder.addUserMessage(message)
|
builder.addUserMessage(message)
|
||||||
|
|
||||||
let isCompleted = false
|
let isCompleted = false
|
||||||
|
|
||||||
let attempts = 0
|
let availableTools = selectedModel?.capabilities?.includes('tools')
|
||||||
const availableTools = selectedModel?.capabilities?.includes('tools')
|
|
||||||
? tools
|
? tools
|
||||||
: []
|
: []
|
||||||
while (
|
while (
|
||||||
!isCompleted &&
|
!isCompleted &&
|
||||||
!abortController.signal.aborted &&
|
!abortController.signal.aborted
|
||||||
// TODO: Max attempts can be set in the provider settings later
|
// TODO: Max attempts can be set in the provider settings later
|
||||||
attempts < 10
|
|
||||||
) {
|
) {
|
||||||
attempts += 1
|
|
||||||
const completion = await sendCompletion(
|
const completion = await sendCompletion(
|
||||||
activeThread,
|
activeThread,
|
||||||
provider,
|
provider,
|
||||||
builder.getMessages(),
|
builder.getMessages(),
|
||||||
abortController,
|
abortController,
|
||||||
availableTools,
|
availableTools,
|
||||||
|
currentAssistant.parameters?.stream === false ? false : true,
|
||||||
|
currentAssistant.parameters as unknown as Record<string, object>
|
||||||
// TODO: replace it with according provider setting later on
|
// TODO: replace it with according provider setting later on
|
||||||
selectedProvider === 'llama.cpp' && availableTools.length > 0
|
// selectedProvider === 'llama.cpp' && availableTools.length > 0
|
||||||
? false
|
// ? false
|
||||||
: true
|
// : true
|
||||||
)
|
)
|
||||||
|
|
||||||
if (!completion) throw new Error('No completion received')
|
if (!completion) throw new Error('No completion received')
|
||||||
@ -164,6 +163,7 @@ export const useChat = () => {
|
|||||||
addMessage(updatedMessage ?? finalContent)
|
addMessage(updatedMessage ?? finalContent)
|
||||||
|
|
||||||
isCompleted = !toolCalls.length
|
isCompleted = !toolCalls.length
|
||||||
|
availableTools = []
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
toast.error(
|
toast.error(
|
||||||
@ -188,7 +188,6 @@ export const useChat = () => {
|
|||||||
setAbortController,
|
setAbortController,
|
||||||
updateLoadingModel,
|
updateLoadingModel,
|
||||||
tools,
|
tools,
|
||||||
selectedProvider,
|
|
||||||
updateTokenSpeed,
|
updateTokenSpeed,
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|||||||
@ -58,9 +58,13 @@ export const useModelProvider = create<ModelProviderState>()(
|
|||||||
active: existingProvider ? existingProvider?.active : true,
|
active: existingProvider ? existingProvider?.active : true,
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
providers: updatedProviders,
|
providers: [
|
||||||
|
...updatedProviders,
|
||||||
|
...existingProviders.filter(
|
||||||
|
(e) => !updatedProviders.some((p) => p.provider === e.provider)
|
||||||
|
),
|
||||||
|
],
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
updateProvider: (providerName, data) => {
|
updateProvider: (providerName, data) => {
|
||||||
|
|||||||
@ -113,7 +113,8 @@ export const sendCompletion = async (
|
|||||||
messages: ChatCompletionMessageParam[],
|
messages: ChatCompletionMessageParam[],
|
||||||
abortController: AbortController,
|
abortController: AbortController,
|
||||||
tools: MCPTool[] = [],
|
tools: MCPTool[] = [],
|
||||||
stream: boolean = true
|
stream: boolean = true,
|
||||||
|
params: Record<string, object> = {}
|
||||||
): Promise<StreamCompletionResponse | CompletionResponse | undefined> => {
|
): Promise<StreamCompletionResponse | CompletionResponse | undefined> => {
|
||||||
if (!thread?.model?.id || !provider) return undefined
|
if (!thread?.model?.id || !provider) return undefined
|
||||||
|
|
||||||
@ -138,6 +139,7 @@ export const sendCompletion = async (
|
|||||||
messages,
|
messages,
|
||||||
tools: normalizeTools(tools),
|
tools: normalizeTools(tools),
|
||||||
tool_choice: tools.length ? 'auto' : undefined,
|
tool_choice: tools.length ? 'auto' : undefined,
|
||||||
|
...params,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
signal: abortController.signal,
|
signal: abortController.signal,
|
||||||
@ -150,6 +152,7 @@ export const sendCompletion = async (
|
|||||||
messages,
|
messages,
|
||||||
tools: normalizeTools(tools),
|
tools: normalizeTools(tools),
|
||||||
tool_choice: tools.length ? 'auto' : undefined,
|
tool_choice: tools.length ? 'auto' : undefined,
|
||||||
|
...params,
|
||||||
})
|
})
|
||||||
return completion
|
return completion
|
||||||
}
|
}
|
||||||
@ -248,7 +251,7 @@ export const extractToolCall = (
|
|||||||
// Create new tool call if this is the first chunk for it
|
// Create new tool call if this is the first chunk for it
|
||||||
if (!calls[index]) {
|
if (!calls[index]) {
|
||||||
calls[index] = {
|
calls[index] = {
|
||||||
id: deltaToolCalls[0]?.id || '',
|
id: deltaToolCalls[0]?.id || ulid(),
|
||||||
function: {
|
function: {
|
||||||
name: deltaToolCalls[0]?.function?.name || '',
|
name: deltaToolCalls[0]?.function?.name || '',
|
||||||
arguments: deltaToolCalls[0]?.function?.arguments || '',
|
arguments: deltaToolCalls[0]?.function?.arguments || '',
|
||||||
@ -261,7 +264,10 @@ export const extractToolCall = (
|
|||||||
currentCall = calls[index]
|
currentCall = calls[index]
|
||||||
|
|
||||||
// Append to function name or arguments if they exist in this chunk
|
// Append to function name or arguments if they exist in this chunk
|
||||||
if (deltaToolCalls[0]?.function?.name) {
|
if (
|
||||||
|
deltaToolCalls[0]?.function?.name &&
|
||||||
|
currentCall!.function.name !== deltaToolCalls[0]?.function?.name
|
||||||
|
) {
|
||||||
currentCall!.function.name += deltaToolCalls[0].function.name
|
currentCall!.function.name += deltaToolCalls[0].function.name
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -9,24 +9,28 @@ import { ThreadMessage } from '@janhq/core'
|
|||||||
export class CompletionMessagesBuilder {
|
export class CompletionMessagesBuilder {
|
||||||
private messages: ChatCompletionMessageParam[] = []
|
private messages: ChatCompletionMessageParam[] = []
|
||||||
|
|
||||||
constructor(messages: ThreadMessage[]) {
|
constructor(messages: ThreadMessage[], systemInstruction?: string) {
|
||||||
this.messages = messages
|
if (systemInstruction) {
|
||||||
.filter((e) => !e.metadata?.error)
|
|
||||||
.map<ChatCompletionMessageParam>((msg) => ({
|
|
||||||
role: msg.role,
|
|
||||||
content: msg.content[0]?.text?.value ?? '.',
|
|
||||||
}) as ChatCompletionMessageParam)
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Add a system message to the messages array.
|
|
||||||
* @param content - The content of the system message.
|
|
||||||
*/
|
|
||||||
addSystemMessage(content: string) {
|
|
||||||
this.messages.push({
|
this.messages.push({
|
||||||
role: 'system',
|
role: 'system',
|
||||||
content: content,
|
content: systemInstruction,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
this.messages.push(
|
||||||
|
...messages
|
||||||
|
.filter((e) => !e.metadata?.error)
|
||||||
|
.map<ChatCompletionMessageParam>(
|
||||||
|
(msg) =>
|
||||||
|
({
|
||||||
|
role: msg.role,
|
||||||
|
content:
|
||||||
|
msg.role === 'assistant'
|
||||||
|
? this.normalizeContent(msg.content[0]?.text?.value ?? '.')
|
||||||
|
: (msg.content[0]?.text?.value ?? '.'),
|
||||||
|
}) as ChatCompletionMessageParam
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a user message to the messages array.
|
* Add a user message to the messages array.
|
||||||
@ -52,7 +56,7 @@ export class CompletionMessagesBuilder {
|
|||||||
) {
|
) {
|
||||||
this.messages.push({
|
this.messages.push({
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: content,
|
content: this.normalizeContent(content),
|
||||||
refusal: refusal,
|
refusal: refusal,
|
||||||
tool_calls: calls,
|
tool_calls: calls,
|
||||||
})
|
})
|
||||||
@ -78,4 +82,22 @@ export class CompletionMessagesBuilder {
|
|||||||
getMessages(): ChatCompletionMessageParam[] {
|
getMessages(): ChatCompletionMessageParam[] {
|
||||||
return this.messages
|
return this.messages
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize the content of a message by removing reasoning content.
|
||||||
|
* This is useful to ensure that reasoning content does not get sent to the model.
|
||||||
|
* @param content
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
|
private normalizeContent = (content: string): string => {
|
||||||
|
// Reasoning content should not be sent to the model
|
||||||
|
if (content.includes('<think>')) {
|
||||||
|
const match = content.match(/<think>([\s\S]*?)<\/think>/)
|
||||||
|
if (match?.index !== undefined) {
|
||||||
|
const splitIndex = match.index + match[0].length
|
||||||
|
content = content.slice(splitIndex).trim()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return content
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user