fix: enhance tool use and fix model providers not being persisted (#5094)

* chore: enhance tool use loop * fix: newly created custom provider is not saved * chore: bump llama.cpp to b5488 * chore: normalize reasoning in assistant responses * chore: fix tool call parsing in stream mode * fix: give tool calls a default generated id * fix: system instruction should be at the top of the history * chore: allow users to add parameters
2025-05-26 15:12:55 +07:00 · 2025-05-26 15:12:55 +07:00 · b8de48c9e9
commit b8de48c9e9
parent 2744e787d1
13 changed files with 77 additions and 48 deletions
--- a/extensions/engine-management-extension/rolldown.config.mjs
+++ b/extensions/engine-management-extension/rolldown.config.mjs
@ -15,7 +15,7 @@ export default defineConfig([
        `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
      PLATFORM: JSON.stringify(process.platform),
-      CORTEX_ENGINE_VERSION: JSON.stringify('b5371'),
+      CORTEX_ENGINE_VERSION: JSON.stringify('b5488'),
      DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
      DEFAULT_REMOTE_MODELS: JSON.stringify(models),
      DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
@ -38,7 +38,7 @@ export default defineConfig([
      file: 'dist/node/index.cjs.js',
    },
    define: {
-      CORTEX_ENGINE_VERSION: JSON.stringify('b5371'),
+      CORTEX_ENGINE_VERSION: JSON.stringify('b5488'),
    },
  },
 ])
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@ -2,7 +2,7 @@
 set BIN_PATH=./bin
 set SHARED_PATH=./../../electron/shared
 set /p CORTEX_VERSION=<./bin/version.txt
-set ENGINE_VERSION=b5371
+set ENGINE_VERSION=b5488
@REM Download llama.cpp binaries
 set DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@ -2,7 +2,7 @@
 # Read CORTEX_VERSION
 CORTEX_VERSION=$(cat ./bin/version.txt)
-ENGINE_VERSION=b5371
+ENGINE_VERSION=b5488
 CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
 ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
 CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
--- a/extensions/inference-cortex-extension/rolldown.config.mjs
+++ b/extensions/inference-cortex-extension/rolldown.config.mjs
@ -19,7 +19,7 @@ export default defineConfig([
      CORTEX_SOCKET_URL: JSON.stringify(
        `ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
-      CORTEX_ENGINE_VERSION: JSON.stringify('b5371'),
+      CORTEX_ENGINE_VERSION: JSON.stringify('b5488'),
    },
  },
  {
--- a/src-tauri/binaries/download.bat
+++ b/src-tauri/binaries/download.bat
@ -1,7 +1,7 @@
@echo off
 set CORTEX_VERSION=1.0.13-rc6
-set ENGINE_VERSION=b5371
+set ENGINE_VERSION=b5488
 set ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
 set ENGINE_DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
 set CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%
--- a/src-tauri/binaries/download.sh
+++ b/src-tauri/binaries/download.sh
@ -15,7 +15,7 @@ download() {
 # Read CORTEX_VERSION
 CORTEX_VERSION=1.0.13-rc6
-ENGINE_VERSION=b5371
+ENGINE_VERSION=b5488
 CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
 ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
 CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
--- a/src-tauri/src/core/cmd.rs
+++ b/src-tauri/src/core/cmd.rs
@ -98,7 +98,6 @@ pub fn get_jan_data_folder_path<R: Runtime>(app_handle: tauri::AppHandle<R>) ->
    }
    let app_configurations = get_app_configurations(app_handle);
    log::debug!("data_folder: {}", app_configurations.data_folder);
    PathBuf::from(app_configurations.data_folder)
 }
@ -177,8 +176,6 @@ pub fn get_configuration_file_path<R: Runtime>(app_handle: tauri::AppHandle<R>)
        .unwrap_or(&app_path.join("../"))
        .join(package_name);
    log::debug!("old_data_dir: {}", old_data_dir.display());
    if old_data_dir.exists() {
        return old_data_dir.join(CONFIGURATION_FILE_NAME);
    } else {
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@ -85,6 +85,7 @@ pub fn run() {
        .setup(|app| {
            app.handle().plugin(
                tauri_plugin_log::Builder::default()
                    .level(log::LevelFilter::Info)
                    .targets([
                        tauri_plugin_log::Target::new(tauri_plugin_log::TargetKind::Stdout),
                        tauri_plugin_log::Target::new(tauri_plugin_log::TargetKind::Webview),
--- a/src-tauri/tauri.conf.json
+++ b/src-tauri/tauri.conf.json
@ -38,7 +38,7 @@
    "security": {
      "csp": {
        "default-src": "'self' customprotocol: asset: http://localhost:* http://127.0.0.1:* ws://localhost:* ws://127.0.0.1:*",
-        "connect-src": "ipc: http://ipc.localhost http://127.0.0.1:* ws://localhost:* ws://127.0.0.1:* https:",
+        "connect-src": "ipc: http://ipc.localhost http://127.0.0.1:* ws://localhost:* ws://127.0.0.1:* https: http:",
        "font-src": [
          "https://fonts.gstatic.com blob: data: tauri://localhost http://tauri.localhost"
        ],
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@ -88,36 +88,35 @@ export const useChat = () => {
          updateLoadingModel(false)
        }
-        const builder = new CompletionMessagesBuilder(messages)
+        const builder = new CompletionMessagesBuilder(
-        if (currentAssistant?.instructions?.length > 0)
+          messages,
-          builder.addSystemMessage(currentAssistant?.instructions || '')
+          currentAssistant?.instructions
-        // REMARK: Would it possible to not attach the entire message history to the request?
+        )
-        // TODO: If not amend messages history here
+
        builder.addUserMessage(message)
        let isCompleted = false
-        let attempts = 0
+        let availableTools = selectedModel?.capabilities?.includes('tools')
        const availableTools = selectedModel?.capabilities?.includes('tools')
          ? tools
          : []
        while (
          !isCompleted &&
-          !abortController.signal.aborted &&
+          !abortController.signal.aborted
          // TODO: Max attempts can be set in the provider settings later
          attempts < 10
        ) {
          attempts += 1
          const completion = await sendCompletion(
            activeThread,
            provider,
            builder.getMessages(),
            abortController,
            availableTools,
            currentAssistant.parameters?.stream === false ? false : true,
            currentAssistant.parameters as unknown as Record<string, object>
            // TODO: replace it with according provider setting later on
-            selectedProvider === 'llama.cpp' && availableTools.length > 0
+            // selectedProvider === 'llama.cpp' && availableTools.length > 0
-              ? false
+            //   ? false
-              : true
+            //   : true
          )
          if (!completion) throw new Error('No completion received')
@ -164,6 +163,7 @@ export const useChat = () => {
          addMessage(updatedMessage ?? finalContent)
          isCompleted = !toolCalls.length
          availableTools = []
        }
      } catch (error) {
        toast.error(
@ -188,7 +188,6 @@ export const useChat = () => {
      setAbortController,
      updateLoadingModel,
      tools,
      selectedProvider,
      updateTokenSpeed,
    ]
  )
--- a/web-app/src/hooks/useModelProvider.ts
+++ b/web-app/src/hooks/useModelProvider.ts
@ -58,9 +58,13 @@ export const useModelProvider = create<ModelProviderState>()(
              active: existingProvider ? existingProvider?.active : true,
            }
          })
          return {
-            providers: updatedProviders,
+            providers: [
              ...updatedProviders,
              ...existingProviders.filter(
                (e) => !updatedProviders.some((p) => p.provider === e.provider)
              ),
            ],
          }
        }),
      updateProvider: (providerName, data) => {
--- a/web-app/src/lib/completion.ts
+++ b/web-app/src/lib/completion.ts
@ -113,7 +113,8 @@ export const sendCompletion = async (
  messages: ChatCompletionMessageParam[],
  abortController: AbortController,
  tools: MCPTool[] = [],
-  stream: boolean = true
+  stream: boolean = true,
  params: Record<string, object> = {}
 ): Promise<StreamCompletionResponse | CompletionResponse | undefined> => {
  if (!thread?.model?.id || !provider) return undefined
@ -138,6 +139,7 @@ export const sendCompletion = async (
          messages,
          tools: normalizeTools(tools),
          tool_choice: tools.length ? 'auto' : undefined,
          ...params,
        },
        {
          signal: abortController.signal,
@ -150,6 +152,7 @@ export const sendCompletion = async (
        messages,
        tools: normalizeTools(tools),
        tool_choice: tools.length ? 'auto' : undefined,
        ...params,
      })
  return completion
 }
@ -248,7 +251,7 @@ export const extractToolCall = (
    // Create new tool call if this is the first chunk for it
    if (!calls[index]) {
      calls[index] = {
-        id: deltaToolCalls[0]?.id || '',
+        id: deltaToolCalls[0]?.id || ulid(),
        function: {
          name: deltaToolCalls[0]?.function?.name || '',
          arguments: deltaToolCalls[0]?.function?.arguments || '',
@ -261,7 +264,10 @@ export const extractToolCall = (
      currentCall = calls[index]
      // Append to function name or arguments if they exist in this chunk
-      if (deltaToolCalls[0]?.function?.name) {
+      if (
        deltaToolCalls[0]?.function?.name &&
        currentCall!.function.name !== deltaToolCalls[0]?.function?.name
      ) {
        currentCall!.function.name += deltaToolCalls[0].function.name
      }
--- a/web-app/src/lib/messages.ts
+++ b/web-app/src/lib/messages.ts
@ -9,24 +9,28 @@ import { ThreadMessage } from '@janhq/core'
 export class CompletionMessagesBuilder {
  private messages: ChatCompletionMessageParam[] = []
-  constructor(messages: ThreadMessage[]) {
+  constructor(messages: ThreadMessage[], systemInstruction?: string) {
-    this.messages = messages
+    if (systemInstruction) {
      .filter((e) => !e.metadata?.error)
      .map<ChatCompletionMessageParam>((msg) => ({
        role: msg.role,
        content: msg.content[0]?.text?.value ?? '.',
      }) as ChatCompletionMessageParam)
  }
  /**
   * Add a system message to the messages array.
   * @param content - The content of the system message.
   */
  addSystemMessage(content: string) {
      this.messages.push({
        role: 'system',
-      content: content,
+        content: systemInstruction,
      })
    }
    this.messages.push(
      ...messages
        .filter((e) => !e.metadata?.error)
        .map<ChatCompletionMessageParam>(
          (msg) =>
            ({
              role: msg.role,
              content:
                msg.role === 'assistant'
                  ? this.normalizeContent(msg.content[0]?.text?.value ?? '.')
                  : (msg.content[0]?.text?.value ?? '.'),
            }) as ChatCompletionMessageParam
        )
    )
  }
  /**
   * Add a user message to the messages array.
@ -52,7 +56,7 @@ export class CompletionMessagesBuilder {
  ) {
    this.messages.push({
      role: 'assistant',
-      content: content,
+      content: this.normalizeContent(content),
      refusal: refusal,
      tool_calls: calls,
    })
@ -78,4 +82,22 @@ export class CompletionMessagesBuilder {
  getMessages(): ChatCompletionMessageParam[] {
    return this.messages
  }
  /**
   * Normalize the content of a message by removing reasoning content.
   * This is useful to ensure that reasoning content does not get sent to the model.
   * @param content
   * @returns
   */
  private normalizeContent = (content: string): string => {
    // Reasoning content should not be sent to the model
    if (content.includes('<think>')) {
      const match = content.match(/<think>([\s\S]*?)<\/think>/)
      if (match?.index !== undefined) {
        const splitIndex = match.index + match[0].length
        content = content.slice(splitIndex).trim()
      }
    }
    return content
  }
 }