Merge pull request #6720 from menloresearch/release/v0.7.0

Sync release v0.7.0 to dev
This commit is contained in:
Louis 2025-10-06 22:31:06 +07:00 committed by GitHub
commit 6c4dd85e6f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
22 changed files with 372 additions and 272 deletions

View File

@ -250,13 +250,3 @@ jobs:
asset_path: ./src-tauri/target/release/bundle/nsis/${{ steps.metadata.outputs.FILE_NAME }}
asset_name: ${{ steps.metadata.outputs.FILE_NAME }}
asset_content_type: application/octet-stream
- name: Upload release assert if public provider is github
if: inputs.public_provider == 'github'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
uses: actions/upload-release-asset@v1.0.1
with:
upload_url: ${{ inputs.upload_url }}
asset_path: ./src-tauri/target/release/bundle/msi/${{ steps.metadata.outputs.MSI_FILE_NAME }}
asset_name: ${{ steps.metadata.outputs.MSI_FILE_NAME }}
asset_content_type: application/octet-stream

View File

@ -87,19 +87,25 @@ pub async fn is_model_supported(
);
const RESERVE_BYTES: u64 = 2288490189;
let total_system_memory = system_info.total_memory * 1024 * 1024;
let total_system_memory: u64 = match system_info.gpus.is_empty() {
// on MacOS with unified memory, treat RAM = 0 for now
true => 0,
false => system_info.total_memory * 1024 * 1024,
};
// Calculate total VRAM from all GPUs
let total_vram: u64 = if system_info.gpus.is_empty() {
let total_vram: u64 = match system_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
true => {
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_system_memory
} else {
system_info
system_info.total_memory * 1024 * 1024
}
false => system_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
.sum::<u64>(),
};
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
@ -113,7 +119,7 @@ pub async fn is_model_supported(
let usable_total_memory = if total_system_memory > RESERVE_BYTES {
(total_system_memory - RESERVE_BYTES) + usable_vram
} else {
0
usable_vram
};
log::info!("System RAM: {} bytes", &total_system_memory);
log::info!("Total VRAM: {} bytes", &total_vram);

View File

@ -80,25 +80,25 @@ pub async fn plan_model_load(
log::info!("Got GPUs:\n{:?}", &sys_info.gpus);
let total_ram: u64 = sys_info.total_memory * 1024 * 1024;
log::info!(
"Total system memory reported from tauri_plugin_hardware(in bytes): {}",
&total_ram
);
let total_ram: u64 = match sys_info.gpus.is_empty() {
// Consider RAM as 0 for unified memory
true => 0,
false => sys_info.total_memory * 1024 * 1024,
};
let total_vram: u64 = if sys_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
// Calculate total VRAM from all GPUs
let total_vram: u64 = match sys_info.gpus.is_empty() {
true => {
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_ram
} else {
sys_info
sys_info.total_memory * 1024 * 1024
}
false => sys_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
.sum::<u64>(),
};
log::info!("Total RAM reported/calculated (in bytes): {}", &total_ram);
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
let usable_vram: u64 = if total_vram > RESERVE_BYTES {
(((total_vram - RESERVE_BYTES) as f64) * multiplier) as u64

View File

@ -16,7 +16,7 @@ Before testing, set-up the following in the old version to make sure that we can
- [ ] Change the `App Data` to some other folder
- [ ] Create a Custom Provider
- [ ] Disable some model providers
- [NEW] Change llama.cpp setting of 2 models
- [ ] Change llama.cpp setting of 2 models
#### Validate that the update does not corrupt existing user data or settings (before and after update show the same information):
- [ ] Threads
- [ ] Previously used models and assistants are shown correctly
@ -73,35 +73,44 @@ Before testing, set-up the following in the old version to make sure that we can
- [ ] Ensure that when this value is changed, there is no broken UI caused by it
- [ ] Code Block
- [ ] Show Line Numbers
- [ENG] Ensure that when click on `Reset` in the `Appearance` section, it reset back to the default values
- [ENG] Ensure that when click on `Reset` in the `Code Block` section, it reset back to the default values
- [ ] [0.7.0] Compact Token Counter shows the token counter inside the chat input when toggled on; otherwise a small token counter is shown below the chat input
- [ ] [ENG] Ensure that clicking `Reset` in the `Appearance` section resets it back to the default values
- [ ] [ENG] Ensure that clicking `Reset` in the `Code Block` section resets it back to the default values
#### In `Model Providers`:
In `Llama.cpp`:
- [ ] After downloading a model from hub, the model is listed with the correct name under `Models`
- [ ] Can import a `gguf` model with no error
- [ ] [0.7.0] While importing, an import indicator should appear under `Models`
- [ ] The imported model is listed with the correct name under `Models`
- [ ] [0.6.9] Take a `gguf` file and delete the `.gguf` extension from the file name, import it into Jan, and verify that it works.
- [ ] [0.6.10] Can import VLM models and chat with images
- [ ] [0.6.10] Importing a file that is not an `mmproj` file in the `mmproj` field should show a validation error
- [ ] [0.6.10] Importing an `mmproj` from a different model should error
- [ ] [0.7.0] Users can customize model display names according to their own preferences.
- [ ] Check that clicking `delete` removes the model from the list
- [ ] Deleted model doesn't appear in the selectable models section in the chat input (even in old threads that used the model previously)
- [ ] Ensure that user can re-import deleted imported models
- [ ] [0.6.8] Ensure that there is a recommended `llama.cpp` for each system and that it works out of the box for users.
- [ ] [0.6.10] Change to an older version of the llama.cpp backend. Click on `Check for Llamacpp Updates`; it should alert that there is a new version.
- [ ] [0.7.0] Users can cancel a backend download while it is in progress.
- [ ] [0.6.10] Try `Install backend from file` for a backend; it should then show as an option in the backend list
- [ ] [0.7.0] User can install a backend from file in both .tar.gz and .zip formats, and the backend appears in the backend selection menu
- [ ] [0.7.0] A manually installed backend is automatically selected after import, and the backend menu updates to show it as the latest imported backend.
- [ ] Enable `Auto-Unload Old Models` and ensure that only one model can run / start at a time. If two models are running when it is enabled, both will be stopped.
- [ ] Disable `Auto-Unload Old Models`, and ensure that multiple models can run at the same time.
- [ ] Enable `Context Shift` and ensure that the context can run for a long time without hitting a memory error. Use the `banana test`: turn on the fetch MCP, then ask a local model to fetch and summarize the history of the banana (bananas have a very long history on Wikipedia, it turns out). Without `Context Shift`, it should run out of context memory fairly quickly.
In `Model Settings`:
- [ ] [0.6.8] Ensure that the user can change the Jinja chat template of an individual model without affecting other models' templates
- [ ] [0.6.8] Ensure that there is a recommended `llama.cpp` for each system and that it works out of the box for users.
- [ ] [0.6.8] Ensure we can override the Tensor Buffer Type in the model settings to offload layers between GPU and CPU => Download any MoE model (e.g., gpt-oss-20b) => Set the tensor buffer type to `blk\\.([0-30]*[02468])\\.ffn_.*_exps\\.=CPU` => check that those tensors are on the CPU and run inference (you can check whether app.log contains `--override-tensor", "blk\\\\.([0-30]*[02468])\\\\.ffn_.*_exps\\\\.=CPU`); a small matcher sketch follows this list
- [ ] [0.6.9] Take a `gguf` file and delete the `.gguf` extension from the file name, import it into Jan, and verify that it works.
- [ ] [0.6.10] Can import VLM models and chat with images
- [ ] [0.6.10] Importing a model file in the `mmproj` field should show a validation error
- [ ] [0.6.10] When an `mmproj` from a different model is imported, chatting with the model should not work
- [ ] [0.6.10] Change to an older version of the llama.cpp backend. Click on `Check for Llamacpp Updates`; it should alert that there is a new version.
- [ ] [0.6.10] Try `Install backend from file` for a backend; it should then show as an option in the backend list
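If it helps when running the tensor-buffer-override item above, here is a quick TypeScript sketch for checking which tensor names the pattern actually targets (the tensor names are typical MoE layer names chosen for illustration, not taken from a specific model file):

```typescript
const pattern = /blk\.([0-30]*[02468])\.ffn_.*_exps\./

const tensors = [
  'blk.0.ffn_gate_exps.weight',
  'blk.1.ffn_up_exps.weight',
  'blk.12.ffn_down_exps.weight',
  'blk.13.ffn_down_exps.weight',
]

for (const name of tensors) {
  // blk.0 and blk.12 match and get the CPU buffer type; blk.1 and blk.13 stay on the GPU.
  console.log(name, pattern.test(name) ? '=> CPU' : '=> GPU')
}
```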
In Remote Model Providers:
- [ ] Check that the following providers are present:
- [ ] OpenAI
- [ ] Anthropic
- [ ] [0.7.0] Azure
- [ ] Cohere
- [ ] OpenRouter
- [ ] Mistral
@ -113,12 +122,15 @@ In Remote Model Providers:
- [ ] Delete a model and ensure that it doesn't show up in the `Models` list view or in the selectable dropdown in chat input.
- [ ] Ensure that a deleted model is also not selectable and does not appear in old threads that used it.
- [ ] Manually adding a new model works and the user can chat with the newly added model without error (you can add back the model you just deleted for testing)
- [ ] [0.6.9] Make sure that Ollama set up as a custom provider works with Jan
- [ ] [0.7.0] Vision capabilities are now automatically detected for vision models
- [ ] [0.7.0] New default models are available for adding to remote providers through a drop down (OpenAI, Mistral, Groq)
In Custom Providers:
- [ ] Ensure that the user can create a new custom provider with the right baseURL and API key.
- [ ] Click `Refresh` should retrieve a list of available models from the Custom Providers.
- [ ] User can chat with the custom providers
- [ ] Ensure that Custom Providers can be deleted and won't reappear in a new session
- [ ] [0.6.9] Make sure that Ollama set up as a custom provider works with Jan
In general:
- [ ] A disabled Model Provider should not show up as selectable in the chat input of new and old threads alike (an old thread's chat input should show `Select Model` instead of the disabled model)
@ -162,9 +174,10 @@ Ensure that the following section information show up for hardware
- [ ] When the user clicks `Always Allow` on the pop-up, the tool retains permission and won't ask for confirmation again (this applies at the individual tool level, not at the MCP server level)
- [ ] If `Allow All MCP Tool Permissions` is enabled, in every new thread, there should not be any confirmation dialog pop up when a tool is called.
- [ ] When the pop-up appears, make sure that the `Tool Parameters` are also shown in detail in the pop-up
- [ ] [0.6.9] Go to Enter JSON configuration when created a new MCp => paste the JSON config inside => click `Save` => server works
- [ ] [0.6.9] Go to Enter JSON configuration when creating a new MCP => paste the JSON config inside => click `Save` => the server works
- [ ] [0.6.9] If an individual JSON config is invalid, the MCP server should not be activated
- [ ] [0.6.9] Make sure that MCP server can be used with streamable-http transport => connect to Smithery and test MCP server
- [ ] [0.7.0] When deleting an MCP Server, a toast notification is shown
#### In `Local API Server`:
- [ ] User can `Start Server` and chat with the default endpoint
@ -175,7 +188,8 @@ Ensure that the following section information show up for hardware
- [ ] [0.6.9] With the startup configuration enabled, the last used model is also automatically started (users do not have to manually start a model before starting the server)
- [ ] [0.6.9] Make sure that you can send an image to the Local API Server and it works (you can set up the Local API Server as a Custom Provider in Jan to test)
- [ ] [0.6.10] Make sure you are still able to see the API key while the local server status is running
- [ ] [0.7.0] Users can see the Jan API Server Swagger UI by opening the following path in their browser `http://<ip>:<port>`
- [ ] [0.7.0] Users can set the trusted host to `*` in the server configuration to accept requests from any host, or requests without a host (see the request sketch below)
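A minimal sketch for exercising the two items above from another machine on the network, assuming the server exposes an OpenAI-compatible `/v1/models` route and Bearer-token auth; the host, port, and key below are placeholders, not values from this release:

```typescript
const BASE = 'http://192.168.1.50:1337' // the machine running Jan, i.e. http://<ip>:<port>
const API_KEY = 'my-test-key'

async function checkServer() {
  // Swagger UI: opening BASE in a browser should render the interactive docs while the server runs.
  const docs = await fetch(BASE)
  console.log('Swagger UI status:', docs.status)

  // With the trusted host set to `*`, a request from a non-localhost origin should not be rejected.
  const models = await fetch(`${BASE}/v1/models`, {
    headers: { Authorization: `Bearer ${API_KEY}` },
  })
  console.log('models:', await models.json())
}

checkServer().catch(console.error)
```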
#### In `HTTPS Proxy`:
- [ ] Model download request goes through proxy endpoint
@ -188,6 +202,7 @@ Ensure that the following section information show up for hardware
- [ ] Clicking download works inside the model card HTML
- [ ] [0.6.9] Check that the model recommendation based on user hardware works as expected in the Model Hub
- [ ] [0.6.10] Check that model of the same name but different author can be found in the Hub catalog (test with [https://huggingface.co/unsloth/Qwen3-4B-Thinking-2507-GGUF](https://huggingface.co/unsloth/Qwen3-4B-Thinking-2507-GGUF))
- [ ] [0.7.0] Support downloading models with the same name from different authors; models not listed on the hub will be prefixed with the author name
## D. Threads
@ -214,19 +229,30 @@ Ensure that the following section information show up for hardware
- [ ] User can send messages with different types of text content (e.g. text, emoji, ...)
- [ ] When the model is asked to generate a markdown table, the table returned from the model is correctly formatted.
- [ ] When the model generates code, ensure that the code snippets are properly formatted according to the `Appearance -> Code Block` setting.
- [ ] [0.7.0] LaTeX formulas now render correctly in chat. Both inline \(...\) and block \[...\] formats are supported. Code blocks and HTML tags are not affected (sample formulas follow this list)
- [ ] Users can edit their old messages and regenerate the answer based on the new message
- [ ] User can click `Copy` to copy the model response
- [ ] [0.6.10] When clicking copy on a code block in the model's output, only that code block is copied instead of multiple code blocks at once
- [ ] User can click `Delete` to delete either the user message or the model response.
- [ ] The token speed appears while a response from the model is being generated, and the final value is shown under the response.
- [ ] Make sure that when the user types Chinese or Japanese characters with an IME keyboard and presses `Enter`, the `Send` button doesn't trigger automatically after each word.
- [ ] [0.6.9] Attach an image to the chat input and see if you can chat with it using a remote model
- [ ] [0.6.9] Attach an image to the chat input and see if you can chat with it using a local model
- [ ] [0.6.9] Attach an image to the chat input and see if you can chat with it using a Remote model & Local model
- [ ] [0.6.9] Check that you can paste an image to text box from your system clipboard (Copy - Paste)
- [ ] [0.6.9] Make sure that user can favourite a model in the llama.cpp list and see the favourite model selection in chat input
- [ ] [0.6.10] User can Paste (e.g Ctrl + v) text into chat input when it is a vision model
- [ ] [0.6.9] Make sure that user can favourite a model in the Model list and see the favourite model selection in chat input
- [ ] [0.6.10] User can click the model's settings in chat, enable Auto-Optimize Settings, and continue chatting with the model without interruption.
- [ ] Verify this works with at least two models of different sizes (e.g., 1B and 7B).
- [ ] [0.6.10] User can Paste (e.g Ctrl + v) text into chat input when it is a vision model
- [ ] [0.6.10] When clicking copy on a code block in the model's output, only that code block is copied instead of multiple code blocks at once
- [ ] [0.7.0] When chatting with a model, the UI displays a token usage counter showing the percentage of context consumed.
- [ ] [0.7.0] When chatting with a model, the scroll no longer follows the model's streaming response; it only auto-scrolls when the user sends a new message
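Sample formulas for the LaTeX rendering item above — arbitrary examples to paste into a chat, not tied to any model output:

```latex
% Inline form: should render inside the sentence.
The energy-mass relation \(E = mc^2\) holds in special relativity.

% Block form: should render as a standalone equation.
\[
  \int_0^\infty e^{-x^2}\,dx = \frac{\sqrt{\pi}}{2}
\]
```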
#### In Project
- [ ] [0.7.0] User can create new project
- [ ] [0.7.0] User can add existing threads to a project
- [ ] [0.7.0] When the user attempts to delete a project, a confirmation dialog must appear warning that this action will permanently delete the project and all its associated threads.
- [ ] [0.7.0] The user can successfully delete a project, and all threads contained within that project are also permanently deleted.
- [ ] [0.7.0] A thread that already belongs to a project cannot be re-added to the same project.
- [ ] [0.7.0] Favorited threads retain their "favorite" status even after being added to a project
## E. Assistants
- [ ] There is always at least one default Assistant, which is Jan
- [ ] The default Jan assistant has `stream = True` by default
@ -238,6 +264,7 @@ Ensure that the following section information show up for hardware
In `Settings -> General`:
- [ ] Change the location of the `App Data` to some other path that is not the default path
- [ ] [0.7.0] Users cannot set the data location to root directories (e.g., C:\, D:\ on Windows), but can select subfolders within those drives (e.g., C:\data, D:\data)
- [ ] Click on `Reset` button in `Other` to factory reset the app:
- [ ] All threads deleted
- [ ] All Assistant deleted except for default Jan Assistant

View File

@ -41,7 +41,7 @@ function DropdownMenuContent({
data-slot="dropdown-menu-content"
sideOffset={sideOffset}
className={cn(
'bg-main-view select-none text-main-view-fg border-main-view-fg/5 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-[51] max-h-(--radix-dropdown-menu-content-available-height) min-w-[8rem] origin-(--radix-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border p-1 shadow-md',
'bg-main-view select-none text-main-view-fg border-main-view-fg/5 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 max-h-(--radix-dropdown-menu-content-available-height) min-w-[8rem] origin-(--radix-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border p-1 shadow-md z-[90]',
className
)}
{...props}

View File

@ -24,6 +24,7 @@ import { predefinedProviders } from '@/consts/providers'
import { useServiceHub } from '@/hooks/useServiceHub'
import { PlatformFeatures } from '@/lib/platform/const'
import { PlatformFeature } from '@/lib/platform/types'
import { getLastUsedModel } from '@/utils/getModelToStart'
type DropdownModelProviderProps = {
model?: ThreadModel
@ -39,16 +40,6 @@ interface SearchableModel {
}
// Helper functions for localStorage
const getLastUsedModel = (): { provider: string; model: string } | null => {
try {
const stored = localStorage.getItem(localStorageKey.lastUsedModel)
return stored ? JSON.parse(stored) : null
} catch (error) {
console.debug('Failed to get last used model from localStorage:', error)
return null
}
}
const setLastUsedModel = (provider: string, model: string) => {
try {
localStorage.setItem(
@ -325,8 +316,7 @@ const DropdownModelProvider = ({
// Create Fzf instance for fuzzy search
const fzfInstance = useMemo(() => {
return new Fzf(searchableItems, {
selector: (item) =>
`${getModelDisplayName(item.model)} ${item.model.id}`.toLowerCase(),
selector: (item) => `${getModelDisplayName(item.model)} ${item.model.id}`.toLowerCase(),
})
}, [searchableItems])

View File

@ -265,6 +265,8 @@ const SortableItem = memo(
</DropdownMenuSubContent>
</DropdownMenuSub>
{thread.metadata?.project && (
<>
<DropdownMenuSeparator />
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
@ -282,8 +284,9 @@ const SortableItem = memo(
}}
>
<IconX size={16} />
<span>{t('common:projects.removeFromProject')}</span>
<span>Remove from project</span>
</DropdownMenuItem>
</>
)}
<DropdownMenuSeparator />
<DeleteThreadDialog
@ -308,7 +311,11 @@ type ThreadListProps = {
currentProjectId?: string
}
function ThreadList({ threads, variant = 'default', currentProjectId }: ThreadListProps) {
function ThreadList({
threads,
variant = 'default',
currentProjectId,
}: ThreadListProps) {
const sortedThreads = useMemo(() => {
return threads.sort((a, b) => {
return (b.updated || 0) - (a.updated || 0)
@ -332,7 +339,12 @@ function ThreadList({ threads, variant = 'default', currentProjectId }: ThreadLi
strategy={verticalListSortingStrategy}
>
{sortedThreads.map((thread, index) => (
<SortableItem key={index} thread={thread} variant={variant} currentProjectId={currentProjectId} />
<SortableItem
key={index}
thread={thread}
variant={variant}
currentProjectId={currentProjectId}
/>
))}
</SortableContext>
</DndContext>

View File

@ -243,11 +243,7 @@ export default function AddEditAssistant({
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent
onInteractOutside={(e) => {
e.preventDefault()
}}
>
<DialogContent>
<DialogHeader>
<DialogTitle>
{editingKey

View File

@ -3,6 +3,7 @@ import { ThreadMessage, ContentType } from '@janhq/core'
import { useServiceHub } from './useServiceHub'
import { useModelProvider } from './useModelProvider'
import { usePrompt } from './usePrompt'
import { removeReasoningContent } from '@/utils/reasoning'
export interface TokenCountData {
tokenCount: number
@ -69,7 +70,19 @@ export const useTokensCount = (
} as ThreadMessage)
}
}
return result
return result.map((e) => ({
...e,
content: e.content.map((c) => ({
...c,
text:
c.type === 'text'
? {
value: removeReasoningContent(c.text?.value ?? '.'),
annotations: [],
}
: c.text,
})),
}))
}, [messages, prompt, uploadedFiles])
// Debounced calculation that includes current prompt

View File

@ -2,6 +2,7 @@
import { ChatCompletionMessageParam } from 'token.js'
import { ChatCompletionMessageToolCall } from 'openai/resources'
import { ThreadMessage } from '@janhq/core'
import { removeReasoningContent } from '@/utils/reasoning'
/**
* @fileoverview Helper functions for creating chat completion request.
@ -24,7 +25,7 @@ export class CompletionMessagesBuilder {
if (msg.role === 'assistant') {
return {
role: msg.role,
content: this.normalizeContent(
content: removeReasoningContent(
msg.content[0]?.text?.value || '.'
),
} as ChatCompletionMessageParam
@ -135,7 +136,7 @@ export class CompletionMessagesBuilder {
) {
this.messages.push({
role: 'assistant',
content: this.normalizeContent(content),
content: removeReasoningContent(content),
refusal: refusal,
tool_calls: calls,
})
@ -202,30 +203,4 @@ export class CompletionMessagesBuilder {
return result
}
/**
* Normalize the content of a message by removing reasoning content.
* This is useful to ensure that reasoning content does not get sent to the model.
* @param content
* @returns
*/
private normalizeContent = (content: string): string => {
// Reasoning content should not be sent to the model
if (content.includes('<think>')) {
const match = content.match(/<think>([\s\S]*?)<\/think>/)
if (match?.index !== undefined) {
const splitIndex = match.index + match[0].length
content = content.slice(splitIndex).trim()
}
}
if (content.includes('<|channel|>analysis<|message|>')) {
const match = content.match(
/<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/
)
if (match?.index !== undefined) {
const splitIndex = match.index + match[0].length
content = content.slice(splitIndex).trim()
}
}
return content
}
}

View File

@ -8,7 +8,6 @@ export function cn(...inputs: ClassValue[]) {
return twMerge(clsx(inputs))
}
export function basenameNoExt(filePath: string): string {
const base = path.basename(filePath);
const VALID_EXTENSIONS = [".tar.gz", ".zip"];
@ -24,7 +23,6 @@ export function basenameNoExt(filePath: string): string {
return base.slice(0, -path.extname(base).length);
}
/**
* Get the display name for a model, falling back to the model ID if no display name is set
*/

View File

@ -169,6 +169,12 @@
"serverLogs": "Server Logs",
"serverLogsDesc": "Zeige detaillierte Logs des lokalen API-Servers an.",
"openLogs": "Logs öffnen",
"swaggerDocs": "API-Dokumentation",
"swaggerDocsDesc": "Zeige interaktive API-Dokumentation (Swagger UI) an.",
"openDocs": "Dokumentation öffnen",
"startupConfiguration": "Startkonfiguration",
"runOnStartup": "Auto start",
"runOnStartupDesc": "Starte den lokalen API-Server automatisch beim Anwendungsstart. Verwendet das zuletzt verwendete Modell oder wählt das erste verfügbare Modell, falls nicht verfügbar.",
"serverConfiguration": "Server Konfiguration",
"serverHost": "Server Host",
"serverHostDesc": "Netzwerkadresse für den Server.",

View File

@ -169,9 +169,12 @@
"serverLogs": "Server Logs",
"serverLogsDesc": "View detailed logs of the local API server.",
"openLogs": "Open Logs",
"swaggerDocs": "API Documentation",
"swaggerDocsDesc": "View interactive API documentation (Swagger UI).",
"openDocs": "Open Docs",
"startupConfiguration": "Startup Configuration",
"runOnStartup": "Enable by default on startup",
"runOnStartupDesc": "Automatically start the Local API Server when the application launches.",
"runOnStartup": "Auto start",
"runOnStartupDesc": "Automatically start the Local API Server when the application launches. Uses last used model, or picks the first available model if unavailable.",
"serverConfiguration": "Server Configuration",
"serverHost": "Server Host",
"serverHostDesc": "Network address for the server.",

View File

@ -167,6 +167,12 @@
"serverLogs": "Log Server",
"serverLogsDesc": "Lihat log terperinci dari server API lokal.",
"openLogs": "Buka Log",
"swaggerDocs": "Dokumentasi API",
"swaggerDocsDesc": "Lihat dokumentasi API interaktif (Swagger UI).",
"openDocs": "Buka Dokumentasi",
"startupConfiguration": "Konfigurasi Startup",
"runOnStartup": "Auto start",
"runOnStartupDesc": "Mulai Server API Lokal secara otomatis saat aplikasi diluncurkan. Menggunakan model terakhir yang digunakan, atau memilih model pertama yang tersedia jika tidak tersedia.",
"serverConfiguration": "Konfigurasi Server",
"serverHost": "Host Server",
"serverHostDesc": "Alamat jaringan untuk server.",

View File

@ -167,9 +167,12 @@
"serverLogs": "Dzienniki Serwera",
"serverLogsDesc": "Wyświetl szczegółowe dzienniki lokalnego serwera API.",
"openLogs": "Otwórz Dzienniki",
"swaggerDocs": "Dokumentacja API",
"swaggerDocsDesc": "Wyświetl interaktywną dokumentację API (Swagger UI).",
"openDocs": "Otwórz Dokumentację",
"startupConfiguration": "Konfiguracja Startowa",
"runOnStartup": "Domyślnie włączaj przy starcie",
"runOnStartupDesc": "Automatycznie uruchamiaj lokalny serwer API podczas uruchamiania aplikacji.",
"runOnStartup": "Auto start",
"runOnStartupDesc": "Automatycznie uruchamiaj lokalny serwer API podczas uruchamiania aplikacji. Używa ostatnio używanego modelu lub wybiera pierwszy dostępny model, jeśli nie jest dostępny.",
"serverConfiguration": "Konfiguracja Serwera",
"serverHost": "Host",
"serverHostDesc": "Adres sieciowy serwera.",

View File

@ -169,6 +169,12 @@
"serverLogs": "Nhật ký máy chủ",
"serverLogsDesc": "Xem nhật ký chi tiết của máy chủ API cục bộ.",
"openLogs": "Mở nhật ký",
"swaggerDocs": "Tài liệu API",
"swaggerDocsDesc": "Xem tài liệu API tương tác (Swagger UI).",
"openDocs": "Mở tài liệu",
"startupConfiguration": "Cấu hình khởi động",
"runOnStartup": "Auto start",
"runOnStartupDesc": "Tự động khởi động Máy chủ API Cục bộ khi ứng dụng khởi chạy. Sử dụng mô hình đã dùng gần nhất hoặc chọn mô hình đầu tiên có sẵn nếu không khả dụng.",
"serverConfiguration": "Cấu hình máy chủ",
"serverHost": "Máy chủ lưu trữ",
"serverHostDesc": "Địa chỉ mạng cho máy chủ.",

View File

@ -169,6 +169,12 @@
"serverLogs": "服务器日志",
"serverLogsDesc": "查看本地 API 服务器的详细日志。",
"openLogs": "打开日志",
"swaggerDocs": "API 文档",
"swaggerDocsDesc": "查看交互式 API 文档Swagger UI。",
"openDocs": "打开文档",
"startupConfiguration": "启动配置",
"runOnStartup": "Auto start",
"runOnStartupDesc": "应用程序启动时自动启动本地 API 服务器。使用上次使用的模型,如果不可用则选择第一个可用模型。",
"serverConfiguration": "服务器配置",
"serverHost": "服务器主机",
"serverHostDesc": "服务器的网络地址。",

View File

@ -167,6 +167,12 @@
"serverLogs": "伺服器日誌",
"serverLogsDesc": "檢視本機 API 伺服器的詳細日誌。",
"openLogs": "開啟日誌",
"swaggerDocs": "API 文件",
"swaggerDocsDesc": "查看互動式 API 文件Swagger UI。",
"openDocs": "開啟文件",
"startupConfiguration": "啟動設定",
"runOnStartup": "Auto start",
"runOnStartupDesc": "應用程式啟動時自動啟動本機 API 伺服器。使用上次使用的模型,如果不可用則選擇第一個可用模型。",
"serverConfiguration": "伺服器設定",
"serverHost": "伺服器主機",
"serverHostDesc": "伺服器的網路位址。",

View File

@ -11,8 +11,8 @@ import { useThreads } from '@/hooks/useThreads'
import { useLocalApiServer } from '@/hooks/useLocalApiServer'
import { useAppState } from '@/hooks/useAppState'
import { AppEvent, events } from '@janhq/core'
import { localStorageKey } from '@/constants/localStorage'
import { SystemEvent } from '@/types/events'
import { getModelToStart } from '@/utils/getModelToStart'
export function DataProvider() {
const { setProviders, selectedModel, selectedProvider, getProviderByName } =
@ -65,10 +65,13 @@ export function DataProvider() {
// Listen for deep link events
let unsubscribe = () => {}
serviceHub.events().listen(SystemEvent.DEEP_LINK, (event) => {
serviceHub
.events()
.listen(SystemEvent.DEEP_LINK, (event) => {
const deep_link = event.payload as string
handleDeepLink([deep_link])
}).then((unsub) => {
})
.then((unsub) => {
unsubscribe = unsub
})
return () => {
@ -102,54 +105,6 @@ export function DataProvider() {
})
}, [serviceHub, setProviders])
const getLastUsedModel = (): { provider: string; model: string } | null => {
try {
const stored = localStorage.getItem(localStorageKey.lastUsedModel)
return stored ? JSON.parse(stored) : null
} catch (error) {
console.debug('Failed to get last used model from localStorage:', error)
return null
}
}
// Helper function to determine which model to start
const getModelToStart = () => {
// Use last used model if available
const lastUsedModel = getLastUsedModel()
if (lastUsedModel) {
const provider = getProviderByName(lastUsedModel.provider)
if (
provider &&
provider.models.some((m) => m.id === lastUsedModel.model)
) {
return { model: lastUsedModel.model, provider }
}
}
// Use selected model if available
if (selectedModel && selectedProvider) {
const provider = getProviderByName(selectedProvider)
if (provider) {
return { model: selectedModel.id, provider }
}
}
// Use first model from llamacpp provider
const llamacppProvider = getProviderByName('llamacpp')
if (
llamacppProvider &&
llamacppProvider.models &&
llamacppProvider.models.length > 0
) {
return {
model: llamacppProvider.models[0].id,
provider: llamacppProvider,
}
}
return null
}
// Auto-start Local API Server on app startup if enabled
useEffect(() => {
if (enableOnStartup) {
@ -159,7 +114,11 @@ export function DataProvider() {
return
}
const modelToStart = getModelToStart()
const modelToStart = getModelToStart({
selectedModel,
selectedProvider,
getProviderByName,
})
// Only start server if we have a model to load
if (!modelToStart) {

View File

@ -15,7 +15,6 @@ import { useLocalApiServer } from '@/hooks/useLocalApiServer'
import { useAppState } from '@/hooks/useAppState'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useServiceHub } from '@/hooks/useServiceHub'
import { localStorageKey } from '@/constants/localStorage'
import { IconLogs } from '@tabler/icons-react'
import { cn } from '@/lib/utils'
import { ApiKeyInput } from '@/containers/ApiKeyInput'
@ -23,6 +22,7 @@ import { useEffect, useState } from 'react'
import { PlatformGuard } from '@/lib/platform/PlatformGuard'
import { PlatformFeature } from '@/lib/platform'
import { toast } from 'sonner'
import { getModelToStart } from '@/utils/getModelToStart'
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const Route = createFileRoute(route.settings.local_api_server as any)({
@ -82,54 +82,6 @@ function LocalAPIServerContent() {
setIsApiKeyEmpty(!isValid)
}
const getLastUsedModel = (): { provider: string; model: string } | null => {
try {
const stored = localStorage.getItem(localStorageKey.lastUsedModel)
return stored ? JSON.parse(stored) : null
} catch (error) {
console.debug('Failed to get last used model from localStorage:', error)
return null
}
}
// Helper function to determine which model to start
const getModelToStart = () => {
// Use last used model if available
const lastUsedModel = getLastUsedModel()
if (lastUsedModel) {
const provider = getProviderByName(lastUsedModel.provider)
if (
provider &&
provider.models.some((m) => m.id === lastUsedModel.model)
) {
return { model: lastUsedModel.model, provider }
}
}
// Use selected model if available
if (selectedModel && selectedProvider) {
const provider = getProviderByName(selectedProvider)
if (provider) {
return { model: selectedModel.id, provider }
}
}
// Use first model from llamacpp provider
const llamacppProvider = getProviderByName('llamacpp')
if (
llamacppProvider &&
llamacppProvider.models &&
llamacppProvider.models.length > 0
) {
return {
model: llamacppProvider.models[0].id,
provider: llamacppProvider,
}
}
return null
}
const [isModelLoading, setIsModelLoading] = useState(false)
const toggleAPIServer = async () => {
@ -137,7 +89,7 @@ function LocalAPIServerContent() {
if (serverStatus === 'stopped') {
console.log('Starting server with port:', serverPort)
toast.info('Starting server...', {
description: `Attempting to start server on port ${serverPort}`
description: `Attempting to start server on port ${serverPort}`,
})
if (!apiKey || apiKey.toString().trim().length === 0) {
@ -146,20 +98,37 @@ function LocalAPIServerContent() {
}
setShowApiKeyError(false)
const modelToStart = getModelToStart()
setServerStatus('pending')
// Check if there's already a loaded model
serviceHub
.models()
.getActiveModels()
.then((loadedModels) => {
if (loadedModels && loadedModels.length > 0) {
console.log(`Using already loaded model: ${loadedModels[0]}`)
// Model already loaded, just start the server
return Promise.resolve()
} else {
// No loaded model, start one first
const modelToStart = getModelToStart({
selectedModel,
selectedProvider,
getProviderByName,
})
// Only start server if we have a model to load
if (!modelToStart) {
console.warn(
'Cannot start Local API Server: No model available to load'
)
return
throw new Error('No model available to load')
}
setServerStatus('pending')
setIsModelLoading(true) // Start loading state
// Start the model first
serviceHub
return serviceHub
.models()
.startModel(modelToStart.provider, modelToStart.model)
.then(() => {
@ -169,6 +138,8 @@ function LocalAPIServerContent() {
// Add a small delay for the backend to update state
return new Promise((resolve) => setTimeout(resolve, 500))
})
}
})
.then(() => {
// Then start the server
return window.core?.api?.startServer({
@ -196,31 +167,31 @@ function LocalAPIServerContent() {
toast.dismiss()
// Extract error message from various error formats
const errorMsg = error && typeof error === 'object' && 'message' in error
const errorMsg =
error && typeof error === 'object' && 'message' in error
? String(error.message)
: String(error)
// Port-related errors (highest priority)
if (errorMsg.includes('Address already in use')) {
toast.error('Port has been occupied', {
description: `Port ${serverPort} is already in use. Please try a different port.`
description: `Port ${serverPort} is already in use. Please try a different port.`,
})
}
// Model-related errors
else if (errorMsg.includes('Invalid or inaccessible model path')) {
toast.error('Invalid or inaccessible model path', {
description: errorMsg
description: errorMsg,
})
}
else if (errorMsg.includes('model')) {
} else if (errorMsg.includes('model')) {
toast.error('Failed to start model', {
description: errorMsg
description: errorMsg,
})
}
// Generic server errors
else {
toast.error('Failed to start server', {
description: errorMsg
description: errorMsg,
})
}
})
@ -294,6 +265,22 @@ function LocalAPIServerContent() {
</div>
}
>
<CardItem
title={t('settings:localApiServer.runOnStartup')}
description={t('settings:localApiServer.runOnStartupDesc')}
actions={
<Switch
checked={enableOnStartup}
onCheckedChange={(checked) => {
if (!apiKey || apiKey.toString().trim().length === 0) {
setShowApiKeyError(true)
return
}
setEnableOnStartup(checked)
}}
/>
}
/>
<CardItem
title={t('settings:localApiServer.serverLogs')}
description={t('settings:localApiServer.serverLogsDesc')}
@ -312,24 +299,34 @@ function LocalAPIServerContent() {
</Button>
}
/>
</Card>
{/* Startup Configuration */}
<Card title={t('settings:localApiServer.startupConfiguration')}>
<CardItem
title={t('settings:localApiServer.runOnStartup')}
description={t('settings:localApiServer.runOnStartupDesc')}
title={t('settings:localApiServer.swaggerDocs')}
description={t('settings:localApiServer.swaggerDocsDesc')}
actions={
<Switch
checked={enableOnStartup}
onCheckedChange={(checked) => {
if (!apiKey || apiKey.toString().trim().length === 0) {
setShowApiKeyError(true)
return
}
setEnableOnStartup(checked)
}}
/>
<a
href={`http://${serverHost}:${serverPort}`}
target="_blank"
rel="noopener noreferrer"
>
<Button
asChild
variant="link"
size="sm"
className="p-0 text-main-view-fg/80"
disabled={!isServerRunning}
title={t('settings:localApiServer.swaggerDocs')}
>
<div
className={cn(
'cursor-pointer flex items-center justify-center rounded-sm hover:bg-main-view-fg/15 bg-main-view-fg/10 transition-all duration-200 ease-in-out px-2 py-1 gap-1',
!isServerRunning && 'opacity-50 cursor-not-allowed'
)}
>
<span>{t('settings:localApiServer.openDocs')}</span>
</div>
</Button>
</a>
}
/>
</Card>

View File

@ -0,0 +1,69 @@
import { localStorageKey } from '@/constants/localStorage'
import type { ModelInfo } from '@janhq/core'
export const getLastUsedModel = (): {
provider: string
model: string
} | null => {
try {
const stored = localStorage.getItem(localStorageKey.lastUsedModel)
return stored ? JSON.parse(stored) : null
} catch (error) {
console.debug('Failed to get last used model from localStorage:', error)
return null
}
}
// Helper function to determine which model to start
export const getModelToStart = (params: {
selectedModel?: ModelInfo | null
selectedProvider?: string | null
getProviderByName: (name: string) => ModelProvider | undefined
}): { model: string; provider: ModelProvider } | null => {
const { selectedModel, selectedProvider, getProviderByName } = params
// Use last used model if available
const lastUsedModel = getLastUsedModel()
if (lastUsedModel) {
const provider = getProviderByName(lastUsedModel.provider)
if (provider && provider.models.some((m) => m.id === lastUsedModel.model)) {
return { model: lastUsedModel.model, provider }
} else {
// Last used model not found under provider, fallback to first llamacpp model
const llamacppProvider = getProviderByName('llamacpp')
if (
llamacppProvider &&
llamacppProvider.models &&
llamacppProvider.models.length > 0
) {
return {
model: llamacppProvider.models[0].id,
provider: llamacppProvider,
}
}
}
}
// Use selected model if available
if (selectedModel && selectedProvider) {
const provider = getProviderByName(selectedProvider)
if (provider) {
return { model: selectedModel.id, provider }
}
}
// Use first model from llamacpp provider
const llamacppProvider = getProviderByName('llamacpp')
if (
llamacppProvider &&
llamacppProvider.models &&
llamacppProvider.models.length > 0
) {
return {
model: llamacppProvider.models[0].id,
provider: llamacppProvider,
}
}
return null
}

View File

@ -6,10 +6,42 @@ import {
} from '@janhq/core'
// Helper function to get reasoning content from an object
function getReasoning(obj: { reasoning_content?: string | null; reasoning?: string | null } | null | undefined): string | null {
function getReasoning(
obj:
| { reasoning_content?: string | null; reasoning?: string | null }
| null
| undefined
): string | null {
return obj?.reasoning_content ?? obj?.reasoning ?? null
}
/**
* Normalize the content of a message by removing reasoning content.
* This is useful to ensure that reasoning content does not get sent to the model.
* @param content
* @returns
*/
export function removeReasoningContent(content: string): string {
// Reasoning content should not be sent to the model
if (content.includes('<think>')) {
const match = content.match(/<think>([\s\S]*?)<\/think>/)
if (match?.index !== undefined) {
const splitIndex = match.index + match[0].length
content = content.slice(splitIndex).trim()
}
}
if (content.includes('<|channel|>analysis<|message|>')) {
const match = content.match(
/<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/
)
if (match?.index !== undefined) {
const splitIndex = match.index + match[0].length
content = content.slice(splitIndex).trim()
}
}
return content
}
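// Illustrative usage (strings invented for this note, not part of the file):
//   removeReasoningContent('<think>internal reasoning</think>The answer is 42.')
//     => 'The answer is 42.'
//   removeReasoningContent('<|channel|>analysis<|message|>working<|start|>assistant<|channel|>final<|message|>Hello!')
//     => 'Hello!'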
// Extract reasoning from a message (for completed responses)
export function extractReasoningFromMessage(
message: chatCompletionRequestMessage | ChatCompletionMessage