Merge branch 'dev' into blog/add-deepresearch-piece
.github/workflows/jan-astro-docs.yml (vendored, new file, +98 lines)
@@ -0,0 +1,98 @@
name: Jan Astro Docs

on:
  push:
    branches:
      - dev
    paths:
      - 'website/**'
      - '.github/workflows/jan-astro-docs.yml'
  pull_request:
    paths:
      - 'website/**'
      - '.github/workflows/jan-astro-docs.yml'
  # Review gh actions docs if you want to further define triggers, paths, etc
  # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on
  workflow_dispatch:

jobs:
  deploy:
    name: Deploy to CloudFlare Pages
    env:
      CLOUDFLARE_PROJECT_NAME: astro-docs
    runs-on: ubuntu-latest
    permissions:
      contents: write
      deployments: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          node-version: 20

      - uses: oven-sh/setup-bun@v2

      - name: Install jq
        uses: dcarbone/install-jq-action@v2.0.1

      - name: Fill env vars
        continue-on-error: true
        working-directory: website
        run: |
          env_example_file=".env.example"
          touch .env
          while IFS= read -r line || [[ -n "$line" ]]; do
            if [[ "$line" == *"="* ]]; then
              var_name=$(echo $line | cut -d '=' -f 1)
              echo $var_name
              var_value="$(jq -r --arg key "$var_name" '.[$key]' <<< "$SECRETS")"
              echo "$var_name=$var_value" >> .env
            fi
          done < "$env_example_file"
        env:
          SECRETS: '${{ toJson(secrets) }}'

      - name: Install dependencies
        working-directory: website
        run: bun install
      - name: Build website
        working-directory: website
        run: bun run build

      - name: copy redirects and headers
        continue-on-error: true
        working-directory: website
        run: |
          cp _redirects dist/_redirects
          cp _headers dist/_headers

      - name: Publish to Cloudflare Pages PR Preview and Staging
        if: github.event_name == 'pull_request'
        uses: cloudflare/pages-action@v1
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./website/dist
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
        id: deployCloudflarePages

      - uses: mshick/add-pr-comment@v2
        if: github.event_name == 'pull_request'
        with:
          message: |
            Preview URL Astro Docs: ${{ steps.deployCloudflarePages.outputs.url }}

      - name: Publish to Cloudflare Pages Production
        if: (github.event_name == 'push' && github.ref == 'refs/heads/dev') || (github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/dev')
        uses: cloudflare/pages-action@v1
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./website/dist
          branch: main
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
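Note on the "Fill env vars" step above: it builds the site's .env by iterating over the keys in .env.example and looking each key up in a JSON dump of all repository secrets (`toJson(secrets)`) via jq. A minimal TypeScript sketch of the same lookup, for illustration only (not part of this diff):

```typescript
import { readFileSync, writeFileSync } from 'node:fs'

// SECRETS is assumed to hold the same JSON object the workflow passes in.
const secrets: Record<string, string> = JSON.parse(process.env.SECRETS ?? '{}')

const env = readFileSync('.env.example', 'utf8')
  .split('\n')
  .filter((line) => line.includes('='))             // only KEY=VALUE lines
  .map((line) => line.split('=')[0])                // keep the key
  .map((key) => `${key}=${secrets[key] ?? 'null'}`) // jq -r prints "null" for missing keys
  .join('\n')

writeFileSync('.env', env)
```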
.github/workflows/jan-docs-new-release.yaml (vendored, deleted, -63 lines)
@@ -1,63 +0,0 @@
name: Deploy Docs on new release

on:
  release:
    types:
      - published
      - edited
      - released

jobs:
  deploy:
    name: Deploy to CloudFlare Pages
    env:
      CLOUDFLARE_PROJECT_NAME: docs
    runs-on: ubuntu-latest
    permissions:
      contents: write
      deployments: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
        with:
          ref: dev
      - uses: actions/setup-node@v3
        with:
          node-version: 20

      - name: Install jq
        uses: dcarbone/install-jq-action@v2.0.1

      - name: Fill env vars
        working-directory: docs
        run: |
          env_example_file=".env.example"
          touch .env
          while IFS= read -r line || [[ -n "$line" ]]; do
            if [[ "$line" == *"="* ]]; then
              var_name=$(echo $line | cut -d '=' -f 1)
              echo $var_name
              var_value="$(jq -r --arg key "$var_name" '.[$key]' <<< "$SECRETS")"
              echo "$var_name=$var_value" >> .env
            fi
          done < "$env_example_file"
        env:
          SECRETS: '${{ toJson(secrets) }}'

      - name: Install dependencies
        working-directory: docs
        run: yarn install
      - name: Build website
        working-directory: docs
        run: export NODE_ENV=production && yarn build && cp _redirects out/_redirects && cp _headers out/_headers

      - name: Publish to Cloudflare Pages Production
        uses: cloudflare/pages-action@v1
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./docs/out
          branch: main
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
.github/workflows/jan-docs.yml (vendored)
@@ -26,7 +26,7 @@ jobs:
       deployments: write
       pull-requests: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - uses: actions/setup-node@v3
        with:
          node-version: 20
.github/workflows/jan-linter-and-test.yml (vendored)
@@ -1,4 +1,4 @@
-name: Test - Linter & Playwright
+name: Linter & Test
 on:
   workflow_dispatch:
   push:
.github/workflows/jan-tauri-build-nightly.yaml (vendored)
@@ -20,6 +20,7 @@ on:
 jobs:
   set-public-provider:
     runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     outputs:
       public_provider: ${{ steps.set-public-provider.outputs.public_provider }}
       ref: ${{ steps.set-public-provider.outputs.ref }}
@@ -47,11 +48,13 @@ jobs:
           fi
   # Job create Update app version based on latest release tag with build number and save to output
   get-update-version:
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     uses: ./.github/workflows/template-get-update-version.yml

   build-macos:
     uses: ./.github/workflows/template-tauri-build-macos.yml
     needs: [get-update-version, set-public-provider]
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     secrets: inherit
     with:
       ref: ${{ needs.set-public-provider.outputs.ref }}
@@ -64,6 +67,7 @@ jobs:
     uses: ./.github/workflows/template-tauri-build-windows-x64.yml
     secrets: inherit
     needs: [get-update-version, set-public-provider]
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     with:
       ref: ${{ needs.set-public-provider.outputs.ref }}
       public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
@@ -74,6 +78,7 @@ jobs:
     uses: ./.github/workflows/template-tauri-build-linux-x64.yml
     secrets: inherit
     needs: [get-update-version, set-public-provider]
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     with:
       ref: ${{ needs.set-public-provider.outputs.ref }}
       public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
@@ -91,6 +96,7 @@ jobs:
         build-macos,
       ]
     runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     steps:
       - name: Getting the repo
         uses: actions/checkout@v3
@@ -224,49 +230,3 @@ jobs:
           RUN_ID=${{ github.run_id }}
           COMMENT="This is the build for this pull request. You can download it from the Artifacts section here: [Build URL](https://github.com/${{ github.repository }}/actions/runs/${RUN_ID})."
           gh pr comment $PR_URL --body "$COMMENT"
-
-  # AutoQA trigger for S3 builds
-  trigger-autoqa-s3:
-    needs:
-      [
-        build-macos,
-        build-windows-x64,
-        build-linux-x64,
-        get-update-version,
-        set-public-provider,
-        sync-temp-to-latest,
-      ]
-    if: needs.set-public-provider.outputs.public_provider == 'aws-s3'
-    uses: ./.github/workflows/autoqa-template.yml
-    with:
-      jan_app_windows_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_x64-setup.exe'
-      jan_app_ubuntu_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_amd64.deb'
-      jan_app_macos_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_universal.dmg'
-      is_nightly: true
-      source_type: 'url'
-    secrets:
-      RP_TOKEN: ${{ secrets.RP_TOKEN }}
-
-  # AutoQA trigger for artifact builds
-  trigger-autoqa-artifacts:
-    needs:
-      [
-        build-macos,
-        build-windows-x64,
-        build-linux-x64,
-        get-update-version,
-        set-public-provider,
-      ]
-    if: needs.set-public-provider.outputs.public_provider == 'none'
-    uses: ./.github/workflows/autoqa-template.yml
-    with:
-      jan_app_windows_source: '' # Not needed for artifacts
-      jan_app_ubuntu_source: '' # Not needed for artifacts
-      jan_app_macos_source: '' # Not needed for artifacts
-      is_nightly: true
-      source_type: 'local'
-      artifact_name_windows: 'jan-windows-${{ needs.get-update-version.outputs.new_version }}'
-      artifact_name_ubuntu: 'jan-linux-amd64-${{ needs.get-update-version.outputs.new_version }}-deb'
-      artifact_name_macos: 'jan-nightly-mac-universal-${{ needs.get-update-version.outputs.new_version }}.dmg'
-    secrets:
-      RP_TOKEN: ${{ secrets.RP_TOKEN }}
.vscode/extensions.json (vendored, deleted, -5 lines)
@@ -1,5 +0,0 @@
{
  "recommendations": [
    "esbenp.prettier-vscode"
  ]
}
.vscode/settings.json (vendored, deleted, -7 lines)
@@ -1,7 +0,0 @@
{
  "editor.defaultFormatter": "esbenp.prettier-vscode",
  "editor.formatOnSave": true,
  "[rust]": {
    "editor.defaultFormatter": "rust-lang.rust-analyzer"
  }
}
README.md
@@ -1,6 +1,6 @@
 # Jan - Local AI Assistant
 
 [banner image]
 
 <p align="center">
   <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
@@ -12,62 +12,50 @@
 </p>
 
 <p align="center">
   <a href="https://jan.ai/docs/quickstart">Getting Started</a>
   - <a href="https://jan.ai/docs">Docs</a>
   - <a href="https://jan.ai/changelog">Changelog</a>
   - <a href="https://github.com/menloresearch/jan/issues">Bug reports</a>
   - <a href="https://discord.gg/AsJ8krTT3N">Discord</a>
 </p>
 
-Jan is a ChatGPT-alternative that runs 100% offline on your device. Our goal is to make it easy for a layperson to download and run LLMs and use AI with **full control** and **privacy**.
+Jan is an AI assistant that can run 100% offline on your device. Download and run LLMs with
+**full control** and **privacy**.
 
-**⚠️ Jan is in active development.**
 
 ## Installation
 
-Because clicking a button is still the easiest way to get started:
+The easiest way to get started is by downloading one of the following versions for your respective operating system:
 
 <table>
   <tr>
     <td><b>Platform</b></td>
     <td><b>Stable</b></td>
-    <td><b>Beta</b></td>
     <td><b>Nightly</b></td>
   </tr>
   <tr>
     <td><b>Windows</b></td>
     <td><a href='https://app.jan.ai/download/latest/win-x64'>jan.exe</a></td>
-    <td><a href='https://app.jan.ai/download/beta/win-x64'>jan.exe</a></td>
    <td><a href='https://app.jan.ai/download/nightly/win-x64'>jan.exe</a></td>
   </tr>
   <tr>
     <td><b>macOS</b></td>
     <td><a href='https://app.jan.ai/download/latest/mac-universal'>jan.dmg</a></td>
-    <td><a href='https://app.jan.ai/download/beta/mac-universal'>jan.dmg</a></td>
     <td><a href='https://app.jan.ai/download/nightly/mac-universal'>jan.dmg</a></td>
   </tr>
   <tr>
     <td><b>Linux (deb)</b></td>
     <td><a href='https://app.jan.ai/download/latest/linux-amd64-deb'>jan.deb</a></td>
-    <td><a href='https://app.jan.ai/download/beta/linux-amd64-deb'>jan.deb</a></td>
     <td><a href='https://app.jan.ai/download/nightly/linux-amd64-deb'>jan.deb</a></td>
   </tr>
   <tr>
     <td><b>Linux (AppImage)</b></td>
     <td><a href='https://app.jan.ai/download/latest/linux-amd64-appimage'>jan.AppImage</a></td>
-    <td><a href='https://app.jan.ai/download/beta/linux-amd64-appimage'>jan.AppImage</a></td>
     <td><a href='https://app.jan.ai/download/nightly/linux-amd64-appimage'>jan.AppImage</a></td>
   </tr>
 </table>
 
 Download from [jan.ai](https://jan.ai/) or [GitHub Releases](https://github.com/menloresearch/jan/releases).
 
-## Demo
-
-<video width="100%" controls>
-  <source src="./docs/public/assets/videos/enable-tool-call-for-models.mp4" type="video/mp4">
-  Your browser does not support the video tag.
-</video>
-
 ## Features
 
@@ -149,13 +137,12 @@ For detailed compatibility, check our [installation guides](https://jan.ai/docs/
 
 ## Troubleshooting
 
-When things go sideways (they will):
+If things go sideways:
 
 1. Check our [troubleshooting docs](https://jan.ai/docs/troubleshooting)
 2. Copy your error logs and system specs
 3. Ask for help in our [Discord](https://discord.gg/FTk2MvZwJH) `#🆘|jan-help` channel
 
-We keep logs for 24 hours, so don't procrastinate on reporting issues.
-
 ## Contributing
 
@@ -175,15 +162,6 @@ Contributions welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for the full spiel
 - **Jobs**: hr@jan.ai
 - **General Discussion**: [Discord](https://discord.gg/FTk2MvZwJH)
 
-## Trust & Safety
-
-**Friendly reminder**: We're not trying to scam you.
-
-- We won't ask for personal information
-- Jan is completely free (no premium version exists)
-- We don't have a cryptocurrency or ICO
-- We're bootstrapped and not seeking your investment (yet)
-
 ## License
 
 Apache 2.0 - Because sharing is caring.
@@ -132,6 +132,12 @@ export abstract class BaseExtension implements ExtensionType {
         setting.controllerProps.options = setting.controllerProps.options?.length
           ? setting.controllerProps.options
           : oldSettings.find((e: any) => e.key === setting.key)?.controllerProps?.options
+        if ('recommended' in setting.controllerProps) {
+          const oldRecommended = oldSettings.find((e: any) => e.key === setting.key)?.controllerProps?.recommended
+          if (oldRecommended !== undefined && oldRecommended !== "") {
+            setting.controllerProps.recommended = oldRecommended
+          }
+        }
       })
     }
     localStorage.setItem(this.name, JSON.stringify(settings))
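The hunk above merges freshly registered settings with whatever was previously persisted: a non-empty `recommended` value saved earlier wins over the empty default shipped in the extension's settings. A standalone sketch of that merge rule, with simplified types (illustrative, not the extension's real definitions):

```typescript
type ControllerProps = { value: unknown; options?: unknown[]; recommended?: string }
type Setting = { key: string; controllerProps: ControllerProps }

// Keep a previously saved, non-empty `recommended` instead of
// overwriting it with the fresh default ("").
function mergeRecommended(fresh: Setting[], old: Setting[]): Setting[] {
  for (const setting of fresh) {
    if ('recommended' in setting.controllerProps) {
      const prev = old.find((e) => e.key === setting.key)?.controllerProps?.recommended
      if (prev !== undefined && prev !== '') setting.controllerProps.recommended = prev
    }
  }
  return fresh
}
```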
@@ -60,4 +60,5 @@ export type DropdownComponentProps = {
   value: string
   type?: InputType
   options?: DropdownOption[]
+  recommended?: string
 }
BIN  docs/src/pages/docs/_assets/hf_hub.png (new file, 203 KiB)
BIN  docs/src/pages/docs/_assets/hf_jan_nano.png (new file, 1.6 MiB)
BIN  docs/src/pages/docs/_assets/hf_jan_nano_2.png (new file, 171 KiB)
BIN  docs/src/pages/docs/_assets/hf_jan_nano_3.png (new file, 139 KiB)
BIN  docs/src/pages/docs/_assets/hf_jan_nano_4.png (new file, 152 KiB)
BIN  docs/src/pages/docs/_assets/hf_jan_nano_5.png (new file, 417 KiB)
BIN  docs/src/pages/docs/_assets/hf_jan_nano_6.png (new file, 405 KiB)
BIN  docs/src/pages/docs/_assets/hf_jan_nano_7.png (new file, 26 KiB)
BIN  docs/src/pages/docs/_assets/hf_jan_nano_8.png (new file, 661 KiB)
BIN  docs/src/pages/docs/_assets/hf_jan_nano_9.png (new file, 158 KiB)
BIN  docs/src/pages/docs/_assets/hf_jan_setup.png (new file, 642 KiB)
BIN  docs/src/pages/docs/_assets/hf_providers.png (new file, 1.5 MiB)
@@ -26,5 +26,9 @@
   "openrouter": {
     "title": "OpenRouter",
     "href": "/docs/remote-models/openrouter"
+  },
+  "huggingface": {
+    "title": "Hugging Face",
+    "href": "/docs/remote-models/huggingface"
   }
 }
docs/src/pages/docs/remote-models/huggingface.mdx (new file, +152 lines)
@@ -0,0 +1,152 @@
---
title: Hugging Face
description: Learn how to integrate Hugging Face models with Jan using the Router or Inference Endpoints.
keywords:
  [
    Hugging Face,
    Jan,
    Jan AI,
    Hugging Face Router,
    Hugging Face Inference Endpoints,
    Hugging Face API,
    Hugging Face Integration,
    Hugging Face API Integration
  ]
---

import { Callout, Steps } from 'nextra/components'
import { Settings, Plus } from 'lucide-react'

# Hugging Face

Jan supports Hugging Face models through two methods: the new **HF Router** (recommended) and **Inference Endpoints**. Both methods require a Hugging Face token and **billing to be set up**.

[image]

## Option 1: HF Router (Recommended)

The HF Router provides access to models from multiple providers (Replicate, Together AI, SambaNova, Fireworks, Cohere, and more) through a single endpoint.

<Steps>

### Step 1: Get Your HF Token

Visit [Hugging Face Settings > Access Tokens](https://huggingface.co/settings/tokens) and create a token. Make sure you have billing set up on your account.

### Step 2: Configure Jan

1. Go to **Settings** > **Model Providers** > **HuggingFace**
2. Enter your HF token
3. Use this URL: `https://router.huggingface.co/v1`

[image]

You can find out more about the HF Router [here](https://huggingface.co/docs/inference-providers/index).

### Step 3: Start Using Models

Jan comes with three HF Router models pre-configured. Select one and start chatting immediately.

</Steps>

<Callout type='info'>
The HF Router automatically routes your requests to the best available provider for each model, giving you access to a wide variety of models without managing individual endpoints.
</Callout>

## Option 2: HF Inference Endpoints

For more control over specific models and deployment configurations, you can use Hugging Face Inference Endpoints.

<Steps>

### Step 1: Navigate to the Hugging Face Model Hub

Visit the [Hugging Face Model Hub](https://huggingface.co/models) (make sure you are logged in) and pick the model you want to use.

[image]

### Step 2: Configure the HF Inference Endpoint and Deploy

Once you have selected the model you want to use, click the **Deploy** button and select a deployment method. We will use HF Inference Endpoints for this example.

[image]
<br/>

This takes you to the deployment setup page. For this example, we leave the default settings under the GPU tab as they are and click **Create Endpoint**.

[image]
<br/>

Once your endpoint is ready, verify that it works on the **Test your endpoint** tab.

[image]
<br/>

If you get a response, click **Copy** to copy the endpoint URL and API key.

<Callout type='info'>
You will need to be logged into Hugging Face Inference Endpoints and have a credit card on file to deploy a model.
</Callout>

### Step 3: Configure Jan

If you do not have an API key, you can create one under **Settings** > **Access Tokens** [here](https://huggingface.co/settings/tokens). Once you finish, copy the token and add it to Jan alongside your endpoint URL at **Settings** > **Model Providers** > **HuggingFace**.

**3.1 HF Token**
[image]
<br/>

**3.2 HF Endpoint URL**
[image]
<br/>

**3.3 Jan Settings**
[image]

<Callout type='warning'>
Make sure to add `/v1/` to the end of your endpoint URL. This is required by the OpenAI-compatible API.
</Callout>

**3.4 Add Model Details**
[image]

### Step 4: Start Using the Model

Now you can start using the model in any chat.

[image]

If you want to learn how to use Jan Nano with MCP, check out [the guide here](../jan-models/jan-nano-32).
<br/>

</Steps>

## Available Hugging Face Models

**Option 1 (HF Router):** Access to models from multiple providers, as shown in the providers image above.

**Option 2 (Inference Endpoints):** You can follow the steps above with a large number of models on Hugging Face and bring them to Jan. Check out other models in the [Hugging Face Model Hub](https://huggingface.co/models).

## Troubleshooting

Common issues and solutions:

**1. Started a chat but the model is not responding**
- Verify your API key/HF token is correct and not expired
- Ensure you have billing set up on your HF account
- For Inference Endpoints: make sure the endpoint is still running; endpoints go idle after a period of inactivity so that you are not charged while not using them

[image]

**2. Connection Problems**
- Check your internet connection
- Verify Hugging Face's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)

**3. Model Unavailable**
- Confirm your API key has access to the model
- Check if you're using the correct model ID
- Verify your Hugging Face account has the necessary permissions

Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check
[Hugging Face's documentation](https://docs.huggingface.co/en/inference-endpoints/index).
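Since Jan talks to the HF Router through an OpenAI-compatible API, the configuration in the doc above can also be sanity-checked outside Jan. A minimal TypeScript sketch - the model id is a placeholder; substitute one that is available to your account:

```typescript
// Call the HF Router's OpenAI-compatible chat endpoint directly.
// HF_TOKEN is your Hugging Face access token; the model id is a placeholder.
const res = await fetch('https://router.huggingface.co/v1/chat/completions', {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${process.env.HF_TOKEN}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'openai/gpt-oss-20b', // placeholder - pick any model the Router serves
    messages: [{ role: 'user', content: 'Say hello in one sentence.' }],
  }),
})
console.log((await res.json()).choices[0].message.content)
```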
@@ -1,9 +0,0 @@
-{
-  "-- Switcher": {
-    "type": "separator",
-    "title": "Switcher"
-  },
-  "index": {
-    "display": "hidden"
-  }
-}
@@ -1,87 +0,0 @@
---
title: Coming Soon
description: Exciting new features and platforms are on the way. Stay tuned for Jan Web, Jan Mobile, and our API Platform.
keywords:
  [
    Jan,
    Customizable Intelligence, LLM,
    local AI,
    privacy focus,
    free and open source,
    private and offline,
    conversational AI,
    no-subscription fee,
    large language models,
    coming soon,
    Jan Web,
    Jan Mobile,
    API Platform,
  ]
---

import { Callout } from 'nextra/components'

<div className="text-center py-12">
  <div className="mb-8">
    <h1 className="text-4xl font-bold bg-gradient-to-r from-blue-600 to-purple-600 bg-clip-text text-transparent mb-4 py-2">
      🚀 Coming Soon
    </h1>
    <p className="text-xl text-gray-600 dark:text-gray-300 max-w-2xl mx-auto">
      We're working on the next stage of Jan - making our local assistant more powerful and available in more platforms.
    </p>
  </div>

  <div className="grid grid-cols-1 md:grid-cols-3 gap-6 max-w-4xl mx-auto mb-12">
    <div className="p-6 border border-gray-200 dark:border-gray-700 rounded-lg bg-gradient-to-br from-blue-50 to-indigo-50 dark:from-blue-900/20 dark:to-indigo-900/20">
      <div className="text-3xl mb-3">🌐</div>
      <h3 className="text-lg font-semibold mb-2">Jan Web</h3>
      <p className="text-sm text-gray-600 dark:text-gray-400">
        Access Jan directly from your browser with our powerful web interface
      </p>
    </div>

    <div className="p-6 border border-gray-200 dark:border-gray-700 rounded-lg bg-gradient-to-br from-green-50 to-emerald-50 dark:from-green-900/20 dark:to-emerald-900/20">
      <div className="text-3xl mb-3">📱</div>
      <h3 className="text-lg font-semibold mb-2">Jan Mobile</h3>
      <p className="text-sm text-gray-600 dark:text-gray-400">
        Take Jan on the go with our native mobile applications
      </p>
    </div>

    <div className="p-6 border border-gray-200 dark:border-gray-700 rounded-lg bg-gradient-to-br from-purple-50 to-pink-50 dark:from-purple-900/20 dark:to-pink-900/20">
      <div className="text-3xl mb-3">⚡</div>
      <h3 className="text-lg font-semibold mb-2">Jan Server</h3>
      <p className="text-sm text-gray-600 dark:text-gray-400">
        Integrate Jan's capabilities into your applications with our API
      </p>
    </div>
  </div>

  <Callout type="info">
    **Stay Updated**: Follow our [GitHub repository](https://github.com/menloresearch/jan) and join our [Discord community](https://discord.com/invite/FTk2MvZwJH) for the latest updates on these exciting releases!
  </Callout>

  <div className="mt-12">
    <h2 className="text-2xl font-semibold mb-6">What to Expect</h2>
    <div className="text-left max-w-2xl mx-auto space-y-4">
      <div className="flex items-start gap-3">
        <span className="text-green-500 text-xl">✓</span>
        <div>
          <strong>Seamless Experience:</strong> Unified interface across all platforms
        </div>
      </div>
      <div className="flex items-start gap-3">
        <span className="text-green-500 text-xl">✓</span>
        <div>
          <strong>Privacy First:</strong> Same privacy-focused approach you trust
        </div>
      </div>
      <div className="flex items-start gap-3">
        <span className="text-green-500 text-xl">✓</span>
        <div>
          <strong>Developer Friendly:</strong> Robust APIs and comprehensive documentation
        </div>
      </div>
    </div>
  </div>
</div>
BIN  docs/src/pages/post/_assets/gpt-oss locally.jpeg (new file, 235 KiB)
BIN  docs/src/pages/post/_assets/jan gpt-oss.jpeg (new file, 233 KiB)
BIN  docs/src/pages/post/_assets/jan hub gpt-oss locally.jpeg (new file, 470 KiB)
BIN  docs/src/pages/post/_assets/run gpt-oss locally in jan.jpeg (new file, 270 KiB)
docs/src/pages/post/run-gpt-oss-locally.mdx (new file, +211 lines)
@@ -0,0 +1,211 @@
---
title: "Run OpenAI's gpt-oss locally in 5 mins (Beginner Guide)"
description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally. Step-by-step setup with Jan AI for private, offline AI conversations."
tags: OpenAI, gpt-oss, local AI, Jan, privacy, Apache-2.0, llama.cpp, Ollama, LM Studio
categories: guides
date: 2025-08-06
ogImage: assets/gpt-oss%20locally.jpeg
twitter:
  card: summary_large_image
  site: "@jandotai"
  title: "Run OpenAI's gpt-oss Locally in 5 Minutes (Beginner Guide)"
  description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally with Jan AI for private, offline conversations."
  image: assets/gpt-oss%20locally.jpeg
---

import { Callout } from 'nextra/components'
import CTABlog from '@/components/Blog/CTA'

# Run OpenAI's gpt-oss Locally in 5 mins

[image]

OpenAI launched [gpt-oss](https://openai.com/index/introducing-gpt-oss/), marking their return to open-source AI after GPT-2. This model is designed to run locally on consumer hardware. This guide shows you how to install and run gpt-oss on your computer for private, offline AI conversations.

## What is gpt-oss?

gpt-oss is OpenAI's open-source large language model, released under the Apache-2.0 license. Unlike ChatGPT, gpt-oss:

- Runs completely offline - no internet required after setup
- 100% private - your conversations never leave your device
- Unlimited usage - no token limits or rate limiting
- Free forever - no subscription fees
- Commercial use allowed - the Apache-2.0 license permits business use

Running AI models locally means everything happens on your own hardware, giving you complete control over your data and conversations.

## gpt-oss System Requirements

| Component | Minimum | Recommended |
|-----------|---------|-------------|
| **RAM** | 16 GB | 32 GB+ |
| **Storage** | 11+ GB free | 25 GB+ free |
| **CPU** | 4 cores | 8+ cores |
| **GPU** | Optional | Modern GPU with 6GB+ VRAM recommended |
| **OS** | Windows 10+, macOS 11+, Linux | Latest versions |

**Installation apps available:**
- **Jan** (recommended - easiest setup)
- **llama.cpp** (command line)
- **Ollama** (Docker-based)
- **LM Studio** (GUI alternative)

## How to install gpt-oss locally with Jan (5 mins)

### Step 1: Download Jan

First, download Jan to run gpt-oss locally: [Download Jan AI](https://jan.ai/)

<Callout type="info">
Jan is the simplest way to run AI models locally. It automatically handles CPU/GPU optimization, provides a clean chat interface, and requires zero technical knowledge.
</Callout>

### Step 2: Install the gpt-oss Model (2-3 minutes)

[image]

1. Open Jan Hub → search "gpt-oss" (it appears at the top)
2. Click Download and wait for completion (~11GB download)
3. Installation is automatic - Jan handles everything

### Step 3: Start using gpt-oss offline (30 seconds)

[image]

1. Go to New Chat → select gpt-oss-20b from the model picker
2. Start chatting - Jan automatically optimizes for your hardware
3. You're done! Your AI conversations now stay completely private

Success: Your gpt-oss setup is complete. No internet required for chatting, unlimited usage, zero subscription fees.

## Jan with gpt-oss vs ChatGPT vs other Local AI Models

| Feature | gpt-oss (Local) | ChatGPT Plus | Claude Pro | Other Local Models |
|---------|----------------|--------------|------------|-------------------|
| Cost | Free forever | $20/month | $20/month | Free |
| Privacy | 100% private | Data sent to OpenAI | Data sent to Anthropic | 100% private |
| Internet | Offline after setup | Requires internet | Requires internet | Offline |
| Usage limits | Unlimited | Rate limited | Rate limited | Unlimited |
| Performance | Good (hardware dependent) | Excellent | Excellent | Varies |
| Setup difficulty | Easy with Jan | None | None | Varies |

## Alternative Installation Methods

### Option 1: Jan (Recommended)

- Best for: complete beginners, users who want a GUI
- Setup time: 5 minutes
- Difficulty: Very Easy

Already covered above - [Download Jan](https://jan.ai/)

### Option 2: llama.cpp (Command Line)

- Best for: developers, terminal users, custom integrations
- Setup time: 10-15 minutes
- Difficulty: Intermediate

```bash
# macOS
brew install llama-cpp

# Windows: grab the Windows exe from the llama.cpp releases page
curl -L -o gpt-oss-20b.gguf https://huggingface.co/openai/gpt-oss-20b-gguf/resolve/main/gpt-oss-20b.gguf
./main -m gpt-oss-20b.gguf --chat-simple

# Add GPU acceleration (adjust the -ngl value based on your GPU VRAM)
./main -m gpt-oss-20b.gguf --chat-simple -ngl 20
```

### Option 3: Ollama (Docker-Based)

- Best for: Docker users, server deployments
- Setup time: 5-10 minutes
- Difficulty: Intermediate

```bash
# Install from https://ollama.com
ollama run gpt-oss:20b
```

### Option 4: LM Studio (GUI Alternative)

- Best for: users who want a GUI but not Jan
- Setup time: 10 minutes
- Difficulty: Easy

1. Download LM Studio from the official website
2. Go to Models → search "gpt-oss-20b (GGUF)"
3. Download the model (wait for completion)
4. Go to the Chat tab → select the model and start chatting

## gpt-oss Performance & Troubleshooting

### Expected Performance Benchmarks

| Hardware Setup | First Response | Subsequent Responses | Tokens/Second |
|---------------|---------------|---------------------|---------------|
| **16GB RAM + CPU only** | 30-45 seconds | 3-6 seconds | 3-8 tokens/sec |
| **32GB RAM + RTX 3060** | 15-25 seconds | 1-3 seconds | 15-25 tokens/sec |
| **32GB RAM + RTX 4080+** | 8-15 seconds | 1-2 seconds | 25-45 tokens/sec |

### Common Issues & Solutions

Performance optimization tips:
- First response is slow: normal - kernels compile once, then responses speed up dramatically
- Out of VRAM error: reduce the context length in settings or switch to CPU mode
- Out of memory: close memory-heavy apps (Chrome, games, video editors)
- Slow responses: check whether other apps are using the GPU/CPU heavily

Quick fixes:
1. Restart Jan if responses become slow
2. Lower the context window from 4096 to 2048 tokens
3. Enable CPU mode if GPU issues persist
4. Free up RAM by closing unused applications

## Frequently Asked Questions (FAQ)

### Is gpt-oss completely free?
Yes! gpt-oss is 100% free under the Apache-2.0 license. No subscription fees, no token limits, no hidden costs.

### How much internet data does gpt-oss use?
Only the initial 11GB download. After installation, gpt-oss works completely offline with zero internet usage.

### Can I use gpt-oss for commercial projects?
Absolutely! The Apache-2.0 license permits commercial use, modification, and distribution.

### Is gpt-oss better than ChatGPT?
gpt-oss offers different advantages: complete privacy, unlimited usage, offline capability, and no costs. ChatGPT may have better performance but requires internet access and a subscription.

### What happens to my conversations with gpt-oss?
Your conversations stay 100% on your device. Nothing is sent to OpenAI, Jan, or any external servers.

### Can I run gpt-oss on a Mac with 8GB RAM?
No, gpt-oss requires a minimum of 16GB RAM. Consider upgrading your RAM or using cloud-based alternatives.

### How do I update gpt-oss to newer versions?
Jan automatically notifies you of updates. Simply click update in Jan Hub when new versions are available.

## Why Choose gpt-oss Over ChatGPT Plus?

gpt-oss advantages:
- $0/month vs $20/month for ChatGPT Plus
- 100% private - no data leaves your device
- Unlimited usage - no rate limits or restrictions
- Works offline - no internet required after setup
- Commercial use allowed - build businesses with it

When to choose ChatGPT Plus instead:
- You need the absolute best performance
- You don't want to manage a local installation
- You have less than 16GB RAM

## Get started with gpt-oss today

[image]

Ready to try gpt-oss?
- Download Jan: [https://jan.ai/](https://jan.ai/)
- View the source code: [https://github.com/menloresearch/jan](https://github.com/menloresearch/jan)
- Need help? Check our [local AI guide](/post/run-ai-models-locally) for beginners

<CTABlog />
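One thing the guide above does not show: once gpt-oss is running in Jan, Jan can also expose it through a local OpenAI-compatible server, so your own scripts can reuse the model. A hedged TypeScript sketch - this assumes you have enabled Jan's local API server in its settings; the port and model id below are placeholders to adjust for your setup:

```typescript
// Sketch only: chat with a locally running gpt-oss through Jan's
// OpenAI-compatible local server (enable it in Jan's settings first).
// Port and model id are placeholders - check your own Jan configuration.
const res = await fetch('http://localhost:1337/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'gpt-oss-20b', // placeholder model id
    messages: [{ role: 'user', content: 'Summarize why local AI matters.' }],
  }),
})
console.log((await res.json()).choices[0].message.content)
```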
@@ -6,9 +6,11 @@
     "controllerType": "dropdown",
     "controllerProps": {
       "value": "none",
-      "options": []
+      "options": [],
+      "recommended": ""
     }
   },

   {
     "key": "auto_update_engine",
     "title": "Auto update engine",
@@ -23,18 +25,6 @@
     "controllerType": "checkbox",
     "controllerProps": { "value": true }
   },
-  {
-    "key": "chat_template",
-    "title": "Custom Jinja Chat template",
-    "description": "Custom Jinja chat_template to be used for the model",
-    "controllerType": "input",
-    "controllerProps": {
-      "value": "",
-      "placeholder": "e.g., {% for message in messages %}...{% endfor %} (default is read from GGUF)",
-      "type": "text",
-      "textAlign": "right"
-    }
-  },
   {
     "key": "threads",
     "title": "Threads",
@@ -27,8 +27,18 @@ export async function listSupportedBackends(): Promise<
     if (features.avx) supportedBackends.push('win-avx-x64')
     if (features.avx2) supportedBackends.push('win-avx2-x64')
     if (features.avx512) supportedBackends.push('win-avx512-x64')
-    if (features.cuda11) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
-    if (features.cuda12) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
+    if (features.cuda11) {
+      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu11.7-x64')
+      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
+      else if (features.avx) supportedBackends.push('win-avx-cuda-cu11.7-x64')
+      else supportedBackends.push('win-noavx-cuda-cu11.7-x64')
+    }
+    if (features.cuda12) {
+      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu12.0-x64')
+      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
+      else if (features.avx) supportedBackends.push('win-avx-cuda-cu12.0-x64')
+      else supportedBackends.push('win-noavx-cuda-cu12.0-x64')
+    }
     if (features.vulkan) supportedBackends.push('win-vulkan-x64')
   }
   // not available yet, placeholder for future
@@ -39,8 +49,22 @@ export async function listSupportedBackends(): Promise<
     if (features.avx) supportedBackends.push('linux-avx-x64')
     if (features.avx2) supportedBackends.push('linux-avx2-x64')
     if (features.avx512) supportedBackends.push('linux-avx512-x64')
-    if (features.cuda11) supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
-    if (features.cuda12) supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
+    if (features.cuda11) {
+      if (features.avx512)
+        supportedBackends.push('linux-avx512-cuda-cu11.7-x64')
+      else if (features.avx2)
+        supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
+      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64')
+      else supportedBackends.push('linux-noavx-cuda-cu11.7-x64')
+    }
+    if (features.cuda12) {
+      if (features.avx512)
+        supportedBackends.push('linux-avx512-cuda-cu12.0-x64')
+      else if (features.avx2)
+        supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
+      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64')
+      else supportedBackends.push('linux-noavx-cuda-cu12.0-x64')
+    }
     if (features.vulkan) supportedBackends.push('linux-vulkan-x64')
   }
   // not available yet, placeholder for future
@@ -236,10 +260,16 @@ async function _getSupportedFeatures() {
       if (compareVersions(driverVersion, minCuda12DriverVersion) >= 0)
         features.cuda12 = true
     }
-    if (gpuInfo.vulkan_info?.api_version) features.vulkan = true
+    // Vulkan support check - only discrete GPUs with 6GB+ VRAM
+    if (
+      gpuInfo.vulkan_info?.api_version &&
+      gpuInfo.vulkan_info?.device_type === 'DISCRETE_GPU' &&
+      gpuInfo.total_memory >= 6 * 1024
+    ) {
+      // 6GB (total_memory is in MB)
+      features.vulkan = true
+    }
   }

   return features
 }
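The four near-identical CUDA blocks above all encode the same rule: for a given CUDA version, advertise the single best SIMD variant the CPU supports (avx512 over avx2 over avx over noavx). A hypothetical helper that expresses the rule once - illustrative only, not how the PR factored it:

```typescript
type CpuFeatures = { avx512: boolean; avx2: boolean; avx: boolean }

// Hypothetical refactor of the repeated blocks above: choose the best
// SIMD tier, then assemble the backend name for a given OS and CUDA version.
function bestCudaBackend(
  f: CpuFeatures,
  os: 'win' | 'linux',
  cu: 'cu11.7' | 'cu12.0'
): string {
  const simd = f.avx512 ? 'avx512' : f.avx2 ? 'avx2' : f.avx ? 'avx' : 'noavx'
  return `${os}-${simd}-cuda-${cu}-x64`
}

// e.g. bestCudaBackend({ avx512: false, avx2: true, avx: true }, 'linux', 'cu12.0')
// -> 'linux-avx2-cuda-cu12.0-x64'
```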
@ -39,6 +39,7 @@ type LlamacppConfig = {
|
|||||||
auto_unload: boolean
|
auto_unload: boolean
|
||||||
chat_template: string
|
chat_template: string
|
||||||
n_gpu_layers: number
|
n_gpu_layers: number
|
||||||
|
override_tensor_buffer_t: string
|
||||||
ctx_size: number
|
ctx_size: number
|
||||||
threads: number
|
threads: number
|
||||||
threads_batch: number
|
threads_batch: number
|
||||||
@ -144,7 +145,6 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
readonly providerId: string = 'llamacpp'
|
readonly providerId: string = 'llamacpp'
|
||||||
|
|
||||||
private config: LlamacppConfig
|
private config: LlamacppConfig
|
||||||
private activeSessions: Map<number, SessionInfo> = new Map()
|
|
||||||
private providerPath!: string
|
private providerPath!: string
|
||||||
private apiSecret: string = 'JustAskNow'
|
private apiSecret: string = 'JustAskNow'
|
||||||
private pendingDownloads: Map<string, Promise<void>> = new Map()
|
private pendingDownloads: Map<string, Promise<void>> = new Map()
|
||||||
@ -297,6 +297,12 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
return { value: key, name: key }
|
return { value: key, name: key }
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Set the recommended backend based on bestAvailableBackendString
|
||||||
|
if (bestAvailableBackendString) {
|
||||||
|
backendSetting.controllerProps.recommended =
|
||||||
|
bestAvailableBackendString
|
||||||
|
}
|
||||||
|
|
||||||
const savedBackendSetting = await this.getSetting<string>(
|
const savedBackendSetting = await this.getSetting<string>(
|
||||||
'version_backend',
|
'version_backend',
|
||||||
originalDefaultBackendValue
|
originalDefaultBackendValue
|
||||||
@ -357,9 +363,16 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
|
|
||||||
// Handle fresh installation case where version_backend might be 'none' or invalid
|
// Handle fresh installation case where version_backend might be 'none' or invalid
|
||||||
if (
|
if (
|
||||||
!effectiveBackendString ||
|
(!effectiveBackendString ||
|
||||||
effectiveBackendString === 'none' ||
|
effectiveBackendString === 'none' ||
|
||||||
!effectiveBackendString.includes('/')
|
!effectiveBackendString.includes('/') ||
|
||||||
|
// If the selected backend is not in the list of supported backends
|
||||||
|
// Need to reset too
|
||||||
|
!version_backends.some(
|
||||||
|
(e) => `${e.version}/${e.backend}` === effectiveBackendString
|
||||||
|
)) &&
|
||||||
|
// Ensure we have a valid best available backend
|
||||||
|
bestAvailableBackendString
|
||||||
) {
|
) {
|
||||||
effectiveBackendString = bestAvailableBackendString
|
effectiveBackendString = bestAvailableBackendString
|
||||||
logger.info(
|
logger.info(
|
||||||
@ -380,6 +393,17 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
})
|
})
|
||||||
)
|
)
|
||||||
logger.info(`Updated UI settings to show: ${effectiveBackendString}`)
|
logger.info(`Updated UI settings to show: ${effectiveBackendString}`)
|
||||||
|
|
||||||
|
// Emit for updating fe
|
||||||
|
if (events && typeof events.emit === 'function') {
|
||||||
|
logger.info(
|
||||||
|
`Emitting settingsChanged event for version_backend with value: ${effectiveBackendString}`
|
||||||
|
)
|
||||||
|
events.emit('settingsChanged', {
|
||||||
|
key: 'version_backend',
|
||||||
|
value: effectiveBackendString,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download and install the backend if not already present
|
// Download and install the backend if not already present
|
||||||
@ -746,16 +770,6 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
|
|
||||||
override async onUnload(): Promise<void> {
|
override async onUnload(): Promise<void> {
|
||||||
// Terminate all active sessions
|
// Terminate all active sessions
|
||||||
for (const [_, sInfo] of this.activeSessions) {
|
|
||||||
try {
|
|
||||||
await this.unload(sInfo.model_id)
|
|
||||||
} catch (error) {
|
|
||||||
logger.error(`Failed to unload model ${sInfo.model_id}:`, error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clear the sessions map
|
|
||||||
this.activeSessions.clear()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
onSettingUpdate<T>(key: string, value: T): void {
|
onSettingUpdate<T>(key: string, value: T): void {
|
||||||
@ -1079,67 +1093,13 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
* Function to find a random port
|
* Function to find a random port
|
||||||
*/
|
*/
|
||||||
private async getRandomPort(): Promise<number> {
|
private async getRandomPort(): Promise<number> {
|
||||||
const MAX_ATTEMPTS = 20000
|
try {
|
||||||
let attempts = 0
|
const port = await invoke<number>('get_random_port')
|
||||||
|
return port
|
||||||
while (attempts < MAX_ATTEMPTS) {
|
} catch {
|
||||||
const port = Math.floor(Math.random() * 1000) + 3000
|
logger.error('Unable to find a suitable port')
|
||||||
|
throw new Error('Unable to find a suitable port for model')
|
||||||
const isAlreadyUsed = Array.from(this.activeSessions.values()).some(
|
|
||||||
(info) => info.port === port
|
|
||||||
)
|
|
||||||
|
|
||||||
if (!isAlreadyUsed) {
|
|
||||||
const isAvailable = await invoke<boolean>('is_port_available', { port })
|
|
||||||
if (isAvailable) return port
|
|
||||||
}
|
|
||||||
|
|
||||||
attempts++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Error('Failed to find an available port for the model to load')
|
|
||||||
}
|
|
||||||
|
|
||||||
private async sleep(ms: number): Promise<void> {
|
|
||||||
return new Promise((resolve) => setTimeout(resolve, ms))
|
|
||||||
}
|
|
||||||
|
|
||||||
private async waitForModelLoad(
|
|
||||||
sInfo: SessionInfo,
|
|
||||||
timeoutMs = 240_000
|
|
||||||
): Promise<void> {
|
|
||||||
await this.sleep(500) // Wait before first check
|
|
||||||
const start = Date.now()
|
|
||||||
while (Date.now() - start < timeoutMs) {
|
|
||||||
try {
|
|
||||||
const res = await fetch(`http://localhost:${sInfo.port}/health`)
|
|
||||||
|
|
||||||
if (res.status === 503) {
|
|
||||||
const body = await res.json()
|
|
||||||
const msg = body?.error?.message ?? 'Model loading'
|
|
||||||
logger.info(`waiting for model load... (${msg})`)
|
|
||||||
} else if (res.ok) {
|
|
||||||
const body = await res.json()
|
|
||||||
if (body.status === 'ok') {
|
|
||||||
return
|
|
||||||
} else {
|
|
||||||
logger.warn('Unexpected OK response from /health:', body)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
logger.warn(`Unexpected status ${res.status} from /health`)
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
await this.unload(sInfo.model_id)
|
|
||||||
throw new Error(`Model appears to have crashed: ${e}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
await this.sleep(800) // Retry interval
|
|
||||||
}
|
|
||||||
|
|
||||||
await this.unload(sInfo.model_id)
|
|
||||||
throw new Error(
|
|
||||||
`Timed out loading model after ${timeoutMs}... killing llamacpp`
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
override async load(
|
override async load(
|
||||||
@@ -1147,7 +1107,7 @@ export default class llamacpp_extension extends AIEngine {
     overrideSettings?: Partial<LlamacppConfig>,
     isEmbedding: boolean = false
   ): Promise<SessionInfo> {
-    const sInfo = this.findSessionByModel(modelId)
+    const sInfo = await this.findSessionByModel(modelId)
     if (sInfo) {
       throw new Error('Model already loaded!!')
     }
@@ -1238,6 +1198,14 @@ export default class llamacpp_extension extends AIEngine {
     args.push('--jinja')
     args.push('--reasoning-format', 'none')
     args.push('-m', modelPath)
+    // Overrides the buffer type for matching tensors. Useful where massive
+    // MoE models can be made faster by keeping attention on the GPU and
+    // offloading the expert FFNs to the CPU.
+    // This is an expert-level setting and should only be used by people who
+    // know what they are doing.
+    // Takes a regex matching tensor names as input.
+    if (cfg.override_tensor_buffer_t)
+      args.push('--override-tensor', cfg.override_tensor_buffer_t)
     args.push('-a', modelId)
     args.push('--port', String(port))
     if (modelConfig.mmproj_path) {
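For reference, llama.cpp's `--override-tensor` accepts `regex=buffer-type` pairs, so a typical value for `override_tensor_buffer_t` would look like the following (illustrative pattern only; exact tensor names vary per model architecture):

```ts
// Illustrative only: keep MoE expert FFN tensors in host (CPU) memory while
// the rest of the model follows the usual GPU offload settings.
const args: string[] = []
const overrideTensorBufferT = String.raw`\.ffn_.*_exps\.=CPU` // assumed pattern
args.push('--override-tensor', overrideTensorBufferT)
```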
@@ -1247,11 +1215,6 @@ export default class llamacpp_extension extends AIEngine {
       ])
       args.push('--mmproj', mmprojPath)
     }
-
-    if (cfg.ctx_size !== undefined) {
-      args.push('-c', String(cfg.ctx_size))
-    }
-
     // Add remaining options from the interface
     if (cfg.chat_template) args.push('--chat-template', cfg.chat_template)
     const gpu_layers =
@@ -1263,8 +1226,9 @@ export default class llamacpp_extension extends AIEngine {
     if (cfg.batch_size > 0) args.push('--batch-size', String(cfg.batch_size))
     if (cfg.ubatch_size > 0) args.push('--ubatch-size', String(cfg.ubatch_size))
     if (cfg.device.length > 0) args.push('--device', cfg.device)
-    if (cfg.split_mode.length > 0) args.push('--split-mode', cfg.split_mode)
-    if (cfg.main_gpu !== undefined)
+    if (cfg.split_mode.length > 0 && cfg.split_mode != 'layer')
+      args.push('--split-mode', cfg.split_mode)
+    if (cfg.main_gpu !== undefined && cfg.main_gpu != 0)
       args.push('--main-gpu', String(cfg.main_gpu))

     // Boolean flags
@@ -1280,19 +1244,26 @@ export default class llamacpp_extension extends AIEngine {
     } else {
       if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size))
       if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict))
-      args.push('--cache-type-k', cfg.cache_type_k)
+      if (cfg.cache_type_k && cfg.cache_type_k != 'f16')
+        args.push('--cache-type-k', cfg.cache_type_k)
       if (
-        (cfg.flash_attn && cfg.cache_type_v != 'f16') ||
+        cfg.flash_attn &&
+        cfg.cache_type_v != 'f16' &&
         cfg.cache_type_v != 'f32'
       ) {
         args.push('--cache-type-v', cfg.cache_type_v)
       }
-      args.push('--defrag-thold', String(cfg.defrag_thold))
+      if (cfg.defrag_thold && cfg.defrag_thold != 0.1)
+        args.push('--defrag-thold', String(cfg.defrag_thold))

-      args.push('--rope-scaling', cfg.rope_scaling)
-      args.push('--rope-scale', String(cfg.rope_scale))
-      args.push('--rope-freq-base', String(cfg.rope_freq_base))
-      args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
+      if (cfg.rope_scaling && cfg.rope_scaling != 'none')
+        args.push('--rope-scaling', cfg.rope_scaling)
+      if (cfg.rope_scale && cfg.rope_scale != 1)
+        args.push('--rope-scale', String(cfg.rope_scale))
+      if (cfg.rope_freq_base && cfg.rope_freq_base != 0)
+        args.push('--rope-freq-base', String(cfg.rope_freq_base))
+      if (cfg.rope_freq_scale && cfg.rope_freq_scale != 1)
+        args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
     }

     logger.info('Calling Tauri command llama_load with args:', args)
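All of these edits follow one pattern: a flag is only forwarded when its value deviates from llama-server's built-in default, keeping the command line minimal. A hypothetical helper could express that intent once (a sketch, not what the PR ships):

```ts
// Hypothetical refactor: forward a flag only when it deviates from the default.
function pushIfNotDefault<T>(args: string[], flag: string, value: T, def: T) {
  if (value !== undefined && value !== def) args.push(flag, String(value))
}

// e.g. pushIfNotDefault(args, '--defrag-thold', cfg.defrag_thold, 0.1)
//      pushIfNotDefault(args, '--rope-scale', cfg.rope_scale, 1)
```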
@@ -1306,26 +1277,20 @@ export default class llamacpp_extension extends AIEngine {
         libraryPath,
         args,
       })
-
-      // Store the session info for later use
-      this.activeSessions.set(sInfo.pid, sInfo)
-      await this.waitForModelLoad(sInfo)

       return sInfo
     } catch (error) {
-      logger.error('Error loading llama-server:\n', error)
-      throw new Error(`Failed to load llama-server: ${error}`)
+      logger.error('Error in load command:\n', error)
+      throw new Error(`Failed to load model:\n${error}`)
     }
   }

   override async unload(modelId: string): Promise<UnloadResult> {
-    const sInfo: SessionInfo = this.findSessionByModel(modelId)
+    const sInfo: SessionInfo = await this.findSessionByModel(modelId)
     if (!sInfo) {
       throw new Error(`No active session found for model: ${modelId}`)
     }
     const pid = sInfo.pid
     try {
-      this.activeSessions.delete(pid)
-
       // Pass the PID as the session_id
       const result = await invoke<UnloadResult>('unload_llama_model', {
@@ -1337,13 +1302,11 @@ export default class llamacpp_extension extends AIEngine {
         logger.info(`Successfully unloaded model with PID ${pid}`)
       } else {
         logger.warn(`Failed to unload model: ${result.error}`)
-        this.activeSessions.set(sInfo.pid, sInfo)
       }

       return result
     } catch (error) {
       logger.error('Error in unload command:', error)
-      this.activeSessions.set(sInfo.pid, sInfo)
       return {
         success: false,
         error: `Failed to unload model: ${error}`,
@@ -1466,17 +1429,21 @@ export default class llamacpp_extension extends AIEngine {
     }
   }

-  private findSessionByModel(modelId: string): SessionInfo | undefined {
-    return Array.from(this.activeSessions.values()).find(
-      (session) => session.model_id === modelId
-    )
+  private async findSessionByModel(modelId: string): Promise<SessionInfo> {
+    try {
+      let sInfo = await invoke<SessionInfo>('find_session_by_model', {modelId})
+      return sInfo
+    } catch (e) {
+      logger.error(e)
+      throw new Error(String(e))
+    }
   }

   override async chat(
     opts: chatCompletionRequest,
     abortController?: AbortController
   ): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> {
-    const sessionInfo = this.findSessionByModel(opts.model)
+    const sessionInfo = await this.findSessionByModel(opts.model)
     if (!sessionInfo) {
       throw new Error(`No active session found for model: ${opts.model}`)
     }
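One subtlety worth noting: the Rust `find_session_by_model` command (further down in this diff) returns `Result<Option<SessionInfo>, String>`, and an `Option::None` crosses the Tauri bridge as `null`, which is why callers here guard with `if (!sessionInfo)`. A signature that makes that explicit would be (sketch; `invoke` and `SessionInfo` as in the surrounding module):

```ts
// Hedged sketch: type the nullable result explicitly instead of
// Promise<SessionInfo>, since the command resolves to null when no
// session matches the given model id.
async function findSessionByModel(modelId: string): Promise<SessionInfo | null> {
  return await invoke<SessionInfo | null>('find_session_by_model', { modelId })
}
```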
@@ -1492,7 +1459,6 @@ export default class llamacpp_extension extends AIEngine {
         throw new Error('Model appears to have crashed! Please reload!')
       }
     } else {
-      this.activeSessions.delete(sessionInfo.pid)
       throw new Error('Model have crashed! Please reload!')
     }
     const baseUrl = `http://localhost:${sessionInfo.port}/v1`
@@ -1541,11 +1507,13 @@ export default class llamacpp_extension extends AIEngine {
   }

   override async getLoadedModels(): Promise<string[]> {
-    let lmodels: string[] = []
-    for (const [_, sInfo] of this.activeSessions) {
-      lmodels.push(sInfo.model_id)
-    }
-    return lmodels
+    try {
+      let models: string[] = await invoke<string[]>('get_loaded_models')
+      return models
+    } catch (e) {
+      logger.error(e)
+      throw new Error(e)
+    }
   }

   async getDevices(): Promise<DeviceList[]> {
@@ -1575,7 +1543,7 @@ export default class llamacpp_extension extends AIEngine {
   }

   async embed(text: string[]): Promise<EmbeddingResponse> {
-    let sInfo = this.findSessionByModel('sentence-transformer-mini')
+    let sInfo = await this.findSessionByModel('sentence-transformer-mini')
     if (!sInfo) {
       const downloadedModelList = await this.list()
       if (
@@ -51,6 +51,164 @@ describe('Backend functions', () => {
     ])
   })

+  it('should return CUDA backends with proper CPU instruction detection for Windows', async () => {
+    // Mock system info with CUDA support and AVX512
+    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
+      os_type: 'windows',
+      cpu: {
+        arch: 'x86_64',
+        extensions: ['avx', 'avx2', 'avx512'],
+      },
+      gpus: [
+        {
+          driver_version: '530.41',
+          nvidia_info: { compute_capability: '8.6' },
+        },
+      ],
+    })
+
+    // Mock GitHub releases with CUDA backends
+    const mockReleases = [
+      {
+        tag_name: 'v1.0.0',
+        assets: [
+          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
+        ],
+      },
+    ]
+
+    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve(mockReleases),
+    })
+
+    const result = await listSupportedBackends()
+
+    expect(result).toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
+  })
+
+  it('should select appropriate CUDA backend based on CPU features - AVX2 only', async () => {
+    // Mock system info with CUDA support but only AVX2
+    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
+      os_type: 'windows',
+      cpu: {
+        arch: 'x86_64',
+        extensions: ['avx', 'avx2'], // No AVX512
+      },
+      gpus: [
+        {
+          driver_version: '530.41',
+          nvidia_info: { compute_capability: '8.6' },
+        },
+      ],
+    })
+
+    const mockReleases = [
+      {
+        tag_name: 'v1.0.0',
+        assets: [
+          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
+        ],
+      },
+    ]
+
+    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve(mockReleases),
+    })
+
+    const result = await listSupportedBackends()
+
+    expect(result).toContain({ version: 'v1.0.0', backend: 'win-avx2-cuda-cu12.0-x64' })
+    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
+  })
+
+  it('should select appropriate CUDA backend based on CPU features - no AVX', async () => {
+    // Mock system info with CUDA support but no AVX
+    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
+      os_type: 'windows',
+      cpu: {
+        arch: 'x86_64',
+        extensions: [], // No AVX extensions
+      },
+      gpus: [
+        {
+          driver_version: '530.41',
+          nvidia_info: { compute_capability: '8.6' },
+        },
+      ],
+    })
+
+    const mockReleases = [
+      {
+        tag_name: 'v1.0.0',
+        assets: [
+          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
+        ],
+      },
+    ]
+
+    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve(mockReleases),
+    })
+
+    const result = await listSupportedBackends()
+
+    expect(result).toContain({ version: 'v1.0.0', backend: 'win-noavx-cuda-cu12.0-x64' })
+    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx2-cuda-cu12.0-x64' })
+    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
+  })
+
+  it('should return CUDA backends with proper CPU instruction detection for Linux', async () => {
+    // Mock system info with CUDA support and AVX support
+    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
+      os_type: 'linux',
+      cpu: {
+        arch: 'x86_64',
+        extensions: ['avx'], // Only AVX, no AVX2
+      },
+      gpus: [
+        {
+          driver_version: '530.60.13',
+          nvidia_info: { compute_capability: '8.6' },
+        },
+      ],
+    })
+
+    const mockReleases = [
+      {
+        tag_name: 'v1.0.0',
+        assets: [
+          { name: 'llama-v1.0.0-bin-linux-avx512-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-linux-avx2-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-linux-avx-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-linux-noavx-cuda-cu12.0-x64.tar.gz' },
+        ],
+      },
+    ]
+
+    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve(mockReleases),
+    })
+
+    const result = await listSupportedBackends()
+
+    expect(result).toContain({ version: 'v1.0.0', backend: 'linux-avx-cuda-cu12.0-x64' })
+    expect(result).not.toContain({ version: 'v1.0.0', backend: 'linux-avx2-cuda-cu12.0-x64' })
+    expect(result).not.toContain({ version: 'v1.0.0', backend: 'linux-avx512-cuda-cu12.0-x64' })
+  })
+
   it('should return supported backends for macOS arm64', async () => {
     window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
       os_type: 'macos',
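One caveat about these assertions: vitest's `toContain` checks array membership with strict (reference) equality, so matching against a fresh object literal never succeeds; `toContainEqual` performs the structural comparison these tests appear to intend:

```ts
// Structural matching for object elements (what the literals above need):
expect(result).toContainEqual({ version: 'v1.0.0', backend: 'win-avx2-cuda-cu12.0-x64' })
expect(result).not.toContainEqual({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
```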
@@ -17,7 +17,7 @@
     "test:coverage": "vitest run --coverage",
     "test:prepare": "yarn build:icon && yarn copy:assets:tauri && yarn build --no-bundle ",
     "dev:web": "yarn workspace @janhq/web-app dev",
-    "dev:tauri": "yarn build:icon && yarn copy:assets:tauri && tauri dev",
+    "dev:tauri": "yarn build:icon && yarn copy:assets:tauri && cross-env IS_CLEAN=true tauri dev",
     "copy:assets:tauri": "cpx \"pre-install/*.tgz\" \"src-tauri/resources/pre-install/\"",
     "download:lib": "node ./scripts/download-lib.mjs",
     "download:bin": "node ./scripts/download-bin.mjs",
@@ -63,8 +63,12 @@ nix = "=0.30.1"

 [target.'cfg(windows)'.dependencies]
 libc = "0.2.172"
+windows-sys = { version = "0.60.2", features = ["Win32_Storage_FileSystem"] }

 [target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies]
 tauri-plugin-updater = "2"
 once_cell = "1.18"
 tauri-plugin-single-instance = { version = "2.0.0", features = ["deep-link"] }
+
+[target.'cfg(windows)'.dev-dependencies]
+tempfile = "3.20.0"
@@ -10,7 +10,11 @@ use tokio::{
     time::{sleep, timeout},
 };

-use super::{cmd::get_jan_data_folder_path, state::AppState};
+use super::{
+    cmd::get_jan_data_folder_path,
+    state::AppState,
+    utils::can_override_npx,
+};

 const DEFAULT_MCP_CONFIG: &str = r#"{
   "mcpServers": {
@@ -512,8 +516,8 @@ async fn schedule_mcp_start_task<R: Runtime>(
         .ok_or_else(|| format!("Failed to extract command args from config for {name}"))?;

     let mut cmd = Command::new(command.clone());

-    if command == "npx" {
+    if command == "npx" && can_override_npx() {
         let mut cache_dir = app_path.clone();
         cache_dir.push(".npx");
         let bun_x_path = format!("{}/bun", bin_path.display());
@@ -43,8 +43,8 @@ pub fn install_extensions(app: tauri::AppHandle, force: bool) -> Result<(), String> {

     let mut clean_up = force;

-    // Check CLEAN environment variable to optionally skip extension install
-    if std::env::var("CLEAN").is_ok() {
+    // Check IS_CLEAN environment variable to optionally skip extension install
+    if std::env::var("IS_CLEAN").is_ok() {
         clean_up = true;
     }
     log::info!(
|||||||
@ -1,7 +1,9 @@
|
|||||||
use base64::{engine::general_purpose, Engine as _};
|
use base64::{engine::general_purpose, Engine as _};
|
||||||
use hmac::{Hmac, Mac};
|
use hmac::{Hmac, Mac};
|
||||||
|
use rand::{rngs::StdRng, Rng, SeedableRng};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use sha2::Sha256;
|
use sha2::Sha256;
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::process::Stdio;
|
use std::process::Stdio;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
@@ -67,13 +69,39 @@ pub struct DeviceInfo {
     pub free: i32,
 }

+#[cfg(windows)]
+use std::os::windows::ffi::OsStrExt;
+
+#[cfg(windows)]
+use std::ffi::OsStr;
+
+#[cfg(windows)]
+use windows_sys::Win32::Storage::FileSystem::GetShortPathNameW;
+
+#[cfg(windows)]
+pub fn get_short_path<P: AsRef<std::path::Path>>(path: P) -> Option<String> {
+    let wide: Vec<u16> = OsStr::new(path.as_ref())
+        .encode_wide()
+        .chain(Some(0))
+        .collect();
+
+    let mut buffer = vec![0u16; 260];
+    let len = unsafe { GetShortPathNameW(wide.as_ptr(), buffer.as_mut_ptr(), buffer.len() as u32) };
+
+    if len > 0 {
+        Some(String::from_utf16_lossy(&buffer[..len as usize]))
+    } else {
+        None
+    }
+}
+
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
     state: State<'_, AppState>,
     backend_path: &str,
     library_path: Option<&str>,
-    args: Vec<String>,
+    mut args: Vec<String>,
 ) -> ServerResult<SessionInfo> {
     let mut process_map = state.llama_server_process.lock().await;

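One edge case in `get_short_path`: when the destination buffer is too small, `GetShortPathNameW` writes nothing and returns the required size (including the terminating NUL), so `len` can exceed `buffer.len()` and the slice above would panic for paths longer than 260 UTF-16 units. A common pattern is to retry with the reported size; a self-contained sketch of that variant (assumed alternative, not what the PR ships):

```rust
#[cfg(windows)]
fn get_short_path_checked(path: &std::path::Path) -> Option<String> {
    use std::ffi::OsStr;
    use std::os::windows::ffi::OsStrExt;
    use windows_sys::Win32::Storage::FileSystem::GetShortPathNameW;

    let wide: Vec<u16> = OsStr::new(path).encode_wide().chain(Some(0)).collect();
    let mut buffer = vec![0u16; 260];
    // First call either fills the buffer or reports the size it needs.
    let mut len = unsafe { GetShortPathNameW(wide.as_ptr(), buffer.as_mut_ptr(), buffer.len() as u32) };
    if len as usize > buffer.len() {
        buffer.resize(len as usize, 0);
        len = unsafe { GetShortPathNameW(wide.as_ptr(), buffer.as_mut_ptr(), buffer.len() as u32) };
    }
    (len > 0).then(|| String::from_utf16_lossy(&buffer[..len as usize]))
}
```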
@@ -105,13 +133,38 @@ pub async fn load_llama_model(
             8080
         }
     };
-    let model_path = args
+    // FOR MODEL PATH; TODO: DO SIMILARLY FOR MMPROJ PATH
+    let model_path_index = args
         .iter()
         .position(|arg| arg == "-m")
-        .and_then(|i| args.get(i + 1))
-        .cloned()
-        .unwrap_or_default();
+        .ok_or(ServerError::LlamacppError("Missing `-m` flag".into()))?;
+
+    let model_path = args
+        .get(model_path_index + 1)
+        .ok_or(ServerError::LlamacppError("Missing path after `-m`".into()))?
+        .clone();
+
+    let model_path_pb = PathBuf::from(model_path);
+    if !model_path_pb.exists() {
+        return Err(ServerError::LlamacppError(format!(
+            "Invalid or inaccessible model path: {}",
+            model_path_pb.display().to_string(),
+        )));
+    }
+    #[cfg(windows)]
+    {
+        // use short path on Windows
+        if let Some(short) = get_short_path(&model_path_pb) {
+            args[model_path_index + 1] = short;
+        } else {
+            args[model_path_index + 1] = model_path_pb.display().to_string();
+        }
+    }
+    #[cfg(not(windows))]
+    {
+        args[model_path_index + 1] = model_path_pb.display().to_string();
+    }
+    // -----------------------------------------------------------------

     let api_key = args
         .iter()
@@ -181,7 +234,6 @@ pub async fn load_llama_model(

     // Create channels for communication between tasks
     let (ready_tx, mut ready_rx) = mpsc::channel::<bool>(1);
-    let (error_tx, mut error_rx) = mpsc::channel::<String>(1);

     // Spawn task to monitor stdout for readiness
     let _stdout_task = tokio::spawn(async move {
@@ -228,20 +280,10 @@ pub async fn load_llama_model(

             // Check for critical error indicators that should stop the process
             let line_lower = line.to_string().to_lowercase();
-            if line_lower.contains("error loading model")
-                || line_lower.contains("unknown model architecture")
-                || line_lower.contains("fatal")
-                || line_lower.contains("cuda error")
-                || line_lower.contains("out of memory")
-                || line_lower.contains("error")
-                || line_lower.contains("failed")
-            {
-                let _ = error_tx.send(line.to_string()).await;
-            }
             // Check for readiness indicator - llama-server outputs this when ready
-            else if line.contains("server is listening on")
-                || line.contains("starting the main loop")
-                || line.contains("server listening on")
+            if line_lower.contains("server is listening on")
+                || line_lower.contains("starting the main loop")
+                || line_lower.contains("server listening on")
             {
                 log::info!("Server appears to be ready based on stderr: '{}'", line);
                 let _ = ready_tx.send(true).await;
@@ -279,26 +321,6 @@ pub async fn load_llama_model(
                 log::info!("Server is ready to accept requests!");
                 break;
             }
-            // Error occurred
-            Some(error_msg) = error_rx.recv() => {
-                log::error!("Server encountered an error: {}", error_msg);
-
-                // Give process a moment to exit naturally
-                tokio::time::sleep(Duration::from_millis(100)).await;
-
-                // Check if process already exited
-                if let Some(status) = child.try_wait()? {
-                    log::info!("Process exited with code {:?}", status);
-                    return Err(ServerError::LlamacppError(error_msg));
-                } else {
-                    log::info!("Process still running, killing it...");
-                    let _ = child.kill().await;
-                }
-
-                // Get full stderr output
-                let stderr_output = stderr_task.await.unwrap_or_default();
-                return Err(ServerError::LlamacppError(format!("Error: {}\n\nFull stderr:\n{}", error_msg, stderr_output)));
-            }
             // Check for process exit more frequently
             _ = tokio::time::sleep(Duration::from_millis(50)) => {
                 // Check if process exited
@@ -332,7 +354,7 @@ pub async fn load_llama_model(
         pid: pid.clone(),
         port: port,
         model_id: model_id,
-        model_path: model_path,
+        model_path: model_path_pb.display().to_string(),
         api_key: api_key,
     };

@@ -704,7 +726,308 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<
 }

 // check port availability
-#[tauri::command]
-pub fn is_port_available(port: u16) -> bool {
+fn is_port_available(port: u16) -> bool {
     std::net::TcpListener::bind(("127.0.0.1", port)).is_ok()
 }

+#[tauri::command]
+pub async fn get_random_port(state: State<'_, AppState>) -> Result<u16, String> {
+    const MAX_ATTEMPTS: u32 = 20000;
+    let mut attempts = 0;
+    let mut rng = StdRng::from_entropy();
+
+    // Get all active ports from sessions
+    let map = state.llama_server_process.lock().await;
+
+    let used_ports: HashSet<u16> = map
+        .values()
+        .filter_map(|session| {
+            // Convert valid ports to u16 (filter out placeholder ports like -1)
+            if session.info.port > 0 && session.info.port <= u16::MAX as i32 {
+                Some(session.info.port as u16)
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    drop(map); // unlock early
+
+    while attempts < MAX_ATTEMPTS {
+        let port = rng.gen_range(3000..4000);
+
+        if used_ports.contains(&port) {
+            attempts += 1;
+            continue;
+        }
+
+        if is_port_available(port) {
+            return Ok(port);
+        }
+
+        attempts += 1;
+    }
+
+    Err("Failed to find an available port for the model to load".into())
+}
+
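Randomly probing the 3000..4000 range still leaves a small window: another process can grab the port between the availability check and the moment llama-server actually binds it. An alternative that avoids the search entirely is to let the OS assign an ephemeral port (hedged sketch; the bind-time race remains in either approach):

```rust
// Hedged alternative: bind to port 0 and read back the OS-assigned port.
fn os_assigned_port() -> std::io::Result<u16> {
    let listener = std::net::TcpListener::bind(("127.0.0.1", 0))?;
    Ok(listener.local_addr()?.port())
}
```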
+// find session
+#[tauri::command]
+pub async fn find_session_by_model(
+    model_id: String,
+    state: State<'_, AppState>,
+) -> Result<Option<SessionInfo>, String> {
+    let map = state.llama_server_process.lock().await;
+
+    let session_info = map
+        .values()
+        .find(|backend_session| backend_session.info.model_id == model_id)
+        .map(|backend_session| backend_session.info.clone());
+
+    Ok(session_info)
+}
+
+// get running models
+#[tauri::command]
+pub async fn get_loaded_models(state: State<'_, AppState>) -> Result<Vec<String>, String> {
+    let map = state.llama_server_process.lock().await;
+
+    let model_ids = map
+        .values()
+        .map(|backend_session| backend_session.info.model_id.clone())
+        .collect();
+
+    Ok(model_ids)
+}
+
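Both commands are registered in `lib.rs` further down and consumed from the extension's TypeScript side; the calls look roughly like this (sketch; the model id is illustrative, and Tauri maps the snake_case `model_id` argument to the camelCase `modelId` key):

```ts
// Hedged sketch of the TS side of these two commands.
const loaded = await invoke<string[]>('get_loaded_models')
const session = await invoke<SessionInfo | null>('find_session_by_model', {
  modelId: 'some-model-id', // illustrative id, not from this PR
})
```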
+// tests
+//
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::PathBuf;
+    #[cfg(windows)]
+    use tempfile;
+
+    #[test]
+    fn test_parse_multiple_devices() {
+        let output = r#"ggml_vulkan: Found 2 Vulkan devices:
+ggml_vulkan: 0 = NVIDIA GeForce RTX 3090 (NVIDIA) | uma: 0 | fp16: 1 | bf16: 0 | warp size: 32 | shared memory: 49152 | int dot: 0 | matrix cores: KHR_coopmat
+ggml_vulkan: 1 = AMD Radeon Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 0 | matrix cores: KHR_coopmat
+Available devices:
+Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)
+Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)
+"#;
+
+        let devices = parse_device_output(output).unwrap();
+
+        assert_eq!(devices.len(), 2);
+
+        // Check first device
+        assert_eq!(devices[0].id, "Vulkan0");
+        assert_eq!(devices[0].name, "NVIDIA GeForce RTX 3090");
+        assert_eq!(devices[0].mem, 24576);
+        assert_eq!(devices[0].free, 24576);
+
+        // Check second device
+        assert_eq!(devices[1].id, "Vulkan1");
+        assert_eq!(devices[1].name, "AMD Radeon Graphics (RADV GFX1151)");
+        assert_eq!(devices[1].mem, 87722);
+        assert_eq!(devices[1].free, 87722);
+    }
+
+    #[test]
+    fn test_parse_single_device() {
+        let output = r#"Available devices:
+CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)"#;
+
+        let devices = parse_device_output(output).unwrap();
+
+        assert_eq!(devices.len(), 1);
+        assert_eq!(devices[0].id, "CUDA0");
+        assert_eq!(devices[0].name, "NVIDIA GeForce RTX 4090");
+        assert_eq!(devices[0].mem, 24576);
+        assert_eq!(devices[0].free, 24000);
+    }
+
+    #[test]
+    fn test_parse_with_extra_whitespace_and_empty_lines() {
+        let output = r#"
+Available devices:
+
+Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)
+
+Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)
+
+"#;
+
+        let devices = parse_device_output(output).unwrap();
+
+        assert_eq!(devices.len(), 2);
+        assert_eq!(devices[0].id, "Vulkan0");
+        assert_eq!(devices[1].id, "Vulkan1");
+    }
+
+    #[test]
+    fn test_parse_different_backends() {
+        let output = r#"Available devices:
+CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)
+Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)
+SYCL0: Intel(R) Arc(TM) A750 Graphics (8000 MiB, 7721 MiB free)"#;
+
+        let devices = parse_device_output(output).unwrap();
+
+        assert_eq!(devices.len(), 3);
+
+        assert_eq!(devices[0].id, "CUDA0");
+        assert_eq!(devices[0].name, "NVIDIA GeForce RTX 4090");
+
+        assert_eq!(devices[1].id, "Vulkan0");
+        assert_eq!(devices[1].name, "NVIDIA GeForce RTX 3090");
+
+        assert_eq!(devices[2].id, "SYCL0");
+        assert_eq!(devices[2].name, "Intel(R) Arc(TM) A750 Graphics");
+        assert_eq!(devices[2].mem, 8000);
+        assert_eq!(devices[2].free, 7721);
+    }
+
+    #[test]
+    fn test_parse_complex_gpu_names() {
+        let output = r#"Available devices:
+Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)
+Vulkan1: AMD Radeon RX 7900 XTX (Navi 31) [RDNA 3] (24576 MiB, 24000 MiB free)"#;
+
+        let devices = parse_device_output(output).unwrap();
+
+        assert_eq!(devices.len(), 2);
+
+        assert_eq!(devices[0].id, "Vulkan0");
+        assert_eq!(devices[0].name, "Intel(R) Arc(tm) A750 Graphics (DG2)");
+        assert_eq!(devices[0].mem, 8128);
+        assert_eq!(devices[0].free, 8128);
+
+        assert_eq!(devices[1].id, "Vulkan1");
+        assert_eq!(devices[1].name, "AMD Radeon RX 7900 XTX (Navi 31) [RDNA 3]");
+        assert_eq!(devices[1].mem, 24576);
+        assert_eq!(devices[1].free, 24000);
+    }
+
+    #[test]
+    fn test_parse_no_devices() {
+        let output = r#"Available devices:"#;
+
+        let devices = parse_device_output(output).unwrap();
+        assert_eq!(devices.len(), 0);
+    }
+
+    #[test]
+    fn test_parse_missing_header() {
+        let output = r#"Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)"#;
+
+        let result = parse_device_output(output);
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .to_string()
+            .contains("Could not find 'Available devices:' section"));
+    }
+
+    #[test]
+    fn test_parse_malformed_device_line() {
+        let output = r#"Available devices:
+Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)
+Invalid line without colon
+Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)"#;
+
+        let devices = parse_device_output(output).unwrap();
+
+        // Should skip the malformed line and parse the valid ones
+        assert_eq!(devices.len(), 2);
+        assert_eq!(devices[0].id, "Vulkan0");
+        assert_eq!(devices[1].id, "Vulkan1");
+    }
+
+    #[test]
+    fn test_parse_device_line_individual() {
+        // Test the individual line parser
+        let line = "Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)";
+        let device = parse_device_line(line).unwrap().unwrap();
+
+        assert_eq!(device.id, "Vulkan0");
+        assert_eq!(device.name, "NVIDIA GeForce RTX 3090");
+        assert_eq!(device.mem, 24576);
+        assert_eq!(device.free, 24576);
+    }
+
+    #[test]
+    fn test_memory_pattern_detection() {
+        assert!(is_memory_pattern("24576 MiB, 24576 MiB free"));
+        assert!(is_memory_pattern("8000 MiB, 7721 MiB free"));
+        assert!(!is_memory_pattern("just some text"));
+        assert!(!is_memory_pattern("24576 MiB"));
+        assert!(!is_memory_pattern("24576, 24576"));
+    }
+
+    #[test]
+    fn test_parse_memory_value() {
+        assert_eq!(parse_memory_value("24576 MiB").unwrap(), 24576);
+        assert_eq!(parse_memory_value("7721 MiB free").unwrap(), 7721);
+        assert_eq!(parse_memory_value("8000").unwrap(), 8000);
+
+        assert!(parse_memory_value("").is_err());
+        assert!(parse_memory_value("not_a_number MiB").is_err());
+    }
+
+    #[test]
+    fn test_find_memory_pattern() {
+        let text = "NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)";
+        let result = find_memory_pattern(text);
+        assert!(result.is_some());
+        let (_start, content) = result.unwrap();
+        assert_eq!(content, "24576 MiB, 24576 MiB free");
+
+        // Test with multiple parentheses
+        let text = "Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)";
+        let result = find_memory_pattern(text);
+        assert!(result.is_some());
+        let (_start, content) = result.unwrap();
+        assert_eq!(content, "8128 MiB, 8128 MiB free");
+    }
+
+    #[test]
+    fn test_path_with_uncommon_dir_names() {
+        const UNCOMMON_DIR_NAME: &str = "тест-你好-éàç-🚀";
+        #[cfg(windows)]
+        {
+            let dir = tempfile::tempdir().expect("Failed to create temp dir");
+            let long_path = dir.path().join(UNCOMMON_DIR_NAME);
+            std::fs::create_dir(&long_path)
+                .expect("Failed to create test directory with non-ASCII name");
+            let short_path = get_short_path(&long_path);
+            assert!(
+                short_path.is_ascii(),
+                "The resulting short path must be composed of only ASCII characters. Got: {}",
+                short_path
+            );
+            assert!(
+                PathBuf::from(&short_path).exists(),
+                "The returned short path must exist on the filesystem"
+            );
+            assert_ne!(
+                short_path,
+                long_path.to_str().unwrap(),
+                "Short path should not be the same as the long path"
+            );
+        }
+        #[cfg(not(windows))]
+        {
+            // On Unix, paths are typically UTF-8 and there's no "short path" concept.
+            let long_path_str = format!("/tmp/{}", UNCOMMON_DIR_NAME);
+            let path_buf = PathBuf::from(&long_path_str);
+            let displayed_path = path_buf.display().to_string();
+            assert_eq!(
+                displayed_path, long_path_str,
+                "Path with non-ASCII characters should be preserved exactly on non-Windows platforms"
+            );
+        }
+    }
+}
@@ -103,6 +103,21 @@ pub fn normalize_path(path: &Path) -> PathBuf {
     ret
 }

+pub fn can_override_npx() -> bool {
+    // we need to check the CPU for AVX2 instruction support if we are running
+    // on macOS with an Intel CPU. We can override the `npx` command with `bun`
+    // only if the CPU supports AVX2; otherwise we need the default `npx` binary
+    #[cfg(all(target_os = "macos", any(target_arch = "x86", target_arch = "x86_64")))]
+    {
+        if !is_x86_feature_detected!("avx2") {
+            log::warn!("Your CPU doesn't support the AVX2 instruction set, the default npx binary will be used");
+            return false; // we cannot override npx with the bun binary
+        }
+    }
+
+    true // by default, we can override npx with the bun binary
+}
+
 #[tauri::command]
 pub fn write_yaml(
     app: tauri::AppHandle,
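`can_override_npx` feeds the MCP launcher shown earlier; the decision ultimately reduces to which executable gets spawned (sketch; the real code uses tokio's `Command` and the bundled bun path):

```rust
// Hedged usage sketch: choose the runner at spawn time.
let runner = if can_override_npx() { "bun" } else { "npx" };
let mut cmd = std::process::Command::new(runner);
```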
@@ -195,3 +210,4 @@ pub fn is_library_available(library: &str) -> bool {
         }
     }
 }
+
@@ -95,7 +95,9 @@ pub fn run() {
             core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
             core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
             core::utils::extensions::inference_llamacpp_extension::server::get_devices,
-            core::utils::extensions::inference_llamacpp_extension::server::is_port_available,
+            core::utils::extensions::inference_llamacpp_extension::server::get_random_port,
+            core::utils::extensions::inference_llamacpp_extension::server::find_session_by_model,
+            core::utils::extensions::inference_llamacpp_extension::server::get_loaded_models,
             core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
             core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
         ])
@@ -129,6 +131,12 @@ pub fn run() {
             if let Err(e) = setup::install_extensions(app.handle().clone(), false) {
                 log::error!("Failed to install extensions: {}", e);
             }
+
+            #[cfg(any(windows, target_os = "linux"))]
+            {
+                use tauri_plugin_deep_link::DeepLinkExt;
+                app.deep_link().register_all()?;
+            }
             setup_mcp(app);
             Ok(())
         })
@@ -6,7 +6,7 @@
   "build": {
     "frontendDist": "../web-app/dist",
     "devUrl": "http://localhost:1420",
-    "beforeDevCommand": "cross-env IS_TAURI=true CLEAN=true yarn dev:web",
+    "beforeDevCommand": "cross-env IS_TAURI=true yarn dev:web",
    "beforeBuildCommand": "cross-env IS_TAURI=true yarn build:web"
   },
   "app": {
@@ -161,8 +161,14 @@ export function DownloadManagement() {
       console.debug('onFileDownloadError', state)
       removeDownload(state.modelId)
       removeLocalDownloadingModel(state.modelId)
+      toast.error(t('common:toast.downloadFailed.title'), {
+        id: 'download-failed',
+        description: t('common:toast.downloadFailed.description', {
+          item: state.modelId,
+        }),
+      })
     },
-    [removeDownload, removeLocalDownloadingModel]
+    [removeDownload, removeLocalDownloadingModel, t]
   )

   const onFileDownloadStopped = useCallback(
@@ -71,7 +71,7 @@ export function ModelSetting({
     })

     // Call debounced stopModel only when updating ctx_len or ngl
-    if (key === 'ctx_len' || key === 'ngl') {
+    if (key === 'ctx_len' || key === 'ngl' || key === 'chat_template') {
       debouncedStopModel(model.id)
     }
   }
@@ -101,9 +101,17 @@ export function ModelSetting({
       <div className="px-4 space-y-6">
         {Object.entries(model.settings || {}).map(([key, value]) => {
           const config = value as ProviderSetting

           return (
             <div key={key} className="space-y-2">
-              <div className="flex items-start justify-between gap-8">
+              <div
+                className={cn(
+                  'flex items-start justify-between gap-8 last:mb-2',
+                  (key === 'chat_template' ||
+                    key === 'override_tensor_buffer_t') &&
+                    'flex-col gap-1 w-full'
+                )}
+              >
                 <div className="space-y-1 mb-2">
                   <h3 className="font-medium">{config.title}</h3>
                   <p className="text-main-view-fg/70 text-xs">
@@ -30,14 +30,31 @@ const ThinkingBlock = ({ id, text }: Props) => {
   const { thinkingState, setThinkingState } = useThinkingStore()
   const { streamingContent } = useAppState()
   const { t } = useTranslation()
-  const loading = !text.includes('</think>') && streamingContent
+  // Check for thinking formats
+  const hasThinkTag = text.includes('<think>') && !text.includes('</think>')
+  const hasAnalysisChannel = text.includes('<|channel|>analysis<|message|>') && !text.includes('<|start|>assistant<|channel|>final<|message|>')
+  const loading = (hasThinkTag || hasAnalysisChannel) && streamingContent
   const isExpanded = thinkingState[id] ?? (loading ? true : false)
   const handleClick = () => {
     const newExpandedState = !isExpanded
     setThinkingState(id, newExpandedState)
   }

-  if (!text.replace(/<\/?think>/g, '').trim()) return null
+  // Extract thinking content from either format
+  const extractThinkingContent = (text: string) => {
+    return text
+      .replace(/<\/?think>/g, '')
+      .replace(/<\|channel\|>analysis<\|message\|>/g, '')
+      .replace(/<\|start\|>assistant<\|channel\|>final<\|message\|>/g, '')
+      .replace(/assistant<\|channel\|>final<\|message\|>/g, '')
+      .replace(/<\|channel\|>/g, '') // remove any remaining channel markers
+      .replace(/<\|message\|>/g, '') // remove any remaining message markers
+      .replace(/<\|start\|>/g, '') // remove any remaining start markers
+      .trim()
+  }
+
+  const thinkingContent = extractThinkingContent(text)
+  if (!thinkingContent) return null

   return (
     <div
@@ -63,7 +80,7 @@ const ThinkingBlock = ({ id, text }: Props) => {

       {isExpanded && (
         <div className="mt-2 pl-6 pr-4 text-main-view-fg/60">
-          <RenderMarkdown content={text.replace(/<\/?think>/g, '').trim()} />
+          <RenderMarkdown content={thinkingContent} />
         </div>
       )}
     </div>
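The `<|channel|>analysis<|message|>` / `<|start|>assistant<|channel|>final<|message|>` markers match the Harmony-style channel format used by gpt-oss models (an inference from the token names; the diff itself does not name the model family). A quick trace of the extraction on a synthetic transcript (illustrative input only):

```ts
const sample =
  '<|channel|>analysis<|message|>weighing options<|start|>assistant<|channel|>final<|message|>answer'
// After the replacements above the markers are gone and the analysis and
// final text are concatenated: 'weighing optionsanswer'
```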
@@ -170,18 +170,33 @@ export const ThreadContent = memo(
     )

     const { reasoningSegment, textSegment } = useMemo(() => {
-      const isThinking = text.includes('<think>') && !text.includes('</think>')
-      if (isThinking) return { reasoningSegment: text, textSegment: '' }
+      // Check for thinking formats
+      const hasThinkTag = text.includes('<think>') && !text.includes('</think>')
+      const hasAnalysisChannel = text.includes('<|channel|>analysis<|message|>') && !text.includes('<|start|>assistant<|channel|>final<|message|>')

-      const match = text.match(/<think>([\s\S]*?)<\/think>/)
-      if (match?.index === undefined)
-        return { reasoningSegment: undefined, textSegment: text }
+      if (hasThinkTag || hasAnalysisChannel) return { reasoningSegment: text, textSegment: '' }

-      const splitIndex = match.index + match[0].length
-      return {
-        reasoningSegment: text.slice(0, splitIndex),
-        textSegment: text.slice(splitIndex),
+      // Check for completed think tag format
+      const thinkMatch = text.match(/<think>([\s\S]*?)<\/think>/)
+      if (thinkMatch?.index !== undefined) {
+        const splitIndex = thinkMatch.index + thinkMatch[0].length
+        return {
+          reasoningSegment: text.slice(0, splitIndex),
+          textSegment: text.slice(splitIndex),
+        }
       }
+
+      // Check for completed analysis channel format
+      const analysisMatch = text.match(/<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/)
+      if (analysisMatch?.index !== undefined) {
+        const splitIndex = analysisMatch.index + analysisMatch[0].length
+        return {
+          reasoningSegment: text.slice(0, splitIndex),
+          textSegment: text.slice(splitIndex),
+        }
+      }
+
+      return { reasoningSegment: undefined, textSegment: text }
     }, [text])

     const { getMessages, deleteMessage } = useMessages()
@@ -36,7 +36,10 @@ export default function OutOfContextPromiseModal() {

   return (
     <Dialog open={isModalOpen} onOpenChange={handleDialogOpen}>
-      <DialogContent>
+      <DialogContent
+        showCloseButton={false}
+        onInteractOutside={(e) => e.preventDefault()}
+      >
         <DialogHeader>
           <DialogTitle>{t('model-errors:title')}</DialogTitle>
         </DialogHeader>
@@ -57,7 +60,7 @@ export default function OutOfContextPromiseModal() {
             {t('model-errors:truncateInput')}
           </Button>
           <Button
-            asChild
+            autoFocus
             onClick={() => {
               handleContextLength()
             }}
@@ -5,30 +5,61 @@ import {
   DropdownMenuTrigger,
 } from '@/components/ui/dropdown-menu'

+import {
+  Tooltip,
+  TooltipTrigger,
+  TooltipContent,
+} from '@/components/ui/tooltip'
+import { IconStarFilled } from '@tabler/icons-react'
+import { cn } from '@/lib/utils'
+
 // Dropdown component
 type DropdownControlProps = {
   value: string
   options?: Array<{ value: number | string; name: string }>
+  recommended?: string
   onChange: (value: number | string) => void
 }

 export function DropdownControl({
   value,
   options = [],
+  recommended,
   onChange,
 }: DropdownControlProps) {
+  const isSelected =
+    options.find((option) => option.value === value)?.name || value
+
   return (
     <DropdownMenu>
-      <DropdownMenuTrigger className="bg-main-view-fg/5 hover:bg-main-view-fg/8 px-2 py-1 rounded font-medium cursor-pointer">
-        {options.find((option) => option.value === value)?.name || value}
+      <DropdownMenuTrigger className="bg-main-view-fg/5 hover:bg-main-view-fg/8 px-3 py-1 rounded-sm font-medium cursor-pointer">
+        {isSelected}
       </DropdownMenuTrigger>
-      <DropdownMenuContent align="end">
+      <DropdownMenuContent align="end" className="max-h-70">
         {options.map((option, optionIndex) => (
           <DropdownMenuItem
             key={optionIndex}
             onClick={() => onChange(option.value)}
+            className={cn(
+              'flex items-center justify-between my-1',
+              isSelected === option.name
+                ? 'bg-main-view-fg/6 hover:bg-main-view-fg/6'
+                : ''
+            )}
           >
-            {option.name}
+            <span>{option.name}</span>
+            {recommended === option.value && (
+              <Tooltip>
+                <TooltipTrigger asChild>
+                  <div className="cursor-pointer">
+                    <IconStarFilled className="text-accent" />
+                  </div>
+                </TooltipTrigger>
+                <TooltipContent side="top" sideOffset={8} className="z-50">
+                  Recommended
+                </TooltipContent>
+              </Tooltip>
+            )}
           </DropdownMenuItem>
         ))}
       </DropdownMenuContent>
@@ -28,6 +28,7 @@ type DynamicControllerProps = {
     min?: number
     max?: number
     step?: number
+    recommended?: string
   }
   onChange: (value: string | boolean | number) => void
 }
@@ -65,6 +66,7 @@ export function DynamicControllerSetting({
         <DropdownControl
           value={controllerProps.value as string}
           options={controllerProps.options}
+          recommended={controllerProps.recommended}
          onChange={(newValue) => onChange(newValue)}
         />
       )
@@ -365,8 +365,7 @@ export const useChat = () => {
       if (
         typeof errorMessage === 'string' &&
         errorMessage.includes(OUT_OF_CONTEXT_SIZE) &&
-        selectedModel &&
-        troubleshooting
+        selectedModel
       ) {
         const method = await showIncreaseContextSizeModal()
         if (method === 'ctx_len') {
@ -25,8 +25,8 @@ type MCPServerStoreState = {
|
|||||||
editServer: (key: string, config: MCPServerConfig) => void
|
editServer: (key: string, config: MCPServerConfig) => void
|
||||||
deleteServer: (key: string) => void
|
deleteServer: (key: string) => void
|
||||||
setServers: (servers: MCPServers) => void
|
setServers: (servers: MCPServers) => void
|
||||||
syncServers: () => void
|
syncServers: () => Promise<void>
|
||||||
syncServersAndRestart: () => void
|
syncServersAndRestart: () => Promise<void>
|
||||||
}
|
}
|
||||||
|
|
||||||
export const useMCPServers = create<MCPServerStoreState>()((set, get) => ({
|
export const useMCPServers = create<MCPServerStoreState>()((set, get) => ({
|
||||||
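Re-typing `syncServers` and `syncServersAndRestart` as `() => Promise<void>` lets callers sequence on completion instead of firing and forgetting. A sketch of what that enables (the usage is assumed, not shown in the diff):

```ts
// Assumed usage sketch: zustand stores expose getState(), so an async caller
// can now await the sync before reading the refreshed state.
const { syncServers } = useMCPServers.getState()

async function refreshServers() {
  await syncServers() // awaitable now that the action returns Promise<void>
  // follow-up work here runs only after the sync has finished
}
```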
@@ -2,6 +2,7 @@ import { create } from 'zustand'
 import { persist, createJSONStorage } from 'zustand/middleware'
 import { localStorageKey } from '@/constants/localStorage'
 import { sep } from '@tauri-apps/api/path'
+import { modelSettings } from '@/lib/predefined'

 type ModelProviderState = {
   providers: ModelProvider[]
@@ -211,8 +212,21 @@ export const useModelProvider = create<ModelProviderState>()(
       name: localStorageKey.modelProvider,
       storage: createJSONStorage(() => localStorage),
       migrate: (persistedState: unknown, version: number) => {
-        const state = persistedState as ModelProviderState
+        const state = persistedState as ModelProviderState & {
+          providers: Array<
+            ModelProvider & {
+              models: Array<
+                Model & {
+                  settings?: Record<string, unknown> & {
+                    chatTemplate?: string
+                    chat_template?: string
+                  }
+                }
+              >
+            }
+          >
+        }

         // Migration for cont_batching description update (version 0 -> 1)
         if (version === 0 && state?.providers) {
           state.providers = state.providers.map((provider) => {
@@ -221,7 +235,8 @@ export const useModelProvider = create<ModelProviderState>()(
             if (setting.key === 'cont_batching') {
               return {
                 ...setting,
-                description: 'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.'
+                description:
+                  'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.',
               }
             }
             return setting
@@ -230,9 +245,65 @@ export const useModelProvider = create<ModelProviderState>()(
             return provider
           })
         }
+
+        // Migration for chatTemplate key to chat_template (version 1 -> 2)
+        if (version === 1 && state?.providers) {
+          state.providers.forEach((provider) => {
+            if (provider.models) {
+              provider.models.forEach((model) => {
+                // Initialize settings if it doesn't exist
+                if (!model.settings) {
+                  model.settings = {}
+                }
+
+                // Migrate chatTemplate key to chat_template
+                if (model.settings.chatTemplate) {
+                  model.settings.chat_template = model.settings.chatTemplate
+                  delete model.settings.chatTemplate
+                }
+
+                // Add missing chat_template setting if it doesn't exist
+                if (!model.settings.chat_template) {
+                  model.settings.chat_template = {
+                    ...modelSettings.chatTemplate,
+                    controller_props: {
+                      ...modelSettings.chatTemplate.controller_props,
+                    },
+                  }
+                }
+              })
+            }
+          })
+        }
+
+        // Migration for override_tensor_buffer_type key (version 2 -> 3)
+        if (version === 2 && state?.providers) {
+          state.providers.forEach((provider) => {
+            if (provider.models) {
+              provider.models.forEach((model) => {
+                // Initialize settings if it doesn't exist
+                if (!model.settings) {
+                  model.settings = {}
+                }
+
+                // Add missing override_tensor_buffer_type setting if it doesn't exist
+                if (!model.settings.override_tensor_buffer_t) {
+                  model.settings.override_tensor_buffer_t = {
+                    ...modelSettings.override_tensor_buffer_t,
+                    controller_props: {
+                      ...modelSettings.override_tensor_buffer_t
+                        .controller_props,
+                    },
+                  }
+                }
+              })
+            }
+          })
+        }

         return state
       },
-      version: 1,
+      version: 3,
     }
   )
 )
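For context on when these branches run: zustand's `persist` middleware calls `migrate` once at rehydration, passing the version number found in storage, and each `if (version === N)` block upgrades state persisted at exactly that version. A minimal standalone sketch of the same pattern (store shape hypothetical):

```ts
import { create } from 'zustand'
import { persist, createJSONStorage } from 'zustand/middleware'

// Hypothetical store used only to illustrate versioned migrations.
type CounterState = { count: number; label?: string }

export const useCounter = create<CounterState>()(
  persist(
    () => ({ count: 0 }),
    {
      name: 'counter-store', // storage key (hypothetical)
      version: 2, // bump whenever the persisted shape changes
      storage: createJSONStorage(() => localStorage),
      migrate: (persisted: unknown, version: number) => {
        const state = persisted as CounterState & { oldLabel?: string }
        // Runs once, with the version the data was written at.
        if (version === 1 && state.oldLabel) {
          state.label = state.oldLabel
          delete state.oldLabel
        }
        return state
      },
    }
  )
)
```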
@@ -407,6 +407,50 @@ describe('CompletionMessagesBuilder', () => {
     const result = builder.getMessages()
     expect(result[0].content).toBe('Clean answer')
   })
+
+  it('should remove analysis channel reasoning content', () => {
+    const builder = new CompletionMessagesBuilder([])
+
+    builder.addAssistantMessage(
+      '<|channel|>analysis<|message|>Let me analyze this step by step...<|start|>assistant<|channel|>final<|message|>The final answer is 42.'
+    )
+
+    const result = builder.getMessages()
+    expect(result[0].content).toBe('The final answer is 42.')
+  })
+
+  it('should handle analysis channel without final message', () => {
+    const builder = new CompletionMessagesBuilder([])
+
+    builder.addAssistantMessage(
+      '<|channel|>analysis<|message|>Only analysis content here...'
+    )
+
+    const result = builder.getMessages()
+    expect(result[0].content).toBe('<|channel|>analysis<|message|>Only analysis content here...')
+  })
+
+  it('should handle analysis channel with multiline content', () => {
+    const builder = new CompletionMessagesBuilder([])
+
+    builder.addAssistantMessage(
+      '<|channel|>analysis<|message|>Step 1: First analysis\nStep 2: Second analysis\nStep 3: Final analysis<|start|>assistant<|channel|>final<|message|>Based on my analysis, here is the result.'
+    )
+
+    const result = builder.getMessages()
+    expect(result[0].content).toBe('Based on my analysis, here is the result.')
+  })
+
+  it('should handle both think and analysis channel tags', () => {
+    const builder = new CompletionMessagesBuilder([])
+
+    builder.addAssistantMessage(
+      '<think>Initial thought</think><|channel|>analysis<|message|>Detailed analysis<|start|>assistant<|channel|>final<|message|>Final response'
+    )
+
+    const result = builder.getMessages()
+    expect(result[0].content).toBe('Final response')
+  })
 })

 describe('integration tests', () => {
@@ -102,6 +102,15 @@ export class CompletionMessagesBuilder {
         content = content.slice(splitIndex).trim()
       }
     }
+    if (content.includes('<|channel|>analysis<|message|>')) {
+      const match = content.match(
+        /<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/
+      )
+      if (match?.index !== undefined) {
+        const splitIndex = match.index + match[0].length
+        content = content.slice(splitIndex).trim()
+      }
+    }
     return content
   }
 }
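The new branch above strips `analysis`-channel reasoning only when a `final`-channel marker follows, which is exactly the behavior the new tests pin down. The same logic as a standalone sketch (function name hypothetical):

```ts
// Keeps only the text after the final-channel marker; leaves the string
// untouched when no final message follows the analysis block.
const stripAnalysisChannel = (content: string): string => {
  const match = content.match(
    /<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/
  )
  if (match?.index !== undefined) {
    return content.slice(match.index + match[0].length).trim()
  }
  return content
}

console.log(
  stripAnalysisChannel(
    '<|channel|>analysis<|message|>thinking...<|start|>assistant<|channel|>final<|message|>Answer'
  )
) // -> 'Answer'
```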
@@ -22,7 +22,6 @@ export const modelSettings = {
       type: 'number',
     },
   },
-
   temperature: {
     key: 'temperature',
     title: 'Temperature',
@@ -121,4 +120,28 @@ export const modelSettings = {
       type: 'number',
     },
   },
+  chatTemplate: {
+    key: 'chat_template',
+    title: 'Custom Jinja Chat template',
+    description: 'Custom Jinja chat_template to be used for the model',
+    controller_type: 'textarea',
+    controller_props: {
+      value: '',
+      placeholder:
+        'e.g., {% for message in messages %}...{% endfor %} (default is read from GGUF)',
+      type: 'text',
+      textAlign: 'right',
+    },
+  },
+  override_tensor_buffer_t: {
+    key: 'override_tensor_buffer_t',
+    title: 'Override Tensor Buffer Type',
+    description: 'Override the tensor buffer type for the model',
+    controller_type: 'input',
+    controller_props: {
+      value: '',
+      placeholder: 'e.g., layers\\.\\d+\\.ffn_.*=CPU',
+      type: 'text',
+    },
+  },
 }
@@ -257,6 +257,10 @@
     "downloadCancelled": {
       "title": "Download Cancelled",
       "description": "The download process was cancelled"
+    },
+    "downloadFailed": {
+      "title": "Download Failed",
+      "description": "{{item}} download failed"
     }
   }
 }
@@ -62,7 +62,12 @@ export function DataProvider() {

   // Check for app updates
   useEffect(() => {
-    checkForUpdate()
+    // Only check for updates if the auto updater is not disabled
+    // App might be distributed via other package managers
+    // or methods that handle updates differently
+    if (!AUTO_UPDATER_DISABLED) {
+      checkForUpdate()
+    }
   }, [checkForUpdate])

   const handleDeepLink = (urls: string[] | null) => {
@@ -79,7 +84,7 @@ export function DataProvider() {
       const resource = params.slice(1).join('/')
       // return { action, provider, resource }
       navigate({
-        to: route.hub.index,
+        to: route.hub.model,
         search: {
           repo: resource,
         },
@@ -1,5 +1,10 @@
 import HeaderPage from '@/containers/HeaderPage'
-import { createFileRoute, useParams, useNavigate } from '@tanstack/react-router'
+import {
+  createFileRoute,
+  useParams,
+  useNavigate,
+  useSearch,
+} from '@tanstack/react-router'
 import {
   IconArrowLeft,
   IconDownload,
@@ -13,23 +18,38 @@ import { RenderMarkdown } from '@/containers/RenderMarkdown'
 import { useEffect, useMemo, useCallback, useState } from 'react'
 import { useModelProvider } from '@/hooks/useModelProvider'
 import { useDownloadStore } from '@/hooks/useDownloadStore'
-import { pullModel } from '@/services/models'
+import {
+  CatalogModel,
+  convertHfRepoToCatalogModel,
+  fetchHuggingFaceRepo,
+  pullModel,
+} from '@/services/models'
 import { Progress } from '@/components/ui/progress'
 import { Button } from '@/components/ui/button'
 import { cn } from '@/lib/utils'
+
+type SearchParams = {
+  repo: string
+}

 export const Route = createFileRoute('/hub/$modelId')({
   component: HubModelDetail,
+  validateSearch: (search: Record<string, unknown>): SearchParams => ({
+    repo: search.repo as SearchParams['repo'],
+  }),
 })

 function HubModelDetail() {
   const { modelId } = useParams({ from: Route.id })
   const navigate = useNavigate()
   const { sources, fetchSources } = useModelSources()
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const search = useSearch({ from: Route.id as any })
   const { getProviderByName } = useModelProvider()
   const llamaProvider = getProviderByName('llamacpp')
   const { downloads, localDownloadingModels, addLocalDownloadingModel } =
     useDownloadStore()
+  const [repoData, setRepoData] = useState<CatalogModel | undefined>()

   // State for README content
   const [readmeContent, setReadmeContent] = useState<string>('')
@@ -39,10 +59,21 @@ function HubModelDetail() {
     fetchSources()
   }, [fetchSources])

+  const fetchRepo = useCallback(async () => {
+    const repoInfo = await fetchHuggingFaceRepo(search.repo || modelId)
+    if (repoInfo) {
+      const repoDetail = convertHfRepoToCatalogModel(repoInfo)
+      setRepoData(repoDetail)
+    }
+  }, [modelId, search])
+
+  useEffect(() => {
+    fetchRepo()
+  }, [modelId, fetchRepo])
   // Find the model data from sources
   const modelData = useMemo(() => {
-    return sources.find((model) => model.model_name === modelId)
-  }, [sources, modelId])
+    return sources.find((model) => model.model_name === modelId) ?? repoData
+  }, [sources, modelId, repoData])

   // Download processes
   const downloadProcesses = useMemo(
@@ -116,7 +147,6 @@ function HubModelDetail() {
     })
   }, [modelData])

-
   // Fetch README content when modelData.readme is available
   useEffect(() => {
     if (modelData?.readme) {
@@ -31,7 +31,7 @@ import {
   CatalogModel,
   pullModel,
   fetchHuggingFaceRepo,
-  HuggingFaceRepo,
+  convertHfRepoToCatalogModel,
 } from '@/services/models'
 import { useDownloadStore } from '@/hooks/useDownloadStore'
 import { Progress } from '@/components/ui/progress'
@@ -63,14 +63,16 @@ function Hub() {
     { value: 'newest', name: t('hub:sortNewest') },
     { value: 'most-downloaded', name: t('hub:sortMostDownloaded') },
   ]
-  const searchOptions = {
-    includeScore: true,
-    // Search in `author` and in `tags` array
-    keys: ['model_name', 'quants.model_id'],
-  }
+  const searchOptions = useMemo(() => {
+    return {
+      includeScore: true,
+      // Search in `author` and in `tags` array
+      keys: ['model_name', 'quants.model_id'],
+    }
+  }, [])

   const { sources, addSource, fetchSources, loading } = useModelSources()
-  const search = useSearch({ from: route.hub.index as any })
   const [searchValue, setSearchValue] = useState('')
   const [sortSelected, setSortSelected] = useState('newest')
   const [expandedModels, setExpandedModels] = useState<Record<string, boolean>>(
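Wrapping `searchOptions` in `useMemo` with an empty dependency array gives the options object a stable identity across renders, so hooks that list it as a dependency no longer re-fire on every render. A reduced sketch of the pattern (hook wrapper hypothetical):

```ts
import { useMemo } from 'react'

// Without useMemo, a fresh object literal is created on each render and
// would invalidate any dependency array that contains it.
function useSearchOptions() {
  return useMemo(
    () => ({
      includeScore: true,
      keys: ['model_name', 'quants.model_id'],
    }),
    [] // identical reference on every render
  )
}
```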
@@ -92,48 +94,6 @@ function Hub() {
   const { getProviderByName } = useModelProvider()
   const llamaProvider = getProviderByName('llamacpp')
-
-  // Convert HuggingFace repository to CatalogModel format
-  const convertHfRepoToCatalogModel = useCallback(
-    (repo: HuggingFaceRepo): CatalogModel => {
-      // Extract GGUF files from the repository siblings
-      const ggufFiles =
-        repo.siblings?.filter((file) =>
-          file.rfilename.toLowerCase().endsWith('.gguf')
-        ) || []
-
-      // Convert GGUF files to quants format
-      const quants = ggufFiles.map((file) => {
-        // Format file size
-        const formatFileSize = (size?: number) => {
-          if (!size) return 'Unknown size'
-          if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB`
-          return `${(size / 1024 ** 3).toFixed(1)} GB`
-        }
-
-        // Generate model_id from filename (remove .gguf extension, case-insensitive)
-        const modelId = file.rfilename.replace(/\.gguf$/i, '')
-
-        return {
-          model_id: modelId,
-          path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
-          file_size: formatFileSize(file.size),
-        }
-      })
-
-      return {
-        model_name: repo.modelId,
-        description: `**Metadata:** ${repo.pipeline_tag}\n\n **Tags**: ${repo.tags?.join(', ')}`,
-        developer: repo.author,
-        downloads: repo.downloads || 0,
-        num_quants: quants.length,
-        quants: quants,
-        created_at: repo.created_at,
-        readme: `https://huggingface.co/${repo.modelId}/resolve/main/README.md`,
-      }
-    },
-    []
-  )
-
   const toggleModelExpansion = (modelId: string) => {
     setExpandedModels((prev) => ({
       ...prev,
@@ -141,35 +101,6 @@ function Hub() {
     }))
   }
-
-  useEffect(() => {
-    if (search.repo) {
-      setSearchValue(search.repo || '')
-      setIsSearching(true)
-
-      addModelSourceTimeoutRef.current = setTimeout(async () => {
-        try {
-          // Fetch HuggingFace repository information
-          const repoInfo = await fetchHuggingFaceRepo(search.repo)
-          if (repoInfo) {
-            const catalogModel = convertHfRepoToCatalogModel(repoInfo)
-            if (
-              !sources.some((s) => s.model_name === catalogModel.model_name)
-            ) {
-              setHuggingFaceRepo(catalogModel)
-              addSource(catalogModel)
-            }
-          }
-
-          await fetchSources()
-        } catch (error) {
-          console.error('Error fetching repository info:', error)
-        } finally {
-          setIsSearching(false)
-        }
-      }, 500)
-    }
-  }, [convertHfRepoToCatalogModel, fetchSources, addSource, search, sources])
-
   // Sorting functionality
   const sortedModels = useMemo(() => {
     return [...sources].sort((a, b) => {
@@ -264,9 +195,6 @@ function Hub() {
           addSource(catalogModel)
         }
       }
-
-      // Original addSource logic (if needed)
-      await fetchSources()
     } catch (error) {
       console.error('Error fetching repository info:', error)
     } finally {
@@ -83,11 +83,7 @@ function MCPServers() {
   }

   const handleSaveServer = async (name: string, config: MCPServerConfig) => {
-    try {
-      await toggleServer(name, false)
-    } catch (error) {
-      console.error('Error deactivating server:', error)
-    }
+    toggleServer(name, false)
     if (editingKey) {
       // If server name changed, delete old one and add new one
       if (editingKey !== name) {
@@ -102,7 +98,7 @@ function MCPServers() {
     }

     syncServers()
-    await toggleServer(name, true)
+    toggleServer(name, true)
   }

   const handleEdit = (serverKey: string) => {
@@ -147,25 +143,26 @@ function MCPServers() {
   ) => {
     if (jsonServerName) {
       try {
-        await toggleServer(jsonServerName, false)
+        toggleServer(jsonServerName, false)
       } catch (error) {
         console.error('Error deactivating server:', error)
       }
       // Save single server
       editServer(jsonServerName, data as MCPServerConfig)
-      syncServers()
-      toggleServer(jsonServerName, true)
+      toggleServer(jsonServerName, (data as MCPServerConfig).active || false)
     } else {
       // Save all servers
       // Clear existing servers first
-      Object.keys(mcpServers).forEach((key) => {
-        deleteServer(key)
+      Object.keys(mcpServers).forEach((serverKey) => {
+        toggleServer(serverKey, false)
+        deleteServer(serverKey)
       })

       // Add all servers from the JSON
       Object.entries(data as Record<string, MCPServerConfig>).forEach(
         ([key, config]) => {
           addServer(key, config)
+          toggleServer(key, config.active || false)
         }
       )
     }
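The edited bulk-import path now deactivates each existing server before deleting it, and restores each imported server's saved `active` flag instead of toggling everything on. The intended sequence as a standalone sketch (the wrapper function and its parameters are hypothetical; the store actions come from the diff):

```ts
// Hypothetical wrapper around the store actions shown above.
function importServers(
  existing: Record<string, MCPServerConfig>,
  incoming: Record<string, MCPServerConfig>
) {
  const { toggleServer, deleteServer, addServer } = useMCPServers.getState()

  // Stop and remove everything that currently exists...
  Object.keys(existing).forEach((serverKey) => {
    toggleServer(serverKey, false)
    deleteServer(serverKey)
  })

  // ...then recreate from the imported JSON, honoring each saved state.
  Object.entries(incoming).forEach(([key, config]) => {
    addServer(key, config)
    toggleServer(key, config.active || false)
  })
}
```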
@@ -4,8 +4,7 @@ import { UIEventHandler } from 'react'
 import debounce from 'lodash.debounce'
 import cloneDeep from 'lodash.clonedeep'
 import { cn } from '@/lib/utils'
-import { ArrowDown } from 'lucide-react'
-import { Play } from 'lucide-react'
+import { ArrowDown, Play } from 'lucide-react'

 import HeaderPage from '@/containers/HeaderPage'
 import { useThreads } from '@/hooks/useThreads'
@@ -328,7 +327,7 @@ function ThreadDetail() {
         >
           {showScrollToBottomBtn && (
             <div
-              className="bg-main-view-fg/10 px-4 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
+              className="bg-main-view-fg/10 px-2 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
               onClick={() => {
                 scrollToBottom(true)
                 setIsUserScrolling(false)
@@ -340,7 +339,7 @@ function ThreadDetail() {
           )}
           {showGenerateAIResponseBtn && (
             <div
-              className="bg-main-view-fg/10 px-4 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
+              className="mx-2 bg-main-view-fg/10 px-2 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
               onClick={generateAIResponse}
             >
               <p className="text-xs">{t('common:generateAiResponse')}</p>
@@ -4,6 +4,7 @@ import {
   fetchModels,
   fetchModelCatalog,
   fetchHuggingFaceRepo,
+  convertHfRepoToCatalogModel,
   updateModel,
   pullModel,
   abortDownload,
@@ -12,6 +13,8 @@ import {
   stopModel,
   stopAllModels,
   startModel,
+  HuggingFaceRepo,
+  CatalogModel,
 } from '../models'
 import { EngineManager, Model } from '@janhq/core'

@@ -334,7 +337,9 @@ describe('models service', () => {
     })

     // Test with full URL
-    await fetchHuggingFaceRepo('https://huggingface.co/microsoft/DialoGPT-medium')
+    await fetchHuggingFaceRepo(
+      'https://huggingface.co/microsoft/DialoGPT-medium'
+    )
     expect(fetch).toHaveBeenCalledWith(
       'https://huggingface.co/api/models/microsoft/DialoGPT-medium?blobs=true'
     )
@@ -380,7 +385,7 @@ describe('models service', () => {

   it('should handle other HTTP errors', async () => {
     const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {})

     ;(fetch as any).mockResolvedValue({
       ok: false,
       status: 500,
@@ -394,13 +399,13 @@ describe('models service', () => {
       'Error fetching HuggingFace repository:',
       expect.any(Error)
     )

     consoleSpy.mockRestore()
   })

   it('should handle network errors', async () => {
     const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {})

     ;(fetch as any).mockRejectedValue(new Error('Network error'))

     const result = await fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
@@ -410,7 +415,7 @@ describe('models service', () => {
       'Error fetching HuggingFace repository:',
       expect.any(Error)
     )

     consoleSpy.mockRestore()
   })

@@ -524,7 +529,303 @@ describe('models service', () => {

     expect(result).toEqual(mockRepoData)
     // Verify the GGUF file is present in siblings
-    expect(result?.siblings?.some(s => s.rfilename.endsWith('.gguf'))).toBe(true)
+    expect(result?.siblings?.some((s) => s.rfilename.endsWith('.gguf'))).toBe(
+      true
+    )
+  })
+})
+
+describe('convertHfRepoToCatalogModel', () => {
+  const mockHuggingFaceRepo: HuggingFaceRepo = {
+    id: 'microsoft/DialoGPT-medium',
+    modelId: 'microsoft/DialoGPT-medium',
+    sha: 'abc123',
+    downloads: 1500,
+    likes: 75,
+    tags: ['pytorch', 'transformers', 'text-generation'],
+    pipeline_tag: 'text-generation',
+    created_at: '2021-01-01T00:00:00Z',
+    last_modified: '2021-12-01T00:00:00Z',
+    private: false,
+    disabled: false,
+    gated: false,
+    author: 'microsoft',
+    siblings: [
+      {
+        rfilename: 'model-q4_0.gguf',
+        size: 2 * 1024 * 1024 * 1024, // 2GB
+        blobId: 'blob123',
+      },
+      {
+        rfilename: 'model-q8_0.GGUF', // Test case-insensitive matching
+        size: 4 * 1024 * 1024 * 1024, // 4GB
+        blobId: 'blob456',
+      },
+      {
+        rfilename: 'tokenizer.json', // Non-GGUF file (should be filtered out)
+        size: 1024 * 1024, // 1MB
+        blobId: 'blob789',
+      },
+    ],
+  }
+
+  it('should convert HuggingFace repo to catalog model format', () => {
+    const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo)
+
+    const expected: CatalogModel = {
+      model_name: 'microsoft/DialoGPT-medium',
+      description: '**Tags**: pytorch, transformers, text-generation',
+      developer: 'microsoft',
+      downloads: 1500,
+      num_quants: 2,
+      quants: [
+        {
+          model_id: 'model-q4_0',
+          path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf',
+          file_size: '2.0 GB',
+        },
+        {
+          model_id: 'model-q8_0',
+          path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q8_0.GGUF',
+          file_size: '4.0 GB',
+        },
+      ],
+      created_at: '2021-01-01T00:00:00Z',
+      readme:
+        'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/README.md',
+    }
+
+    expect(result).toEqual(expected)
+  })
+
+  it('should handle repository with no GGUF files', () => {
+    const repoWithoutGGUF: HuggingFaceRepo = {
+      ...mockHuggingFaceRepo,
+      siblings: [
+        {
+          rfilename: 'tokenizer.json',
+          size: 1024 * 1024,
+          blobId: 'blob789',
+        },
+        {
+          rfilename: 'config.json',
+          size: 2048,
+          blobId: 'blob101',
+        },
+      ],
+    }
+
+    const result = convertHfRepoToCatalogModel(repoWithoutGGUF)
+
+    expect(result.num_quants).toBe(0)
+    expect(result.quants).toEqual([])
+  })
+
+  it('should handle repository with no siblings', () => {
+    const repoWithoutSiblings: HuggingFaceRepo = {
+      ...mockHuggingFaceRepo,
+      siblings: undefined,
+    }
+
+    const result = convertHfRepoToCatalogModel(repoWithoutSiblings)
+
+    expect(result.num_quants).toBe(0)
+    expect(result.quants).toEqual([])
+  })
+
+  it('should format file sizes correctly', () => {
+    const repoWithVariousFileSizes: HuggingFaceRepo = {
+      ...mockHuggingFaceRepo,
+      siblings: [
+        {
+          rfilename: 'small-model.gguf',
+          size: 500 * 1024 * 1024, // 500MB
+          blobId: 'blob1',
+        },
+        {
+          rfilename: 'large-model.gguf',
+          size: 3.5 * 1024 * 1024 * 1024, // 3.5GB
+          blobId: 'blob2',
+        },
+        {
+          rfilename: 'unknown-size.gguf',
+          // No size property
+          blobId: 'blob3',
+        },
+      ],
+    }
+
+    const result = convertHfRepoToCatalogModel(repoWithVariousFileSizes)
+
+    expect(result.quants[0].file_size).toBe('500.0 MB')
+    expect(result.quants[1].file_size).toBe('3.5 GB')
+    expect(result.quants[2].file_size).toBe('Unknown size')
+  })
+
+  it('should handle empty or undefined tags', () => {
+    const repoWithEmptyTags: HuggingFaceRepo = {
+      ...mockHuggingFaceRepo,
+      tags: [],
+    }
+
+    const result = convertHfRepoToCatalogModel(repoWithEmptyTags)
+
+    expect(result.description).toBe('**Tags**: ')
+  })
+
+  it('should handle missing downloads count', () => {
+    const repoWithoutDownloads: HuggingFaceRepo = {
+      ...mockHuggingFaceRepo,
+      downloads: undefined as any,
+    }
+
+    const result = convertHfRepoToCatalogModel(repoWithoutDownloads)
+
+    expect(result.downloads).toBe(0)
+  })
+
+  it('should correctly remove .gguf extension from model IDs', () => {
+    const repoWithVariousGGUF: HuggingFaceRepo = {
+      ...mockHuggingFaceRepo,
+      siblings: [
+        {
+          rfilename: 'model.gguf',
+          size: 1024,
+          blobId: 'blob1',
+        },
+        {
+          rfilename: 'MODEL.GGUF',
+          size: 1024,
+          blobId: 'blob2',
+        },
+        {
+          rfilename: 'complex-model-name.gguf',
+          size: 1024,
+          blobId: 'blob3',
+        },
+      ],
+    }
+
+    const result = convertHfRepoToCatalogModel(repoWithVariousGGUF)
+
+    expect(result.quants[0].model_id).toBe('model')
+    expect(result.quants[1].model_id).toBe('MODEL')
+    expect(result.quants[2].model_id).toBe('complex-model-name')
+  })
+
+  it('should generate correct download paths', () => {
+    const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo)
+
+    expect(result.quants[0].path).toBe(
+      'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf'
+    )
+    expect(result.quants[1].path).toBe(
+      'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q8_0.GGUF'
+    )
+  })
+
+  it('should generate correct readme URL', () => {
+    const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo)
+
+    expect(result.readme).toBe(
+      'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/README.md'
+    )
+  })
+
+  it('should handle GGUF files with case-insensitive extension matching', () => {
+    const repoWithMixedCase: HuggingFaceRepo = {
+      ...mockHuggingFaceRepo,
+      siblings: [
+        {
+          rfilename: 'model-1.gguf',
+          size: 1024,
+          blobId: 'blob1',
+        },
+        {
+          rfilename: 'model-2.GGUF',
+          size: 1024,
+          blobId: 'blob2',
+        },
+        {
+          rfilename: 'model-3.GgUf',
+          size: 1024,
+          blobId: 'blob3',
+        },
+        {
+          rfilename: 'not-a-model.txt',
+          size: 1024,
+          blobId: 'blob4',
+        },
+      ],
+    }
+
+    const result = convertHfRepoToCatalogModel(repoWithMixedCase)
+
+    expect(result.num_quants).toBe(3)
+    expect(result.quants).toHaveLength(3)
+    expect(result.quants[0].model_id).toBe('model-1')
+    expect(result.quants[1].model_id).toBe('model-2')
+    expect(result.quants[2].model_id).toBe('model-3')
+  })
+
+  it('should handle edge cases with file size formatting', () => {
+    const repoWithEdgeCases: HuggingFaceRepo = {
+      ...mockHuggingFaceRepo,
+      siblings: [
+        {
+          rfilename: 'tiny.gguf',
+          size: 512, // < 1MB
+          blobId: 'blob1',
+        },
+        {
+          rfilename: 'exactly-1gb.gguf',
+          size: 1024 * 1024 * 1024, // Exactly 1GB
+          blobId: 'blob2',
+        },
+        {
+          rfilename: 'zero-size.gguf',
+          size: 0,
+          blobId: 'blob3',
+        },
+      ],
+    }
+
+    const result = convertHfRepoToCatalogModel(repoWithEdgeCases)
+
+    expect(result.quants[0].file_size).toBe('0.0 MB')
+    expect(result.quants[1].file_size).toBe('1.0 GB')
+    expect(result.quants[2].file_size).toBe('Unknown size') // 0 is falsy, so it returns 'Unknown size'
+  })
+
+  it('should handle missing optional fields gracefully', () => {
+    const minimalRepo: HuggingFaceRepo = {
+      id: 'minimal/repo',
+      modelId: 'minimal/repo',
+      sha: 'abc123',
+      downloads: 0,
+      likes: 0,
+      tags: [],
+      created_at: '2021-01-01T00:00:00Z',
+      last_modified: '2021-12-01T00:00:00Z',
+      private: false,
+      disabled: false,
+      gated: false,
+      author: 'minimal',
+      siblings: [
+        {
+          rfilename: 'model.gguf',
+          blobId: 'blob1',
+        },
+      ],
+    }
+
+    const result = convertHfRepoToCatalogModel(minimalRepo)
+
+    expect(result.model_name).toBe('minimal/repo')
+    expect(result.developer).toBe('minimal')
+    expect(result.downloads).toBe(0)
+    expect(result.description).toBe('**Tags**: ')
+    expect(result.quants[0].file_size).toBe('Unknown size')
   })
 })
 })
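The `zero-size.gguf` expectation documents a quirk worth noting: the size guard uses falsiness, so a genuine 0-byte file reports `'Unknown size'`. A stricter variant (an assumption, not the project's code) would check for `undefined` explicitly:

```ts
// Only treats a missing size as unknown; a 0-byte file formats as '0.0 MB'.
const formatFileSizeStrict = (size?: number): string => {
  if (size === undefined) return 'Unknown size'
  if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB`
  return `${(size / 1024 ** 3).toFixed(1)} GB`
}

console.log(formatFileSizeStrict(0)) // -> '0.0 MB'
console.log(formatFileSizeStrict()) // -> 'Unknown size'
```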
@@ -134,6 +134,47 @@ export const fetchHuggingFaceRepo = async (
   }
 }

+// Convert HuggingFace repository to CatalogModel format
+export const convertHfRepoToCatalogModel = (
+  repo: HuggingFaceRepo
+): CatalogModel => {
+  // Extract GGUF files from the repository siblings
+  const ggufFiles =
+    repo.siblings?.filter((file) =>
+      file.rfilename.toLowerCase().endsWith('.gguf')
+    ) || []
+
+  // Convert GGUF files to quants format
+  const quants = ggufFiles.map((file) => {
+    // Format file size
+    const formatFileSize = (size?: number) => {
+      if (!size) return 'Unknown size'
+      if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB`
+      return `${(size / 1024 ** 3).toFixed(1)} GB`
+    }
+
+    // Generate model_id from filename (remove .gguf extension, case-insensitive)
+    const modelId = file.rfilename.replace(/\.gguf$/i, '')
+
+    return {
+      model_id: modelId,
+      path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
+      file_size: formatFileSize(file.size),
+    }
+  })
+
+  return {
+    model_name: repo.modelId,
+    description: `**Tags**: ${repo.tags?.join(', ')}`,
+    developer: repo.author,
+    downloads: repo.downloads || 0,
+    num_quants: quants.length,
+    quants: quants,
+    created_at: repo.created_at,
+    readme: `https://huggingface.co/${repo.modelId}/resolve/main/README.md`,
+  }
+}
+
 /**
  * Updates a model.
  * @param model The model to update.
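With the conversion promoted from a component-local `useCallback` to a plain exported function, it can be reused outside React and unit-tested directly, as the new test suite does. A minimal consumer sketch (the wrapper function is hypothetical):

```ts
import {
  fetchHuggingFaceRepo,
  convertHfRepoToCatalogModel,
} from '@/services/models'

// Fetch repo metadata and normalize it to the catalog shape; error handling
// is elided in this sketch.
async function loadCatalogEntry(repoId: string) {
  const repo = await fetchHuggingFaceRepo(repoId)
  return repo ? convertHfRepoToCatalogModel(repo) : undefined
}
```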
1
web-app/src/types/global.d.ts
vendored
@@ -19,6 +19,7 @@ declare global {
   declare const POSTHOG_KEY: string
   declare const POSTHOG_HOST: string
   declare const MODEL_CATALOG_URL: string
+  declare const AUTO_UPDATER_DISABLED: boolean
   interface Window {
     core: AppCore | undefined
   }
@@ -33,19 +33,19 @@ export default defineConfig(({ mode }) => {
     define: {
       IS_TAURI: JSON.stringify(process.env.IS_TAURI),
       IS_MACOS: JSON.stringify(
-        process.env.TAURI_ENV_PLATFORM?.includes('darwin') ?? 'false'
+        process.env.TAURI_ENV_PLATFORM?.includes('darwin') ?? false
       ),
       IS_WINDOWS: JSON.stringify(
-        process.env.TAURI_ENV_PLATFORM?.includes('windows') ?? 'false'
+        process.env.TAURI_ENV_PLATFORM?.includes('windows') ?? false
      ),
       IS_LINUX: JSON.stringify(
-        process.env.TAURI_ENV_PLATFORM?.includes('linux') ?? 'false'
+        process.env.TAURI_ENV_PLATFORM?.includes('linux') ?? false
       ),
       IS_IOS: JSON.stringify(
-        process.env.TAURI_ENV_PLATFORM?.includes('ios') ?? 'false'
+        process.env.TAURI_ENV_PLATFORM?.includes('ios') ?? false
       ),
       IS_ANDROID: JSON.stringify(
-        process.env.TAURI_ENV_PLATFORM?.includes('android') ?? 'false'
+        process.env.TAURI_ENV_PLATFORM?.includes('android') ?? false
       ),
       PLATFORM: JSON.stringify(process.env.TAURI_ENV_PLATFORM),

@@ -56,6 +56,9 @@ export default defineConfig(({ mode }) => {
       MODEL_CATALOG_URL: JSON.stringify(
         'https://raw.githubusercontent.com/menloresearch/model-catalog/main/model_catalog.json'
       ),
+      AUTO_UPDATER_DISABLED: JSON.stringify(
+        env.AUTO_UPDATER_DISABLED === 'true'
+      ),
     },

     // Vite options tailored for Tauri development and only applied in `tauri dev` or `tauri build`
@@ -38,5 +38,6 @@ export default defineConfig({
     VERSION: JSON.stringify('test'),
     POSTHOG_KEY: JSON.stringify(''),
     POSTHOG_HOST: JSON.stringify(''),
+    AUTO_UPDATER_DISABLED: JSON.stringify('false'),
   },
 })
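The `?? 'false'` to `?? false` change matters because these values are spliced into the bundle as source text: `JSON.stringify('false')` emits the quoted string `"false"`, which is truthy at every `if (IS_MACOS)` check, while `JSON.stringify(false)` emits the boolean literal. A two-line illustration:

```ts
JSON.stringify('false') // -> '"false"', a non-empty string, truthy in the bundle
JSON.stringify(false)   // -> 'false', a real boolean literal, correctly falsy
```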
21
website/.gitignore
vendored
Normal file
@@ -0,0 +1,21 @@
+# build output
+dist/
+# generated types
+.astro/
+
+# dependencies
+node_modules/
+
+# logs
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+
+
+# environment variables
+.env
+.env.production
+
+# macOS-specific files
+.DS_Store
28
website/README.md
Normal file
@@ -0,0 +1,28 @@
+# Jan's Website
+
+This website is [built with Starlight](https://starlight.astro.build)
+
+
+Starlight looks for `.md` or `.mdx` files in the `src/content/docs/` directory. Each file is exposed
+as a route based on its file name.
+
+Images can be added to `src/assets/` and embedded in Markdown with a relative link.
+
+Static assets, like favicons, can be placed in the `public/` directory.
+
+If you want to add new pages, these can go in the `src/pages/` directory. Because of the topics plugin
+we are using ([starlight sidebar topics](https://starlight-sidebar-topics.netlify.app/docs/guides/excluded-pages/))
+you will need to exclude them from the sidebar by adding them to the exclude list in `astro.config.mjs`, e.g., `exclude: ['/example'],`.
+
+## 🧞 Commands
+
+All commands are run from the root of the project, from a terminal:
+
+| Command                   | Action                                           |
+| :------------------------ | :----------------------------------------------- |
+| `bun install`             | Installs dependencies                            |
+| `bun dev`                 | Starts local dev server at `localhost:4321`      |
+| `bun build`               | Build your production site to `./dist/`          |
+| `bun preview`             | Preview your build locally, before deploying     |
+| `bun astro ...`           | Run CLI commands like `astro add`, `astro check` |
+| `bun astro -- --help`     | Get help using the Astro CLI                     |
191
website/astro.config.mjs
Normal file
@@ -0,0 +1,191 @@
+// @ts-check
+import { defineConfig } from 'astro/config'
+import starlight from '@astrojs/starlight'
+import starlightThemeRapide from 'starlight-theme-rapide'
+import starlightSidebarTopics from 'starlight-sidebar-topics'
+import mermaid from 'astro-mermaid'
+
+// https://astro.build/config
+export default defineConfig({
+  // Deploy to the new v2 subdomain
+  site: 'https://v2.jan.ai',
+  // No 'base' property is needed, as this will be deployed to the root of the subdomain.
+  integrations: [
+    mermaid({
+      theme: 'default',
+      autoTheme: true,
+    }),
+    starlight({
+      title: '👋 Jan',
+      favicon: 'jan2.png',
+      plugins: [
+        starlightThemeRapide(),
+        starlightSidebarTopics(
+          [
+            {
+              label: 'Jan Desktop',
+              link: '/',
+              icon: 'rocket',
+              items: [
+                {
+                  label: 'HOW TO',
+                  items: [
+                    {
+                      label: 'Install 👋 Jan',
+                      collapsed: false,
+                      autogenerate: { directory: 'jan/installation' },
+                    },
+                    { label: 'Start Chatting', slug: 'jan/threads' },
+                    {
+                      label: 'Use Jan Models',
+                      collapsed: true,
+                      autogenerate: { directory: 'jan/jan-models' },
+                    },
+                    { label: 'Assistants', slug: 'jan/assistants' },
+                  ],
+                },
+                {
+                  label: 'Cloud Providers',
+                  items: [
+                    { label: 'Anthropic', slug: 'jan/remote-models/anthropic' },
+                    { label: 'OpenAI', slug: 'jan/remote-models/openai' },
+                    { label: 'Gemini', slug: 'jan/remote-models/google' },
+                    {
+                      label: 'OpenRouter',
+                      slug: 'jan/remote-models/openrouter',
+                    },
+                    { label: 'Cohere', slug: 'jan/remote-models/cohere' },
+                    { label: 'Mistral', slug: 'jan/remote-models/mistralai' },
+                    { label: 'Groq', slug: 'jan/remote-models/groq' },
+                  ],
+                },
+                {
+                  label: 'EXPLANATION',
+                  items: [
+                    {
+                      label: 'Local AI Engine',
+                      slug: 'jan/explanation/llama-cpp',
+                    },
+                    {
+                      label: 'Model Parameters',
+                      slug: 'jan/explanation/model-parameters',
+                    },
+                  ],
+                },
+                {
+                  label: 'ADVANCED',
+                  items: [
+                    { label: 'Manage Models', slug: 'jan/manage-models' },
+                    { label: 'Model Context Protocol', slug: 'jan/mcp' },
+                    {
+                      label: 'MCP Examples',
+                      collapsed: true,
+                      items: [
+                        {
+                          label: 'Browser Control (Browserbase)',
+                          slug: 'jan/mcp-examples/browser/browserbase',
+                        },
+                        {
+                          label: 'Code Sandbox (E2B)',
+                          slug: 'jan/mcp-examples/data-analysis/e2b',
+                        },
+                        {
+                          label: 'Design Creation (Canva)',
+                          slug: 'jan/mcp-examples/design/canva',
+                        },
+                        {
+                          label: 'Deep Research (Octagon)',
+                          slug: 'jan/mcp-examples/deepresearch/octagon',
+                        },
+                        {
+                          label: 'Web Search with Exa',
+                          slug: 'jan/mcp-examples/search/exa',
+                        },
+                      ],
+                    },
+                  ],
+                },
+                {
+                  label: 'Local Server',
+                  items: [
+                    { label: 'Introduction', link: '/local-server/' },
+                    { label: 'Server Setup', slug: 'local-server/api-server' },
+                    {
+                      label: 'Jan Data Folder',
+                      slug: 'local-server/data-folder',
+                    },
+                    { label: 'Server Settings', slug: 'local-server/settings' },
+                    {
+                      label: 'Llama.cpp Server',
+                      slug: 'local-server/llama-cpp',
+                    },
+                    {
+                      label: 'Server Troubleshooting',
+                      slug: 'local-server/troubleshooting',
+                    },
+                    {
+                      label: 'Integrations',
+                      collapsed: true,
+                      autogenerate: { directory: 'local-server/integrations' },
+                    },
+                  ],
+                },
+                {
+                  label: 'REFERENCE',
+                  items: [
+                    { label: 'Settings', slug: 'jan/settings' },
+                    { label: 'Jan Data Folder', slug: 'jan/data-folder' },
+                    { label: 'Troubleshooting', slug: 'jan/troubleshooting' },
+                    { label: 'Privacy Policy', slug: 'jan/privacy' },
+                  ],
+                },
+              ],
+            },
+            {
+              label: 'Jan Mobile',
+              link: '/mobile/',
+              badge: { text: 'Coming Soon', variant: 'caution' },
+              icon: 'phone',
+              items: [{ label: 'Overview', slug: 'mobile' }],
+            },
+            {
+              label: 'Jan Server',
+              link: '/server/',
+              badge: { text: 'Coming Soon', variant: 'caution' },
+              icon: 'forward-slash',
+              items: [{ label: 'Overview', slug: 'server' }],
+            },
+          ],
+          {
+            exclude: [
+              '/prods',
+              '/api-reference',
+              '/products',
+              '/products/**/*',
+            ],
+          }
+        ),
+      ],
+      social: [
+        {
+          icon: 'github',
+          label: 'GitHub',
+          href: 'https://github.com/menloresearch/jan',
+        },
+        {
+          icon: 'x.com',
+          label: 'X',
+          href: 'https://twitter.com/jandotai',
+        },
+        {
+          icon: 'discord',
+          label: 'Discord',
+          href: 'https://discord.com/invite/FTk2MvZwJH',
+        },
+      ],
+      components: {
+        Header: './src/components/CustomNav.astro',
+      },
+    }),
+  ],
+})
1299
website/bun.lock
Normal file
27
website/package.json
Normal file
@@ -0,0 +1,27 @@
+{
+  "name": "website",
+  "type": "module",
+  "version": "0.0.1",
+  "scripts": {
+    "dev": "astro dev",
+    "start": "astro dev",
+    "build": "astro build",
+    "preview": "astro preview",
+    "astro": "astro"
+  },
+  "dependencies": {
+    "@astrojs/starlight": "^0.35.1",
+    "@lorenzo_lewis/starlight-utils": "^0.3.2",
+    "astro": "^5.6.1",
+    "astro-mermaid": "^1.0.4",
+    "gsap": "^3.13.0",
+    "mermaid": "^11.9.0",
+    "phosphor-astro": "^2.1.0",
+    "sharp": "^0.34.3",
+    "starlight-openapi": "^0.19.1",
+    "starlight-sidebar-topics": "^0.6.0",
+    "starlight-theme-rapide": "^0.5.1",
+    "starlight-videos": "^0.3.0"
+  },
+  "packageManager": "yarn@1.22.22"
+}
1
website/public/favicon.svg
Normal file
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 128 128"><path fill-rule="evenodd" d="M81 36 64 0 47 36l-1 2-9-10a6 6 0 0 0-9 9l10 10h-2L0 64l36 17h2L28 91a6 6 0 1 0 9 9l9-10 1 2 17 36 17-36v-2l9 10a6 6 0 1 0 9-9l-9-9 2-1 36-17-36-17-2-1 9-9a6 6 0 1 0-9-9l-9 10v-2Zm-17 2-2 5c-4 8-11 15-19 19l-5 2 5 2c8 4 15 11 19 19l2 5 2-5c4-8 11-15 19-19l5-2-5-2c-8-4-15-11-19-19l-2-5Z" clip-rule="evenodd"/><path d="M118 19a6 6 0 0 0-9-9l-3 3a6 6 0 1 0 9 9l3-3Zm-96 4c-2 2-6 2-9 0l-3-3a6 6 0 1 1 9-9l3 3c3 2 3 6 0 9Zm0 82c-2-2-6-2-9 0l-3 3a6 6 0 1 0 9 9l3-3c3-2 3-6 0-9Zm96 4a6 6 0 0 1-9 9l-3-3a6 6 0 1 1 9-9l3 3Z"/><style>path{fill:#000}@media (prefers-color-scheme:dark){path{fill:#fff}}</style></svg>
(new image, 696 B)
BIN  website/public/gifs/jan-nano-demo.gif  (new image, 22 MiB)
BIN  website/public/jan.png  (new image, 1.5 MiB)
BIN  website/public/jan2.png  (new image, 1.8 MiB)
1459
website/public/openapi/openapi.json
Normal file
BIN  website/public/videos/jan-nano-demo.mp4  (new file)
BIN  website/src/assets/add_assistant.png  (new image, 163 KiB)
BIN  website/src/assets/anthropic.png  (new image, 149 KiB)
BIN  website/src/assets/api-server-logs.png  (new image, 1.4 MiB)
BIN  website/src/assets/api-server-ui.png  (new image, 562 KiB)
BIN  website/src/assets/api-server.png  (new image, 598 KiB)
BIN  website/src/assets/api-server2.png  (new image, 306 KiB)
BIN  website/src/assets/assistant-add-dialog.png  (new image, 85 KiB)
BIN  website/src/assets/assistant-dropdown-updated.png  (new image, 120 KiB)
BIN  website/src/assets/assistant-dropdown.png  (new image, 450 KiB)
BIN  website/src/assets/assistant-edit-dialog.png  (new image, 118 KiB)
BIN  website/src/assets/assistants-ui-overview.png  (new image, 453 KiB)
BIN  website/src/assets/browserbase.png  (new image, 714 KiB)
BIN  website/src/assets/browserbase2.png  (new image, 554 KiB)
BIN  website/src/assets/browserbase3.png  (new image, 377 KiB)
BIN  website/src/assets/browserbase4.png  (new image, 453 KiB)
BIN  website/src/assets/browserbase5.png  (new image, 616 KiB)
BIN  website/src/assets/browserbase6.png  (new image, 742 KiB)
BIN  website/src/assets/browserbase7.png  (new image, 544 KiB)
BIN  website/src/assets/canva.png  (new image, 404 KiB)
BIN  website/src/assets/canva2.png  (new image, 4.4 MiB)
BIN  website/src/assets/canva3.png  (new image, 432 KiB)
BIN  website/src/assets/canva4.png  (new image, 499 KiB)