Merge branch 'dev' into blog/add-deepresearch-piece

.github/workflows/jan-astro-docs.yml (new file, 98 lines)
@@ -0,0 +1,98 @@
name: Jan Astro Docs

on:
  push:
    branches:
      - dev
    paths:
      - 'website/**'
      - '.github/workflows/jan-astro-docs.yml'
  pull_request:
    paths:
      - 'website/**'
      - '.github/workflows/jan-astro-docs.yml'
  # Review gh actions docs if you want to further define triggers, paths, etc
  # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on
  workflow_dispatch:

jobs:
  deploy:
    name: Deploy to CloudFlare Pages
    env:
      CLOUDFLARE_PROJECT_NAME: astro-docs
    runs-on: ubuntu-latest
    permissions:
      contents: write
      deployments: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          node-version: 20

      - uses: oven-sh/setup-bun@v2

      - name: Install jq
        uses: dcarbone/install-jq-action@v2.0.1

      - name: Fill env vars
        continue-on-error: true
        working-directory: website
        run: |
          env_example_file=".env.example"
          touch .env
          while IFS= read -r line || [[ -n "$line" ]]; do
            if [[ "$line" == *"="* ]]; then
              var_name=$(echo $line | cut -d '=' -f 1)
              echo $var_name
              var_value="$(jq -r --arg key "$var_name" '.[$key]' <<< "$SECRETS")"
              echo "$var_name=$var_value" >> .env
            fi
          done < "$env_example_file"
        env:
          SECRETS: '${{ toJson(secrets) }}'

      - name: Install dependencies
        working-directory: website
        run: bun install
      - name: Build website
        working-directory: website
        run: bun run build

      - name: copy redirects and headers
        continue-on-error: true
        working-directory: website
        run: |
          cp _redirects dist/_redirects
          cp _headers dist/_headers

      - name: Publish to Cloudflare Pages PR Preview and Staging
        if: github.event_name == 'pull_request'
        uses: cloudflare/pages-action@v1
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./website/dist
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
        id: deployCloudflarePages

      - uses: mshick/add-pr-comment@v2
        if: github.event_name == 'pull_request'
        with:
          message: |
            Preview URL Astro Docs: ${{ steps.deployCloudflarePages.outputs.url }}

      - name: Publish to Cloudflare Pages Production
        if: (github.event_name == 'push' && github.ref == 'refs/heads/dev') || (github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/dev')
        uses: cloudflare/pages-action@v1
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./website/dist
          branch: main
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
.github/workflows/jan-docs-new-release.yaml (deleted, 63 lines)
@@ -1,63 +0,0 @@
name: Deploy Docs on new release

on:
  release:
    types:
      - published
      - edited
      - released

jobs:
  deploy:
    name: Deploy to CloudFlare Pages
    env:
      CLOUDFLARE_PROJECT_NAME: docs
    runs-on: ubuntu-latest
    permissions:
      contents: write
      deployments: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
        with:
          ref: dev
      - uses: actions/setup-node@v3
        with:
          node-version: 20

      - name: Install jq
        uses: dcarbone/install-jq-action@v2.0.1

      - name: Fill env vars
        working-directory: docs
        run: |
          env_example_file=".env.example"
          touch .env
          while IFS= read -r line || [[ -n "$line" ]]; do
            if [[ "$line" == *"="* ]]; then
              var_name=$(echo $line | cut -d '=' -f 1)
              echo $var_name
              var_value="$(jq -r --arg key "$var_name" '.[$key]' <<< "$SECRETS")"
              echo "$var_name=$var_value" >> .env
            fi
          done < "$env_example_file"
        env:
          SECRETS: '${{ toJson(secrets) }}'

      - name: Install dependencies
        working-directory: docs
        run: yarn install
      - name: Build website
        working-directory: docs
        run: export NODE_ENV=production && yarn build && cp _redirects out/_redirects && cp _headers out/_headers

      - name: Publish to Cloudflare Pages Production
        uses: cloudflare/pages-action@v1
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./docs/out
          branch: main
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
.github/workflows/jan-docs.yml (2 lines changed)
@@ -26,7 +26,7 @@ jobs:
       deployments: write
       pull-requests: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: actions/setup-node@v3
         with:
           node-version: 20
.github/workflows/jan-linter-and-test.yml (2 lines changed)
@@ -1,4 +1,4 @@
-name: Test - Linter & Playwright
+name: Linter & Test
 on:
   workflow_dispatch:
   push:
.github/workflows/jan-tauri-build-nightly.yaml (52 lines changed)
@@ -20,6 +20,7 @@ on:
 jobs:
   set-public-provider:
     runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     outputs:
       public_provider: ${{ steps.set-public-provider.outputs.public_provider }}
       ref: ${{ steps.set-public-provider.outputs.ref }}
@@ -47,11 +48,13 @@ jobs:
       fi
   # Job create Update app version based on latest release tag with build number and save to output
   get-update-version:
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     uses: ./.github/workflows/template-get-update-version.yml

   build-macos:
     uses: ./.github/workflows/template-tauri-build-macos.yml
     needs: [get-update-version, set-public-provider]
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     secrets: inherit
     with:
       ref: ${{ needs.set-public-provider.outputs.ref }}
@@ -64,6 +67,7 @@ jobs:
     uses: ./.github/workflows/template-tauri-build-windows-x64.yml
     secrets: inherit
     needs: [get-update-version, set-public-provider]
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     with:
       ref: ${{ needs.set-public-provider.outputs.ref }}
       public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
@@ -74,6 +78,7 @@ jobs:
     uses: ./.github/workflows/template-tauri-build-linux-x64.yml
     secrets: inherit
     needs: [get-update-version, set-public-provider]
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     with:
       ref: ${{ needs.set-public-provider.outputs.ref }}
       public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
@@ -91,6 +96,7 @@ jobs:
       build-macos,
     ]
     runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     steps:
       - name: Getting the repo
         uses: actions/checkout@v3
@@ -224,49 +230,3 @@ jobs:
           RUN_ID=${{ github.run_id }}
           COMMENT="This is the build for this pull request. You can download it from the Artifacts section here: [Build URL](https://github.com/${{ github.repository }}/actions/runs/${RUN_ID})."
           gh pr comment $PR_URL --body "$COMMENT"
-
-  # AutoQA trigger for S3 builds
-  trigger-autoqa-s3:
-    needs:
-      [
-        build-macos,
-        build-windows-x64,
-        build-linux-x64,
-        get-update-version,
-        set-public-provider,
-        sync-temp-to-latest,
-      ]
-    if: needs.set-public-provider.outputs.public_provider == 'aws-s3'
-    uses: ./.github/workflows/autoqa-template.yml
-    with:
-      jan_app_windows_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_x64-setup.exe'
-      jan_app_ubuntu_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_amd64.deb'
-      jan_app_macos_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_universal.dmg'
-      is_nightly: true
-      source_type: 'url'
-    secrets:
-      RP_TOKEN: ${{ secrets.RP_TOKEN }}
-
-  # AutoQA trigger for artifact builds
-  trigger-autoqa-artifacts:
-    needs:
-      [
-        build-macos,
-        build-windows-x64,
-        build-linux-x64,
-        get-update-version,
-        set-public-provider,
-      ]
-    if: needs.set-public-provider.outputs.public_provider == 'none'
-    uses: ./.github/workflows/autoqa-template.yml
-    with:
-      jan_app_windows_source: '' # Not needed for artifacts
-      jan_app_ubuntu_source: '' # Not needed for artifacts
-      jan_app_macos_source: '' # Not needed for artifacts
-      is_nightly: true
-      source_type: 'local'
-      artifact_name_windows: 'jan-windows-${{ needs.get-update-version.outputs.new_version }}'
-      artifact_name_ubuntu: 'jan-linux-amd64-${{ needs.get-update-version.outputs.new_version }}-deb'
-      artifact_name_macos: 'jan-nightly-mac-universal-${{ needs.get-update-version.outputs.new_version }}.dmg'
-    secrets:
-      RP_TOKEN: ${{ secrets.RP_TOKEN }}
.vscode/extensions.json (deleted, 5 lines)
@@ -1,5 +0,0 @@
{
  "recommendations": [
    "esbenp.prettier-vscode"
  ]
}

.vscode/settings.json (deleted, 7 lines)
@@ -1,7 +0,0 @@
{
  "editor.defaultFormatter": "esbenp.prettier-vscode",
  "editor.formatOnSave": true,
  "[rust]": {
    "editor.defaultFormatter": "rust-lang.rust-analyzer"
  }
}
README.md (40 lines changed)
@@ -1,6 +1,6 @@
 # Jan - Local AI Assistant

 <p align="center">
   <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
@@ -12,62 +12,50 @@
 </p>

 <p align="center">
-  <a href="https://jan.ai/docs/quickstart">Getting Started</a>
-  - <a href="https://jan.ai/docs">Docs</a>
-  - <a href="https://jan.ai/changelog">Changelog</a>
-  - <a href="https://github.com/menloresearch/jan/issues">Bug reports</a>
+  <a href="https://jan.ai/docs/quickstart">Getting Started</a>
+  - <a href="https://jan.ai/docs">Docs</a>
+  - <a href="https://jan.ai/changelog">Changelog</a>
+  - <a href="https://github.com/menloresearch/jan/issues">Bug reports</a>
+  - <a href="https://discord.gg/AsJ8krTT3N">Discord</a>
 </p>

-Jan is a ChatGPT-alternative that runs 100% offline on your device. Our goal is to make it easy for a layperson to download and run LLMs and use AI with **full control** and **privacy**.
-
-**⚠️ Jan is in active development.**
+Jan is an AI assistant that can run 100% offline on your device. Download and run LLMs with
+**full control** and **privacy**.

 ## Installation

-Because clicking a button is still the easiest way to get started:
+The easiest way to get started is by downloading one of the following versions for your respective operating system:

 <table>
   <tr>
     <td><b>Platform</b></td>
     <td><b>Stable</b></td>
     <td><b>Beta</b></td>
     <td><b>Nightly</b></td>
   </tr>
   <tr>
     <td><b>Windows</b></td>
     <td><a href='https://app.jan.ai/download/latest/win-x64'>jan.exe</a></td>
     <td><a href='https://app.jan.ai/download/beta/win-x64'>jan.exe</a></td>
     <td><a href='https://app.jan.ai/download/nightly/win-x64'>jan.exe</a></td>
   </tr>
   <tr>
     <td><b>macOS</b></td>
     <td><a href='https://app.jan.ai/download/latest/mac-universal'>jan.dmg</a></td>
     <td><a href='https://app.jan.ai/download/beta/mac-universal'>jan.dmg</a></td>
     <td><a href='https://app.jan.ai/download/nightly/mac-universal'>jan.dmg</a></td>
   </tr>
   <tr>
     <td><b>Linux (deb)</b></td>
     <td><a href='https://app.jan.ai/download/latest/linux-amd64-deb'>jan.deb</a></td>
     <td><a href='https://app.jan.ai/download/beta/linux-amd64-deb'>jan.deb</a></td>
     <td><a href='https://app.jan.ai/download/nightly/linux-amd64-deb'>jan.deb</a></td>
   </tr>
   <tr>
     <td><b>Linux (AppImage)</b></td>
     <td><a href='https://app.jan.ai/download/latest/linux-amd64-appimage'>jan.AppImage</a></td>
     <td><a href='https://app.jan.ai/download/beta/linux-amd64-appimage'>jan.AppImage</a></td>
     <td><a href='https://app.jan.ai/download/nightly/linux-amd64-appimage'>jan.AppImage</a></td>
   </tr>
 </table>

 Download from [jan.ai](https://jan.ai/) or [GitHub Releases](https://github.com/menloresearch/jan/releases).

 ## Demo

 <video width="100%" controls>
   <source src="./docs/public/assets/videos/enable-tool-call-for-models.mp4" type="video/mp4">
   Your browser does not support the video tag.
 </video>

 ## Features

@@ -149,13 +137,12 @@ For detailed compatibility, check our [installation guides](https://jan.ai/docs/

 ## Troubleshooting

-When things go sideways (they will):
+If things go sideways:

 1. Check our [troubleshooting docs](https://jan.ai/docs/troubleshooting)
 2. Copy your error logs and system specs
 3. Ask for help in our [Discord](https://discord.gg/FTk2MvZwJH) `#🆘|jan-help` channel

 We keep logs for 24 hours, so don't procrastinate on reporting issues.

 ## Contributing

@@ -175,15 +162,6 @@ Contributions welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for the full spiel
 - **Jobs**: hr@jan.ai
 - **General Discussion**: [Discord](https://discord.gg/FTk2MvZwJH)

-## Trust & Safety
-
-**Friendly reminder**: We're not trying to scam you.
-
-- We won't ask for personal information
-- Jan is completely free (no premium version exists)
-- We don't have a cryptocurrency or ICO
-- We're bootstrapped and not seeking your investment (yet)
-
 ## License

 Apache 2.0 - Because sharing is caring.
@@ -132,6 +132,12 @@ export abstract class BaseExtension implements ExtensionType {
         setting.controllerProps.options = setting.controllerProps.options?.length
           ? setting.controllerProps.options
           : oldSettings.find((e: any) => e.key === setting.key)?.controllerProps?.options
+        if ('recommended' in setting.controllerProps) {
+          const oldRecommended = oldSettings.find((e: any) => e.key === setting.key)?.controllerProps?.recommended
+          if (oldRecommended !== undefined && oldRecommended !== "") {
+            setting.controllerProps.recommended = oldRecommended
+          }
+        }
       })
     }
     localStorage.setItem(this.name, JSON.stringify(settings))

@@ -60,4 +60,5 @@ export type DropdownComponentProps = {
   value: string
   type?: InputType
   options?: DropdownOption[]
+  recommended?: string
 }
BIN docs/src/pages/docs/_assets/hf_hub.png (new file, 203 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano.png (new file, 1.6 MiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_2.png (new file, 171 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_3.png (new file, 139 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_4.png (new file, 152 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_5.png (new file, 417 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_6.png (new file, 405 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_7.png (new file, 26 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_8.png (new file, 661 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_9.png (new file, 158 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_setup.png (new file, 642 KiB)
BIN docs/src/pages/docs/_assets/hf_providers.png (new file, 1.5 MiB)
@@ -26,5 +26,9 @@
   "openrouter": {
     "title": "OpenRouter",
     "href": "/docs/remote-models/openrouter"
   },
+  "huggingface": {
+    "title": "Hugging Face",
+    "href": "/docs/remote-models/huggingface"
+  }
 }
docs/src/pages/docs/remote-models/huggingface.mdx (new file, 152 lines)
@@ -0,0 +1,152 @@
---
title: Hugging Face
description: Learn how to integrate Hugging Face models with Jan using the Router or Inference Endpoints.
keywords:
  [
    Hugging Face,
    Jan,
    Jan AI,
    Hugging Face Router,
    Hugging Face Inference Endpoints,
    Hugging Face API,
    Hugging Face Integration,
    Hugging Face API Integration
  ]
---

import { Callout, Steps } from 'nextra/components'
import { Settings, Plus } from 'lucide-react'

# Hugging Face

Jan supports Hugging Face models through two methods: the new **HF Router** (recommended) and **Inference Endpoints**. Both methods require a Hugging Face token and **billing to be set up**.

## Option 1: HF Router (Recommended)

The HF Router provides access to models from multiple providers (Replicate, Together AI, SambaNova, Fireworks, Cohere, and more) through a single endpoint.

<Steps>

### Step 1: Get Your HF Token

Visit [Hugging Face Settings > Access Tokens](https://huggingface.co/settings/tokens) and create a token. Make sure you have billing set up on your account.

### Step 2: Configure Jan

1. Go to **Settings** > **Model Providers** > **HuggingFace**
2. Enter your HF token
3. Use this URL: `https://router.huggingface.co/v1`

You can find out more about the HF Router [here](https://huggingface.co/docs/inference-providers/index).

### Step 3: Start Using Models

Jan comes with three HF Router models pre-configured. Select one and start chatting immediately.

</Steps>

<Callout type='info'>
The HF Router automatically routes your requests to the best available provider for each model, giving you access to a wide variety of models without managing individual endpoints.
</Callout>
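Because the Router exposes an OpenAI-compatible API, you can also sanity-check your token outside of Jan. Below is a minimal sketch (assuming Node 18+ with a global `fetch`, an `HF_TOKEN` environment variable, and an example model ID; substitute any model the Router serves for you):

```typescript
// Quick token check against the HF Router's OpenAI-compatible
// chat completions route.
const res = await fetch('https://router.huggingface.co/v1/chat/completions', {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${process.env.HF_TOKEN}`, // never hard-code tokens
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'meta-llama/Llama-3.1-8B-Instruct', // example model ID
    messages: [{ role: 'user', content: 'Reply with one word: ready?' }],
  }),
})
const data = await res.json()
console.log(data.choices?.[0]?.message?.content)
```

If this prints a reply, the same token and base URL will work in Jan's provider settings.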
## Option 2: HF Inference Endpoints

For more control over specific models and deployment configurations, you can use Hugging Face Inference Endpoints.

<Steps>

### Step 1: Navigate to the HuggingFace Model Hub

Visit the [Hugging Face Model Hub](https://huggingface.co/models) (make sure you are logged in) and pick the model you want to use.

### Step 2: Configure HF Inference Endpoint and Deploy

Once you have selected the model you want to use, click the **Deploy** button and choose a deployment method. We will use HF Inference Endpoints for this example.

<br/>

This takes you to the deployment setup page. For this example, we will leave the default settings under the GPU tab as they are and click **Create Endpoint**.

<br/>

Once your endpoint is ready, test that it works on the **Test your endpoint** tab.

<br/>

If you get a response, you can click on **Copy** to copy the endpoint URL and API key.

<Callout type='info'>
You will need to be logged in to Hugging Face Inference Endpoints and have a credit card on file to deploy a model.
</Callout>

### Step 3: Configure Jan

If you do not have an API key, you can create one under **Settings** > **Access Tokens** [here](https://huggingface.co/settings/tokens). Once you finish, copy the token and add it to Jan alongside your endpoint URL at **Settings** > **Model Providers** > **HuggingFace**.

**3.1 HF Token**
<br/>

**3.2 HF Endpoint URL**
<br/>

**3.3 Jan Settings**

<Callout type='warning'>
Make sure to add `/v1/` to the end of your endpoint URL. This is required by the OpenAI-compatible API format that Jan uses.
</Callout>
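Before saving the settings, you can verify that the URL is wired up correctly by listing the models the endpoint serves. A small sketch (the endpoint URL below is a placeholder; use the one you copied from the **Test your endpoint** page):

```typescript
// List models behind a (hypothetical) Inference Endpoint.
// Note the /v1 segment, as explained in the callout above.
const endpoint = 'https://YOUR-ENDPOINT.aws.endpoints.huggingface.cloud' // placeholder
const res = await fetch(`${endpoint}/v1/models`, {
  headers: { Authorization: `Bearer ${process.env.HF_TOKEN}` },
})
console.log(await res.json())
```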
**3.4 Add Model Details**

### Step 4: Start Using the Model

Now you can start using the model in any chat.

If you want to learn how to use Jan Nano with MCP, check out [the guide here](../jan-models/jan-nano-32).
<br/>

</Steps>

## Available Hugging Face Models

**Option 1 (HF Router):** Access to models from multiple providers, as shown in the providers image above.

**Option 2 (Inference Endpoints):** You can follow the steps above with a large number of models on Hugging Face and bring them to Jan. Check out other models in the [Hugging Face Model Hub](https://huggingface.co/models).

## Troubleshooting

Common issues and solutions:

**1. Started a chat but the model is not responding**
- Verify your API_KEY/HF_TOKEN is correct and not expired
- Ensure you have billing set up on your HF account
- For Inference Endpoints: make sure the endpoint is running; endpoints go idle after a period of inactivity (so you are not charged while not using them) and must be resumed before use

**2. Connection Problems**
- Check your internet connection
- Verify Hugging Face's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)

**3. Model Unavailable**
- Confirm your API key has access to the model
- Check if you're using the correct model ID
- Verify your Hugging Face account has the necessary permissions

Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the
[Hugging Face documentation](https://docs.huggingface.co/en/inference-endpoints/index).
@@ -1,9 +0,0 @@ (deleted file)
{
  "-- Switcher": {
    "type": "separator",
    "title": "Switcher"
  },
  "index": {
    "display": "hidden"
  }
}

@@ -1,87 +0,0 @@ (deleted file)
---
title: Coming Soon
description: Exciting new features and platforms are on the way. Stay tuned for Jan Web, Jan Mobile, and our API Platform.
keywords:
  [
    Jan,
    Customizable Intelligence, LLM,
    local AI,
    privacy focus,
    free and open source,
    private and offline,
    conversational AI,
    no-subscription fee,
    large language models,
    coming soon,
    Jan Web,
    Jan Mobile,
    API Platform,
  ]
---

import { Callout } from 'nextra/components'

<div className="text-center py-12">
  <div className="mb-8">
    <h1 className="text-4xl font-bold bg-gradient-to-r from-blue-600 to-purple-600 bg-clip-text text-transparent mb-4 py-2">
      🚀 Coming Soon
    </h1>
    <p className="text-xl text-gray-600 dark:text-gray-300 max-w-2xl mx-auto">
      We're working on the next stage of Jan - making our local assistant more powerful and available in more platforms.
    </p>
  </div>

  <div className="grid grid-cols-1 md:grid-cols-3 gap-6 max-w-4xl mx-auto mb-12">
    <div className="p-6 border border-gray-200 dark:border-gray-700 rounded-lg bg-gradient-to-br from-blue-50 to-indigo-50 dark:from-blue-900/20 dark:to-indigo-900/20">
      <div className="text-3xl mb-3">🌐</div>
      <h3 className="text-lg font-semibold mb-2">Jan Web</h3>
      <p className="text-sm text-gray-600 dark:text-gray-400">
        Access Jan directly from your browser with our powerful web interface
      </p>
    </div>

    <div className="p-6 border border-gray-200 dark:border-gray-700 rounded-lg bg-gradient-to-br from-green-50 to-emerald-50 dark:from-green-900/20 dark:to-emerald-900/20">
      <div className="text-3xl mb-3">📱</div>
      <h3 className="text-lg font-semibold mb-2">Jan Mobile</h3>
      <p className="text-sm text-gray-600 dark:text-gray-400">
        Take Jan on the go with our native mobile applications
      </p>
    </div>

    <div className="p-6 border border-gray-200 dark:border-gray-700 rounded-lg bg-gradient-to-br from-purple-50 to-pink-50 dark:from-purple-900/20 dark:to-pink-900/20">
      <div className="text-3xl mb-3">⚡</div>
      <h3 className="text-lg font-semibold mb-2">Jan Server</h3>
      <p className="text-sm text-gray-600 dark:text-gray-400">
        Integrate Jan's capabilities into your applications with our API
      </p>
    </div>
  </div>

  <Callout type="info">
    **Stay Updated**: Follow our [GitHub repository](https://github.com/menloresearch/jan) and join our [Discord community](https://discord.com/invite/FTk2MvZwJH) for the latest updates on these exciting releases!
  </Callout>

  <div className="mt-12">
    <h2 className="text-2xl font-semibold mb-6">What to Expect</h2>
    <div className="text-left max-w-2xl mx-auto space-y-4">
      <div className="flex items-start gap-3">
        <span className="text-green-500 text-xl">✓</span>
        <div>
          <strong>Seamless Experience:</strong> Unified interface across all platforms
        </div>
      </div>
      <div className="flex items-start gap-3">
        <span className="text-green-500 text-xl">✓</span>
        <div>
          <strong>Privacy First:</strong> Same privacy-focused approach you trust
        </div>
      </div>
      <div className="flex items-start gap-3">
        <span className="text-green-500 text-xl">✓</span>
        <div>
          <strong>Developer Friendly:</strong> Robust APIs and comprehensive documentation
        </div>
      </div>
    </div>
  </div>
</div>
BIN docs/src/pages/post/_assets/gpt-oss locally.jpeg (new file, 235 KiB)
BIN docs/src/pages/post/_assets/jan gpt-oss.jpeg (new file, 233 KiB)
BIN docs/src/pages/post/_assets/jan hub gpt-oss locally.jpeg (new file, 470 KiB)
BIN docs/src/pages/post/_assets/run gpt-oss locally in jan.jpeg (new file, 270 KiB)
docs/src/pages/post/run-gpt-oss-locally.mdx (new file, 211 lines)
@@ -0,0 +1,211 @@
---
title: "Run OpenAI's gpt-oss locally in 5 mins (Beginner Guide)"
description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally. Step-by-step setup with Jan AI for private, offline AI conversations."
tags: OpenAI, gpt-oss, local AI, Jan, privacy, Apache-2.0, llama.cpp, Ollama, LM Studio
categories: guides
date: 2025-08-06
ogImage: assets/gpt-oss%20locally.jpeg
twitter:
  card: summary_large_image
  site: "@jandotai"
  title: "Run OpenAI's gpt-oss Locally in 5 Minutes (Beginner Guide)"
  description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally with Jan AI for private, offline conversations."
  image: assets/gpt-oss%20locally.jpeg
---
import { Callout } from 'nextra/components'
import CTABlog from '@/components/Blog/CTA'

# Run OpenAI's gpt-oss Locally in 5 mins

OpenAI launched [gpt-oss](https://openai.com/index/introducing-gpt-oss/), marking their return to open-source AI after GPT-2. This model is designed to run locally on consumer hardware. This guide shows you how to install and run gpt-oss on your computer for private, offline AI conversations.

## What is gpt-oss?

gpt-oss is OpenAI's open-source large language model, released under the Apache-2.0 license. Unlike ChatGPT, gpt-oss:

- Runs completely offline - No internet required after setup
- 100% private - Your conversations never leave your device
- Unlimited usage - No token limits or rate limiting
- Free forever - No subscription fees
- Commercial use allowed - Apache-2.0 license permits business use

Running AI models locally means everything happens on your own hardware, giving you complete control over your data and conversations.

## gpt-oss System Requirements

| Component | Minimum | Recommended |
|-----------|---------|-------------|
| **RAM** | 16 GB | 32 GB+ |
| **Storage** | 11+ GB free | 25 GB+ free |
| **CPU** | 4 cores | 8+ cores |
| **GPU** | Optional | Modern GPU with 6GB+ VRAM recommended |
| **OS** | Windows 10+, macOS 11+, Linux | Latest versions |

**Installation apps available:**
- **Jan** (Recommended - easiest setup)
- **llama.cpp** (Command line)
- **Ollama** (Docker-based)
- **LM Studio** (GUI alternative)

## How to install gpt-oss locally with Jan (5 mins)

### Step 1: Download Jan

First, download Jan to run gpt-oss locally: [Download Jan AI](https://jan.ai/)

<Callout type="info">
Jan is the simplest way to run AI models locally. It automatically handles CPU/GPU optimization, provides a clean chat interface, and requires zero technical knowledge.
</Callout>

### Step 2: Install gpt-oss Model (2-3 minutes)

1. Open Jan Hub → search "gpt-oss" (it appears at the top)
2. Click Download and wait for completion (~11GB download)
3. Installation is automatic - Jan handles everything

### Step 3: Start using gpt-oss offline (30 seconds)

1. Go to New Chat → select gpt-oss-20b from the model picker
2. Start chatting - Jan automatically optimizes for your hardware
3. You're done! Your AI conversations now stay completely private

Success: Your gpt-oss setup is complete. No internet required for chatting, unlimited usage, zero subscription fees.
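If you prefer to script against the model instead of chatting in the UI, Jan can also expose an OpenAI-compatible local API server. A hedged sketch (this assumes you have enabled the local API server in Jan's settings; the address below, `127.0.0.1:1337`, is an assumption, so use whatever host and port your settings screen shows):

```typescript
// Chat with the locally loaded gpt-oss model through Jan's
// OpenAI-compatible endpoint. Nothing leaves your machine.
const res = await fetch('http://127.0.0.1:1337/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'gpt-oss-20b', // the ID shown in Jan's model picker
    messages: [{ role: 'user', content: 'In one sentence: why run AI locally?' }],
  }),
})
const data = await res.json()
console.log(data.choices?.[0]?.message?.content)
```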
## Jan with gpt-oss vs ChatGPT vs other Local AI Models

| Feature | gpt-oss (Local) | ChatGPT Plus | Claude Pro | Other Local Models |
|---------|----------------|--------------|------------|-------------------|
| Cost | Free forever | $20/month | $20/month | Free |
| Privacy | 100% private | Data sent to OpenAI | Data sent to Anthropic | 100% private |
| Internet | Offline after setup | Requires internet | Requires internet | Offline |
| Usage limits | Unlimited | Rate limited | Rate limited | Unlimited |
| Performance | Good (hardware dependent) | Excellent | Excellent | Varies |
| Setup difficulty | Easy with Jan | None | None | Varies |

## Alternative Installation Methods

### Option 1: Jan (Recommended)

- Best for: Complete beginners, users wanting a GUI interface
- Setup time: 5 minutes
- Difficulty: Very Easy

Already covered above - [Download Jan](https://jan.ai/)

### Option 2: llama.cpp (Command Line)

- Best for: Developers, terminal users, custom integrations
- Setup time: 10-15 minutes
- Difficulty: Intermediate

```bash
# macOS
brew install llama-cpp

# Windows: grab the Windows exe from releases
curl -L -o gpt-oss-20b.gguf https://huggingface.co/openai/gpt-oss-20b-gguf/resolve/main/gpt-oss-20b.gguf
./main -m gpt-oss-20b.gguf --chat-simple

# Add GPU acceleration (adjust -ngl value based on your GPU VRAM)
./main -m gpt-oss-20b.gguf --chat-simple -ngl 20
```

### Option 3: Ollama (Docker-Based)

- Best for: Docker users, server deployments
- Setup time: 5-10 minutes
- Difficulty: Intermediate

```bash
# Install from https://ollama.com
ollama run gpt-oss:20b
```
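Ollama also serves an OpenAI-compatible API on `localhost:11434`, so the same kind of client code works against it once the model has been pulled. A brief sketch:

```typescript
// Query gpt-oss through Ollama's OpenAI-compatible endpoint.
const res = await fetch('http://localhost:11434/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'gpt-oss:20b',
    messages: [{ role: 'user', content: 'Hello from Ollama!' }],
  }),
})
console.log((await res.json()).choices?.[0]?.message?.content)
```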
### Option 4: LM Studio (GUI Alternative)

- Best for: Users wanting a GUI but not Jan
- Setup time: 10 minutes
- Difficulty: Easy

1. Download LM Studio from the official website
2. Go to Models → search "gpt-oss-20b (GGUF)"
3. Download the model (wait for completion)
4. Go to the Chat tab → select the model and start chatting

## gpt-oss Performance & Troubleshooting

### Expected Performance Benchmarks

| Hardware Setup | First Response | Subsequent Responses | Tokens/Second |
|---------------|---------------|---------------------|---------------|
| **16GB RAM + CPU only** | 30-45 seconds | 3-6 seconds | 3-8 tokens/sec |
| **32GB RAM + RTX 3060** | 15-25 seconds | 1-3 seconds | 15-25 tokens/sec |
| **32GB RAM + RTX 4080+** | 8-15 seconds | 1-2 seconds | 25-45 tokens/sec |

### Common Issues & Solutions

Performance optimization tips:
- First response is slow: Normal - kernels compile on the first run, then responses speed up dramatically
- Out of VRAM error: Reduce context length in settings or switch to CPU mode
- Out of memory: Close memory-heavy apps (Chrome, games, video editors)
- Slow responses: Check if other apps are using the GPU/CPU heavily

Quick fixes:
1. Restart Jan if responses become slow
2. Lower the context window from 4096 to 2048 tokens
3. Enable CPU mode if GPU issues persist
4. Free up RAM by closing unused applications

## Frequently Asked Questions (FAQ)

### Is gpt-oss completely free?
Yes! gpt-oss is 100% free under the Apache-2.0 license. No subscription fees, no token limits, no hidden costs.

### How much internet data does gpt-oss use?
Only for the initial 11GB download. After installation, gpt-oss works completely offline with zero internet usage.

### Can I use gpt-oss for commercial projects?
Absolutely! The Apache-2.0 license permits commercial use, modification, and distribution.

### Is gpt-oss better than ChatGPT?
gpt-oss offers different advantages: complete privacy, unlimited usage, offline capability, and no costs. ChatGPT may have better performance but requires internet and subscriptions.

### What happens to my conversations with gpt-oss?
Your conversations stay 100% on your device. Nothing is sent to OpenAI, Jan, or any external servers.

### Can I run gpt-oss on a Mac with 8GB RAM?
No, gpt-oss requires a minimum of 16 GB of RAM. Consider upgrading your RAM or using cloud-based alternatives.

### How do I update gpt-oss to newer versions?
Jan automatically notifies you of updates. Simply click update in Jan Hub when new versions are available.

## Why Choose gpt-oss Over ChatGPT Plus?

gpt-oss advantages:
- $0/month vs $20/month for ChatGPT Plus
- 100% private - no data leaves your device
- Unlimited usage - no rate limits or restrictions
- Works offline - no internet required after setup
- Commercial use allowed - build businesses with it

When to choose ChatGPT Plus instead:
- You need the absolute best performance
- You don't want to manage a local installation
- You have less than 16GB of RAM

## Get started with gpt-oss today

Ready to try gpt-oss?
- Download Jan: [https://jan.ai/](https://jan.ai/)
- View the source code: [https://github.com/menloresearch/jan](https://github.com/menloresearch/jan)
- Need help? Check our [local AI guide](/post/run-ai-models-locally) for beginners

<CTABlog />
@@ -6,9 +6,11 @@
       "controllerType": "dropdown",
       "controllerProps": {
         "value": "none",
-        "options": []
+        "options": [],
+        "recommended": ""
       }
     },

     {
       "key": "auto_update_engine",
       "title": "Auto update engine",
@@ -23,18 +25,6 @@
       "controllerType": "checkbox",
       "controllerProps": { "value": true }
     },
-    {
-      "key": "chat_template",
-      "title": "Custom Jinja Chat template",
-      "description": "Custom Jinja chat_template to be used for the model",
-      "controllerType": "input",
-      "controllerProps": {
-        "value": "",
-        "placeholder": "e.g., {% for message in messages %}...{% endfor %} (default is read from GGUF)",
-        "type": "text",
-        "textAlign": "right"
-      }
-    },
     {
       "key": "threads",
       "title": "Threads",
@@ -27,8 +27,18 @@ export async function listSupportedBackends(): Promise<
     if (features.avx) supportedBackends.push('win-avx-x64')
     if (features.avx2) supportedBackends.push('win-avx2-x64')
     if (features.avx512) supportedBackends.push('win-avx512-x64')
-    if (features.cuda11) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
-    if (features.cuda12) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
+    if (features.cuda11) {
+      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu11.7-x64')
+      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
+      else if (features.avx) supportedBackends.push('win-avx-cuda-cu11.7-x64')
+      else supportedBackends.push('win-noavx-cuda-cu11.7-x64')
+    }
+    if (features.cuda12) {
+      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu12.0-x64')
+      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
+      else if (features.avx) supportedBackends.push('win-avx-cuda-cu12.0-x64')
+      else supportedBackends.push('win-noavx-cuda-cu12.0-x64')
+    }
     if (features.vulkan) supportedBackends.push('win-vulkan-x64')
   }
   // not available yet, placeholder for future
@@ -39,8 +49,22 @@ export async function listSupportedBackends(): Promise<
     if (features.avx) supportedBackends.push('linux-avx-x64')
     if (features.avx2) supportedBackends.push('linux-avx2-x64')
     if (features.avx512) supportedBackends.push('linux-avx512-x64')
-    if (features.cuda11) supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
-    if (features.cuda12) supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
+    if (features.cuda11) {
+      if (features.avx512)
+        supportedBackends.push('linux-avx512-cuda-cu11.7-x64')
+      else if (features.avx2)
+        supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
+      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64')
+      else supportedBackends.push('linux-noavx-cuda-cu11.7-x64')
+    }
+    if (features.cuda12) {
+      if (features.avx512)
+        supportedBackends.push('linux-avx512-cuda-cu12.0-x64')
+      else if (features.avx2)
+        supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
+      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64')
+      else supportedBackends.push('linux-noavx-cuda-cu12.0-x64')
+    }
     if (features.vulkan) supportedBackends.push('linux-vulkan-x64')
   }
   // not available yet, placeholder for future
@@ -236,10 +260,16 @@ async function _getSupportedFeatures() {
       if (compareVersions(driverVersion, minCuda12DriverVersion) >= 0)
         features.cuda12 = true
     }

-    if (gpuInfo.vulkan_info?.api_version) features.vulkan = true
+    // Vulkan support check - only discrete GPUs with 6GB+ VRAM
+    if (
+      gpuInfo.vulkan_info?.api_version &&
+      gpuInfo.vulkan_info?.device_type === 'DISCRETE_GPU' &&
+      gpuInfo.total_memory >= 6 * 1024
+    ) {
+      // 6GB (total_memory is in MB)
+      features.vulkan = true
+    }
   }

   return features
 }
@@ -39,6 +39,7 @@ type LlamacppConfig = {
   auto_unload: boolean
   chat_template: string
   n_gpu_layers: number
+  override_tensor_buffer_t: string
   ctx_size: number
   threads: number
   threads_batch: number
@@ -144,7 +145,6 @@ export default class llamacpp_extension extends AIEngine {
   readonly providerId: string = 'llamacpp'

   private config: LlamacppConfig
-  private activeSessions: Map<number, SessionInfo> = new Map()
   private providerPath!: string
   private apiSecret: string = 'JustAskNow'
   private pendingDownloads: Map<string, Promise<void>> = new Map()
@@ -297,6 +297,12 @@ export default class llamacpp_extension extends AIEngine {
       return { value: key, name: key }
     })

+    // Set the recommended backend based on bestAvailableBackendString
+    if (bestAvailableBackendString) {
+      backendSetting.controllerProps.recommended =
+        bestAvailableBackendString
+    }
+
     const savedBackendSetting = await this.getSetting<string>(
       'version_backend',
       originalDefaultBackendValue
@@ -357,9 +363,16 @@ export default class llamacpp_extension extends AIEngine {

     // Handle fresh installation case where version_backend might be 'none' or invalid
     if (
-      !effectiveBackendString ||
-      effectiveBackendString === 'none' ||
-      !effectiveBackendString.includes('/')
+      (!effectiveBackendString ||
+        effectiveBackendString === 'none' ||
+        !effectiveBackendString.includes('/') ||
+        // If the selected backend is not in the list of supported backends
+        // Need to reset too
+        !version_backends.some(
+          (e) => `${e.version}/${e.backend}` === effectiveBackendString
+        )) &&
+      // Ensure we have a valid best available backend
+      bestAvailableBackendString
     ) {
       effectiveBackendString = bestAvailableBackendString
       logger.info(
@@ -380,6 +393,17 @@ export default class llamacpp_extension extends AIEngine {
       })
     )
     logger.info(`Updated UI settings to show: ${effectiveBackendString}`)
+
+    // Emit for updating fe
+    if (events && typeof events.emit === 'function') {
+      logger.info(
+        `Emitting settingsChanged event for version_backend with value: ${effectiveBackendString}`
+      )
+      events.emit('settingsChanged', {
+        key: 'version_backend',
+        value: effectiveBackendString,
+      })
+    }
   }

   // Download and install the backend if not already present
@@ -746,16 +770,6 @@ export default class llamacpp_extension extends AIEngine {
   override async onUnload(): Promise<void> {
-    // Terminate all active sessions
-    for (const [_, sInfo] of this.activeSessions) {
-      try {
-        await this.unload(sInfo.model_id)
-      } catch (error) {
-        logger.error(`Failed to unload model ${sInfo.model_id}:`, error)
-      }
-    }
-
-    // Clear the sessions map
-    this.activeSessions.clear()
   }

   onSettingUpdate<T>(key: string, value: T): void {
@@ -1079,67 +1093,13 @@ export default class llamacpp_extension extends AIEngine {
   * Function to find a random port
   */
   private async getRandomPort(): Promise<number> {
-    const MAX_ATTEMPTS = 20000
-    let attempts = 0
-
-    while (attempts < MAX_ATTEMPTS) {
-      const port = Math.floor(Math.random() * 1000) + 3000
-
-      const isAlreadyUsed = Array.from(this.activeSessions.values()).some(
-        (info) => info.port === port
-      )
-
-      if (!isAlreadyUsed) {
-        const isAvailable = await invoke<boolean>('is_port_available', { port })
-        if (isAvailable) return port
-      }
-
-      attempts++
+    try {
+      const port = await invoke<number>('get_random_port')
+      return port
+    } catch {
+      logger.error('Unable to find a suitable port')
+      throw new Error('Unable to find a suitable port for model')
     }
-
-    throw new Error('Failed to find an available port for the model to load')
   }

-  private async sleep(ms: number): Promise<void> {
-    return new Promise((resolve) => setTimeout(resolve, ms))
-  }
-
-  private async waitForModelLoad(
-    sInfo: SessionInfo,
-    timeoutMs = 240_000
-  ): Promise<void> {
-    await this.sleep(500) // Wait before first check
-    const start = Date.now()
-    while (Date.now() - start < timeoutMs) {
-      try {
-        const res = await fetch(`http://localhost:${sInfo.port}/health`)
-
-        if (res.status === 503) {
-          const body = await res.json()
-          const msg = body?.error?.message ?? 'Model loading'
-          logger.info(`waiting for model load... (${msg})`)
-        } else if (res.ok) {
-          const body = await res.json()
-          if (body.status === 'ok') {
-            return
-          } else {
-            logger.warn('Unexpected OK response from /health:', body)
-          }
-        } else {
-          logger.warn(`Unexpected status ${res.status} from /health`)
-        }
-      } catch (e) {
-        await this.unload(sInfo.model_id)
-        throw new Error(`Model appears to have crashed: ${e}`)
-      }
-
-      await this.sleep(800) // Retry interval
-    }
-
-    await this.unload(sInfo.model_id)
-    throw new Error(
-      `Timed out loading model after ${timeoutMs}... killing llamacpp`
-    )
-  }

   override async load(
@@ -1147,7 +1107,7 @@ export default class llamacpp_extension extends AIEngine {
     overrideSettings?: Partial<LlamacppConfig>,
     isEmbedding: boolean = false
   ): Promise<SessionInfo> {
-    const sInfo = this.findSessionByModel(modelId)
+    const sInfo = await this.findSessionByModel(modelId)
     if (sInfo) {
       throw new Error('Model already loaded!!')
     }
@@ -1238,6 +1198,14 @@ export default class llamacpp_extension extends AIEngine {
     args.push('--jinja')
     args.push('--reasoning-format', 'none')
     args.push('-m', modelPath)
+    // For overriding tensor buffer type, useful where
+    // massive MOE models can be made faster by keeping attention on the GPU
+    // and offloading the expert FFNs to the CPU.
+    // This is an expert level settings and should only be used by people
+    // who knows what they are doing.
+    // Takes a regex with matching tensor name as input
+    if (cfg.override_tensor_buffer_t)
+      args.push('--override-tensor', cfg.override_tensor_buffer_t)
     args.push('-a', modelId)
     args.push('--port', String(port))
     if (modelConfig.mmproj_path) {
@@ -1247,11 +1215,6 @@ export default class llamacpp_extension extends AIEngine {
       ])
       args.push('--mmproj', mmprojPath)
     }
-
-    if (cfg.ctx_size !== undefined) {
-      args.push('-c', String(cfg.ctx_size))
-    }

     // Add remaining options from the interface
     if (cfg.chat_template) args.push('--chat-template', cfg.chat_template)
     const gpu_layers =
@@ -1263,8 +1226,9 @@ export default class llamacpp_extension extends AIEngine {
     if (cfg.batch_size > 0) args.push('--batch-size', String(cfg.batch_size))
     if (cfg.ubatch_size > 0) args.push('--ubatch-size', String(cfg.ubatch_size))
     if (cfg.device.length > 0) args.push('--device', cfg.device)
-    if (cfg.split_mode.length > 0) args.push('--split-mode', cfg.split_mode)
-    if (cfg.main_gpu !== undefined)
+    if (cfg.split_mode.length > 0 && cfg.split_mode != 'layer')
+      args.push('--split-mode', cfg.split_mode)
+    if (cfg.main_gpu !== undefined && cfg.main_gpu != 0)
       args.push('--main-gpu', String(cfg.main_gpu))

     // Boolean flags
@@ -1280,19 +1244,26 @@ export default class llamacpp_extension extends AIEngine {
     } else {
       if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size))
       if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict))
-      args.push('--cache-type-k', cfg.cache_type_k)
+      if (cfg.cache_type_k && cfg.cache_type_k != 'f16')
+        args.push('--cache-type-k', cfg.cache_type_k)
       if (
-        (cfg.flash_attn && cfg.cache_type_v != 'f16') ||
+        cfg.flash_attn &&
+        cfg.cache_type_v != 'f16' &&
+        cfg.cache_type_v != 'f32'
       ) {
         args.push('--cache-type-v', cfg.cache_type_v)
       }
-      args.push('--defrag-thold', String(cfg.defrag_thold))
+      if (cfg.defrag_thold && cfg.defrag_thold != 0.1)
+        args.push('--defrag-thold', String(cfg.defrag_thold))

-      args.push('--rope-scaling', cfg.rope_scaling)
-      args.push('--rope-scale', String(cfg.rope_scale))
-      args.push('--rope-freq-base', String(cfg.rope_freq_base))
-      args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
+      if (cfg.rope_scaling && cfg.rope_scaling != 'none')
+        args.push('--rope-scaling', cfg.rope_scaling)
+      if (cfg.rope_scale && cfg.rope_scale != 1)
+        args.push('--rope-scale', String(cfg.rope_scale))
+      if (cfg.rope_freq_base && cfg.rope_freq_base != 0)
+        args.push('--rope-freq-base', String(cfg.rope_freq_base))
+      if (cfg.rope_freq_scale && cfg.rope_freq_scale != 1)
+        args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
     }

     logger.info('Calling Tauri command llama_load with args:', args)
@@ -1306,26 +1277,20 @@ export default class llamacpp_extension extends AIEngine {
         libraryPath,
         args,
       })

-      // Store the session info for later use
-      this.activeSessions.set(sInfo.pid, sInfo)
-      await this.waitForModelLoad(sInfo)

       return sInfo
     } catch (error) {
-      logger.error('Error loading llama-server:\n', error)
-      throw new Error(`Failed to load llama-server: ${error}`)
+      logger.error('Error in load command:\n', error)
+      throw new Error(`Failed to load model:\n${error}`)
     }
   }

   override async unload(modelId: string): Promise<UnloadResult> {
-    const sInfo: SessionInfo = this.findSessionByModel(modelId)
+    const sInfo: SessionInfo = await this.findSessionByModel(modelId)
     if (!sInfo) {
       throw new Error(`No active session found for model: ${modelId}`)
     }
     const pid = sInfo.pid
     try {
-      this.activeSessions.delete(pid)

       // Pass the PID as the session_id
       const result = await invoke<UnloadResult>('unload_llama_model', {
@@ -1337,13 +1302,11 @@ export default class llamacpp_extension extends AIEngine {
         logger.info(`Successfully unloaded model with PID ${pid}`)
       } else {
         logger.warn(`Failed to unload model: ${result.error}`)
-        this.activeSessions.set(sInfo.pid, sInfo)
       }

       return result
     } catch (error) {
       logger.error('Error in unload command:', error)
-      this.activeSessions.set(sInfo.pid, sInfo)
       return {
         success: false,
         error: `Failed to unload model: ${error}`,
@@ -1466,17 +1429,21 @@ export default class llamacpp_extension extends AIEngine {
     }
   }

-  private findSessionByModel(modelId: string): SessionInfo | undefined {
-    return Array.from(this.activeSessions.values()).find(
-      (session) => session.model_id === modelId
-    )
+  private async findSessionByModel(modelId: string): Promise<SessionInfo> {
+    try {
+      let sInfo = await invoke<SessionInfo>('find_session_by_model', { modelId })
+      return sInfo
+    } catch (e) {
+      logger.error(e)
+      throw new Error(String(e))
+    }
   }

   override async chat(
     opts: chatCompletionRequest,
     abortController?: AbortController
   ): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> {
-    const sessionInfo = this.findSessionByModel(opts.model)
+    const sessionInfo = await this.findSessionByModel(opts.model)
     if (!sessionInfo) {
       throw new Error(`No active session found for model: ${opts.model}`)
     }
@@ -1492,7 +1459,6 @@ export default class llamacpp_extension extends AIEngine {
         throw new Error('Model appears to have crashed! Please reload!')
       }
     } else {
-      this.activeSessions.delete(sessionInfo.pid)
       throw new Error('Model have crashed! Please reload!')
     }
     const baseUrl = `http://localhost:${sessionInfo.port}/v1`
@@ -1541,11 +1507,13 @@ export default class llamacpp_extension extends AIEngine {
   }

   override async getLoadedModels(): Promise<string[]> {
-    let lmodels: string[] = []
-    for (const [_, sInfo] of this.activeSessions) {
-      lmodels.push(sInfo.model_id)
-    }
-    return lmodels
+    try {
+      let models: string[] = await invoke<string[]>('get_loaded_models')
+      return models
+    } catch (e) {
+      logger.error(e)
+      throw new Error(e)
+    }
   }

   async getDevices(): Promise<DeviceList[]> {
@@ -1575,7 +1543,7 @@ export default class llamacpp_extension extends AIEngine {
   }

   async embed(text: string[]): Promise<EmbeddingResponse> {
-    let sInfo = this.findSessionByModel('sentence-transformer-mini')
+    let sInfo = await this.findSessionByModel('sentence-transformer-mini')
     if (!sInfo) {
       const downloadedModelList = await this.list()
       if (
@@ -51,6 +51,164 @@ describe('Backend functions', () => {
    ])
  })

  it('should return CUDA backends with proper CPU instruction detection for Windows', async () => {
    // Mock system info with CUDA support and AVX512
    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
      os_type: 'windows',
      cpu: {
        arch: 'x86_64',
        extensions: ['avx', 'avx2', 'avx512'],
      },
      gpus: [
        {
          driver_version: '530.41',
          nvidia_info: { compute_capability: '8.6' },
        },
      ],
    })

    // Mock GitHub releases with CUDA backends
    const mockReleases = [
      {
        tag_name: 'v1.0.0',
        assets: [
          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
        ],
      },
    ]

    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(mockReleases),
    })

    const result = await listSupportedBackends()

    expect(result).toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
  })

  it('should select appropriate CUDA backend based on CPU features - AVX2 only', async () => {
    // Mock system info with CUDA support but only AVX2
    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
      os_type: 'windows',
      cpu: {
        arch: 'x86_64',
        extensions: ['avx', 'avx2'], // No AVX512
      },
      gpus: [
        {
          driver_version: '530.41',
          nvidia_info: { compute_capability: '8.6' },
        },
      ],
    })

    const mockReleases = [
      {
        tag_name: 'v1.0.0',
        assets: [
          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
        ],
      },
    ]

    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(mockReleases),
    })

    const result = await listSupportedBackends()

    expect(result).toContain({ version: 'v1.0.0', backend: 'win-avx2-cuda-cu12.0-x64' })
    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
  })

  it('should select appropriate CUDA backend based on CPU features - no AVX', async () => {
    // Mock system info with CUDA support but no AVX
    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
      os_type: 'windows',
      cpu: {
        arch: 'x86_64',
        extensions: [], // No AVX extensions
      },
      gpus: [
        {
          driver_version: '530.41',
          nvidia_info: { compute_capability: '8.6' },
        },
      ],
    })

    const mockReleases = [
      {
        tag_name: 'v1.0.0',
        assets: [
          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
        ],
      },
    ]

    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(mockReleases),
    })

    const result = await listSupportedBackends()

    expect(result).toContain({ version: 'v1.0.0', backend: 'win-noavx-cuda-cu12.0-x64' })
    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx2-cuda-cu12.0-x64' })
    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
  })

  it('should return CUDA backends with proper CPU instruction detection for Linux', async () => {
    // Mock system info with CUDA support and AVX support
    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
      os_type: 'linux',
      cpu: {
        arch: 'x86_64',
        extensions: ['avx'], // Only AVX, no AVX2
      },
      gpus: [
        {
          driver_version: '530.60.13',
          nvidia_info: { compute_capability: '8.6' },
        },
      ],
    })

    const mockReleases = [
      {
        tag_name: 'v1.0.0',
        assets: [
          { name: 'llama-v1.0.0-bin-linux-avx512-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-linux-avx2-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-linux-avx-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-linux-noavx-cuda-cu12.0-x64.tar.gz' },
        ],
      },
    ]

    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(mockReleases),
    })

    const result = await listSupportedBackends()

    expect(result).toContain({ version: 'v1.0.0', backend: 'linux-avx-cuda-cu12.0-x64' })
    expect(result).not.toContain({ version: 'v1.0.0', backend: 'linux-avx2-cuda-cu12.0-x64' })
    expect(result).not.toContain({ version: 'v1.0.0', backend: 'linux-avx512-cuda-cu12.0-x64' })
  })

  it('should return supported backends for macOS arm64', async () => {
    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
      os_type: 'macos',
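The three Windows cases and the Linux case above pin down one rule: offer only the CUDA build whose CPU-feature suffix matches the best instruction set the host actually supports. A sketch of the selection step the tests imply; the names are illustrative, not the code under test:

```typescript
// Illustrative fallback chain: avx512 -> avx2 -> avx -> noavx.
type CpuSuffix = 'avx512' | 'avx2' | 'avx' | 'noavx'

function pickCpuSuffix(extensions: string[]): CpuSuffix {
  if (extensions.includes('avx512')) return 'avx512'
  if (extensions.includes('avx2')) return 'avx2'
  if (extensions.includes('avx')) return 'avx'
  return 'noavx'
}

// pickCpuSuffix(['avx', 'avx2']) === 'avx2', so only
// win-avx2-cuda-cu12.0-x64 is kept, as the second test asserts.
```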
@@ -17,7 +17,7 @@
    "test:coverage": "vitest run --coverage",
    "test:prepare": "yarn build:icon && yarn copy:assets:tauri && yarn build --no-bundle ",
    "dev:web": "yarn workspace @janhq/web-app dev",
    "dev:tauri": "yarn build:icon && yarn copy:assets:tauri && tauri dev",
    "dev:tauri": "yarn build:icon && yarn copy:assets:tauri && cross-env IS_CLEAN=true tauri dev",
    "copy:assets:tauri": "cpx \"pre-install/*.tgz\" \"src-tauri/resources/pre-install/\"",
    "download:lib": "node ./scripts/download-lib.mjs",
    "download:bin": "node ./scripts/download-bin.mjs",
@@ -63,8 +63,12 @@ nix = "=0.30.1"

[target.'cfg(windows)'.dependencies]
libc = "0.2.172"
windows-sys = { version = "0.60.2", features = ["Win32_Storage_FileSystem"] }

[target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies]
tauri-plugin-updater = "2"
once_cell = "1.18"
tauri-plugin-single-instance = { version = "2.0.0", features = ["deep-link"] }

[target.'cfg(windows)'.dev-dependencies]
tempfile = "3.20.0"
@@ -10,7 +10,11 @@ use tokio::{
    time::{sleep, timeout},
};

use super::{cmd::get_jan_data_folder_path, state::AppState};
use super::{
    cmd::get_jan_data_folder_path,
    state::AppState,
    utils::can_override_npx,
};

const DEFAULT_MCP_CONFIG: &str = r#"{
  "mcpServers": {
@@ -512,8 +516,8 @@ async fn schedule_mcp_start_task<R: Runtime>(
        .ok_or_else(|| format!("Failed to extract command args from config for {name}"))?;

    let mut cmd = Command::new(command.clone());

    if command == "npx" {
    if command == "npx" && can_override_npx() {
        let mut cache_dir = app_path.clone();
        cache_dir.push(".npx");
        let bun_x_path = format!("{}/bun", bin_path.display());
@@ -43,8 +43,8 @@ pub fn install_extensions(app: tauri::AppHandle, force: bool) -> Result<(), Stri

    let mut clean_up = force;

    // Check CLEAN environment variable to optionally skip extension install
    if std::env::var("CLEAN").is_ok() {
    // Check IS_CLEAN environment variable to optionally skip extension install
    if std::env::var("IS_CLEAN").is_ok() {
        clean_up = true;
    }
    log::info!(
@@ -1,7 +1,9 @@
use base64::{engine::general_purpose, Engine as _};
use hmac::{Hmac, Mac};
use rand::{rngs::StdRng, Rng, SeedableRng};
use serde::{Deserialize, Serialize};
use sha2::Sha256;
use std::collections::HashSet;
use std::path::PathBuf;
use std::process::Stdio;
use std::time::Duration;
@@ -67,13 +69,39 @@ pub struct DeviceInfo {
    pub free: i32,
}

#[cfg(windows)]
use std::os::windows::ffi::OsStrExt;

#[cfg(windows)]
use std::ffi::OsStr;

#[cfg(windows)]
use windows_sys::Win32::Storage::FileSystem::GetShortPathNameW;

#[cfg(windows)]
pub fn get_short_path<P: AsRef<std::path::Path>>(path: P) -> Option<String> {
    let wide: Vec<u16> = OsStr::new(path.as_ref())
        .encode_wide()
        .chain(Some(0))
        .collect();

    let mut buffer = vec![0u16; 260];
    let len = unsafe { GetShortPathNameW(wide.as_ptr(), buffer.as_mut_ptr(), buffer.len() as u32) };

    if len > 0 {
        Some(String::from_utf16_lossy(&buffer[..len as usize]))
    } else {
        None
    }
}
// --- Load Command ---
#[tauri::command]
pub async fn load_llama_model(
    state: State<'_, AppState>,
    backend_path: &str,
    library_path: Option<&str>,
    args: Vec<String>,
    mut args: Vec<String>,
) -> ServerResult<SessionInfo> {
    let mut process_map = state.llama_server_process.lock().await;

@@ -105,13 +133,38 @@ pub async fn load_llama_model(
            8080
        }
    };

    let model_path = args
    // FOR MODEL PATH; TODO: DO SIMILARLY FOR MMPROJ PATH
    let model_path_index = args
        .iter()
        .position(|arg| arg == "-m")
        .and_then(|i| args.get(i + 1))
        .cloned()
        .unwrap_or_default();
        .ok_or(ServerError::LlamacppError("Missing `-m` flag".into()))?;

    let model_path = args
        .get(model_path_index + 1)
        .ok_or(ServerError::LlamacppError("Missing path after `-m`".into()))?
        .clone();

    let model_path_pb = PathBuf::from(model_path);
    if !model_path_pb.exists() {
        return Err(ServerError::LlamacppError(format!(
            "Invalid or inaccessible model path: {}",
            model_path_pb.display().to_string(),
        )));
    }
    #[cfg(windows)]
    {
        // use short path on Windows
        if let Some(short) = get_short_path(&model_path_pb) {
            args[model_path_index + 1] = short;
        } else {
            args[model_path_index + 1] = model_path_pb.display().to_string();
        }
    }
    #[cfg(not(windows))]
    {
        args[model_path_index + 1] = model_path_pb.display().to_string();
    }
    // -----------------------------------------------------------------

    let api_key = args
        .iter()
@@ -181,7 +234,6 @@ pub async fn load_llama_model(

    // Create channels for communication between tasks
    let (ready_tx, mut ready_rx) = mpsc::channel::<bool>(1);
    let (error_tx, mut error_rx) = mpsc::channel::<String>(1);

    // Spawn task to monitor stdout for readiness
    let _stdout_task = tokio::spawn(async move {
@@ -228,20 +280,10 @@ pub async fn load_llama_model(

            // Check for critical error indicators that should stop the process
            let line_lower = line.to_string().to_lowercase();
            if line_lower.contains("error loading model")
                || line_lower.contains("unknown model architecture")
                || line_lower.contains("fatal")
                || line_lower.contains("cuda error")
                || line_lower.contains("out of memory")
                || line_lower.contains("error")
                || line_lower.contains("failed")
            {
                let _ = error_tx.send(line.to_string()).await;
            }
            // Check for readiness indicator - llama-server outputs this when ready
            else if line.contains("server is listening on")
                || line.contains("starting the main loop")
                || line.contains("server listening on")
            if line_lower.contains("server is listening on")
                || line_lower.contains("starting the main loop")
                || line_lower.contains("server listening on")
            {
                log::info!("Server appears to be ready based on stderr: '{}'", line);
                let _ = ready_tx.send(true).await;
@@ -279,26 +321,6 @@ pub async fn load_llama_model(
                log::info!("Server is ready to accept requests!");
                break;
            }
            // Error occurred
            Some(error_msg) = error_rx.recv() => {
                log::error!("Server encountered an error: {}", error_msg);

                // Give process a moment to exit naturally
                tokio::time::sleep(Duration::from_millis(100)).await;

                // Check if process already exited
                if let Some(status) = child.try_wait()? {
                    log::info!("Process exited with code {:?}", status);
                    return Err(ServerError::LlamacppError(error_msg));
                } else {
                    log::info!("Process still running, killing it...");
                    let _ = child.kill().await;
                }

                // Get full stderr output
                let stderr_output = stderr_task.await.unwrap_or_default();
                return Err(ServerError::LlamacppError(format!("Error: {}\n\nFull stderr:\n{}", error_msg, stderr_output)));
            }
            // Check for process exit more frequently
            _ = tokio::time::sleep(Duration::from_millis(50)) => {
                // Check if process exited
@@ -332,7 +354,7 @@ pub async fn load_llama_model(
        pid: pid.clone(),
        port: port,
        model_id: model_id,
        model_path: model_path,
        model_path: model_path_pb.display().to_string(),
        api_key: api_key,
    };
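From the extension's side all of this is reached through a single invoke call. A hedged sketch of a call site; the paths and flags are placeholders, and the argument keys mirror the Rust signature above:

```typescript
import { invoke } from '@tauri-apps/api/core'

interface SessionInfo {
  pid: number
  port: number
  model_id: string
  model_path: string
  api_key: string
}

// Placeholder values for illustration. The Rust side validates the `-m`
// argument, rejects missing or nonexistent model paths, and on Windows
// rewrites the path to its 8.3 short form before spawning llama-server.
const session = await invoke<SessionInfo>('load_llama_model', {
  backendPath: '/path/to/backends/v1.0.0/build/bin',
  libraryPath: null,
  args: ['-m', '/models/my-model.gguf', '--port', '3100'],
})
console.log(`llama-server ready on port ${session.port}`)
```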
@@ -704,7 +726,308 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<
}

// check port availability
#[tauri::command]
pub fn is_port_available(port: u16) -> bool {
fn is_port_available(port: u16) -> bool {
    std::net::TcpListener::bind(("127.0.0.1", port)).is_ok()
}

#[tauri::command]
pub async fn get_random_port(state: State<'_, AppState>) -> Result<u16, String> {
    const MAX_ATTEMPTS: u32 = 20000;
    let mut attempts = 0;
    let mut rng = StdRng::from_entropy();

    // Get all active ports from sessions
    let map = state.llama_server_process.lock().await;

    let used_ports: HashSet<u16> = map
        .values()
        .filter_map(|session| {
            // Convert valid ports to u16 (filter out placeholder ports like -1)
            if session.info.port > 0 && session.info.port <= u16::MAX as i32 {
                Some(session.info.port as u16)
            } else {
                None
            }
        })
        .collect();

    drop(map); // unlock early

    while attempts < MAX_ATTEMPTS {
        let port = rng.gen_range(3000..4000);

        if used_ports.contains(&port) {
            attempts += 1;
            continue;
        }

        if is_port_available(port) {
            return Ok(port);
        }

        attempts += 1;
    }

    Err("Failed to find an available port for the model to load".into())
}
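get_random_port draws from the 3000-3999 range, skipping ports that either belong to a live session or fail a local bind probe. Usage from the frontend is a one-liner (sketch; the command is registered in the lib.rs handler list further down):

```typescript
import { invoke } from '@tauri-apps/api/core'

// Reserve a free local port before asking the backend to spawn llama-server.
const port = await invoke<number>('get_random_port')
console.log(`next llama-server instance will bind 127.0.0.1:${port}`)
```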
// find session
#[tauri::command]
pub async fn find_session_by_model(
    model_id: String,
    state: State<'_, AppState>,
) -> Result<Option<SessionInfo>, String> {
    let map = state.llama_server_process.lock().await;

    let session_info = map
        .values()
        .find(|backend_session| backend_session.info.model_id == model_id)
        .map(|backend_session| backend_session.info.clone());

    Ok(session_info)
}

// get running models
#[tauri::command]
pub async fn get_loaded_models(state: State<'_, AppState>) -> Result<Vec<String>, String> {
    let map = state.llama_server_process.lock().await;

    let model_ids = map
        .values()
        .map(|backend_session| backend_session.info.model_id.clone())
        .collect();

    Ok(model_ids)
}

// tests
//
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    #[cfg(windows)]
    use tempfile;

    #[test]
    fn test_parse_multiple_devices() {
        let output = r#"ggml_vulkan: Found 2 Vulkan devices:
ggml_vulkan: 0 = NVIDIA GeForce RTX 3090 (NVIDIA) | uma: 0 | fp16: 1 | bf16: 0 | warp size: 32 | shared memory: 49152 | int dot: 0 | matrix cores: KHR_coopmat
ggml_vulkan: 1 = AMD Radeon Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 0 | matrix cores: KHR_coopmat
Available devices:
Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)
Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)
"#;

        let devices = parse_device_output(output).unwrap();

        assert_eq!(devices.len(), 2);

        // Check first device
        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[0].name, "NVIDIA GeForce RTX 3090");
        assert_eq!(devices[0].mem, 24576);
        assert_eq!(devices[0].free, 24576);

        // Check second device
        assert_eq!(devices[1].id, "Vulkan1");
        assert_eq!(devices[1].name, "AMD Radeon Graphics (RADV GFX1151)");
        assert_eq!(devices[1].mem, 87722);
        assert_eq!(devices[1].free, 87722);
    }

    #[test]
    fn test_parse_single_device() {
        let output = r#"Available devices:
CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)"#;

        let devices = parse_device_output(output).unwrap();

        assert_eq!(devices.len(), 1);
        assert_eq!(devices[0].id, "CUDA0");
        assert_eq!(devices[0].name, "NVIDIA GeForce RTX 4090");
        assert_eq!(devices[0].mem, 24576);
        assert_eq!(devices[0].free, 24000);
    }

    #[test]
    fn test_parse_with_extra_whitespace_and_empty_lines() {
        let output = r#"
Available devices:

Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)

Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)

"#;

        let devices = parse_device_output(output).unwrap();

        assert_eq!(devices.len(), 2);
        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[1].id, "Vulkan1");
    }

    #[test]
    fn test_parse_different_backends() {
        let output = r#"Available devices:
CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)
Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)
SYCL0: Intel(R) Arc(TM) A750 Graphics (8000 MiB, 7721 MiB free)"#;

        let devices = parse_device_output(output).unwrap();

        assert_eq!(devices.len(), 3);

        assert_eq!(devices[0].id, "CUDA0");
        assert_eq!(devices[0].name, "NVIDIA GeForce RTX 4090");

        assert_eq!(devices[1].id, "Vulkan0");
        assert_eq!(devices[1].name, "NVIDIA GeForce RTX 3090");

        assert_eq!(devices[2].id, "SYCL0");
        assert_eq!(devices[2].name, "Intel(R) Arc(TM) A750 Graphics");
        assert_eq!(devices[2].mem, 8000);
        assert_eq!(devices[2].free, 7721);
    }

    #[test]
    fn test_parse_complex_gpu_names() {
        let output = r#"Available devices:
Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)
Vulkan1: AMD Radeon RX 7900 XTX (Navi 31) [RDNA 3] (24576 MiB, 24000 MiB free)"#;

        let devices = parse_device_output(output).unwrap();

        assert_eq!(devices.len(), 2);

        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[0].name, "Intel(R) Arc(tm) A750 Graphics (DG2)");
        assert_eq!(devices[0].mem, 8128);
        assert_eq!(devices[0].free, 8128);

        assert_eq!(devices[1].id, "Vulkan1");
        assert_eq!(devices[1].name, "AMD Radeon RX 7900 XTX (Navi 31) [RDNA 3]");
        assert_eq!(devices[1].mem, 24576);
        assert_eq!(devices[1].free, 24000);
    }

    #[test]
    fn test_parse_no_devices() {
        let output = r#"Available devices:"#;

        let devices = parse_device_output(output).unwrap();
        assert_eq!(devices.len(), 0);
    }

    #[test]
    fn test_parse_missing_header() {
        let output = r#"Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)"#;

        let result = parse_device_output(output);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Could not find 'Available devices:' section"));
    }

    #[test]
    fn test_parse_malformed_device_line() {
        let output = r#"Available devices:
Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)
Invalid line without colon
Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)"#;

        let devices = parse_device_output(output).unwrap();

        // Should skip the malformed line and parse the valid ones
        assert_eq!(devices.len(), 2);
        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[1].id, "Vulkan1");
    }

    #[test]
    fn test_parse_device_line_individual() {
        // Test the individual line parser
        let line = "Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)";
        let device = parse_device_line(line).unwrap().unwrap();

        assert_eq!(device.id, "Vulkan0");
        assert_eq!(device.name, "NVIDIA GeForce RTX 3090");
        assert_eq!(device.mem, 24576);
        assert_eq!(device.free, 24576);
    }

    #[test]
    fn test_memory_pattern_detection() {
        assert!(is_memory_pattern("24576 MiB, 24576 MiB free"));
        assert!(is_memory_pattern("8000 MiB, 7721 MiB free"));
        assert!(!is_memory_pattern("just some text"));
        assert!(!is_memory_pattern("24576 MiB"));
        assert!(!is_memory_pattern("24576, 24576"));
    }

    #[test]
    fn test_parse_memory_value() {
        assert_eq!(parse_memory_value("24576 MiB").unwrap(), 24576);
        assert_eq!(parse_memory_value("7721 MiB free").unwrap(), 7721);
        assert_eq!(parse_memory_value("8000").unwrap(), 8000);

        assert!(parse_memory_value("").is_err());
        assert!(parse_memory_value("not_a_number MiB").is_err());
    }

    #[test]
    fn test_find_memory_pattern() {
        let text = "NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)";
        let result = find_memory_pattern(text);
        assert!(result.is_some());
        let (_start, content) = result.unwrap();
        assert_eq!(content, "24576 MiB, 24576 MiB free");

        // Test with multiple parentheses
        let text = "Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)";
        let result = find_memory_pattern(text);
        assert!(result.is_some());
        let (_start, content) = result.unwrap();
        assert_eq!(content, "8128 MiB, 8128 MiB free");
    }
    #[test]
    fn test_path_with_uncommon_dir_names() {
        const UNCOMMON_DIR_NAME: &str = "тест-你好-éàç-🚀";
        #[cfg(windows)]
        {
            let dir = tempfile::tempdir().expect("Failed to create temp dir");
            let long_path = dir.path().join(UNCOMMON_DIR_NAME);
            std::fs::create_dir(&long_path)
                .expect("Failed to create test directory with non-ASCII name");
            let short_path = get_short_path(&long_path)
                .expect("get_short_path should return a path on Windows");
            assert!(
                short_path.is_ascii(),
                "The resulting short path must be composed of only ASCII characters. Got: {}",
                short_path
            );
            assert!(
                PathBuf::from(&short_path).exists(),
                "The returned short path must exist on the filesystem"
            );
            assert_ne!(
                short_path,
                long_path.to_str().unwrap(),
                "Short path should not be the same as the long path"
            );
        }
        #[cfg(not(windows))]
        {
            // On Unix, paths are typically UTF-8 and there's no "short path" concept.
            let long_path_str = format!("/tmp/{}", UNCOMMON_DIR_NAME);
            let path_buf = PathBuf::from(&long_path_str);
            let displayed_path = path_buf.display().to_string();
            assert_eq!(
                displayed_path, long_path_str,
                "Path with non-ASCII characters should be preserved exactly on non-Windows platforms"
            );
        }
    }
}

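The tests above fix the parsing contract: the memory block is the last `(N MiB, M MiB free)` group on the line, so any earlier parentheses stay part of the device name. A rough TypeScript rendering of that rule, for illustration only; the real parser is the Rust code under test:

```typescript
interface DeviceInfo {
  id: string
  name: string
  mem: number
  free: number
}

// Parses lines such as:
//   "Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)"
// The greedy (.*) keeps everything up to the final memory group in the name.
function parseDeviceLine(line: string): DeviceInfo | null {
  const m = line.match(/^(\S+):\s+(.*)\s+\((\d+)\s+MiB,\s+(\d+)\s+MiB free\)\s*$/)
  if (!m) return null // malformed lines are skipped, as in the Rust tests
  return { id: m[1], name: m[2], mem: Number(m[3]), free: Number(m[4]) }
}
```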
@@ -103,6 +103,21 @@ pub fn normalize_path(path: &Path) -> PathBuf {
    ret
}

pub fn can_override_npx() -> bool {
    // On macOS with an Intel CPU we have to check for AVX2 support: the
    // `npx` command can be overridden with `bun` only when the CPU supports
    // AVX2; otherwise we must fall back to the default `npx` binary.
    #[cfg(all(target_os = "macos", any(target_arch = "x86", target_arch = "x86_64")))]
    {
        if !is_x86_feature_detected!("avx2") {
            log::warn!("Your CPU doesn't support the AVX2 instruction set; the default npx binary will be used");
            return false; // we cannot override npx with the bun binary
        }
    }

    true // by default, we can override npx with the bun binary
}

#[tauri::command]
pub fn write_yaml(
    app: tauri::AppHandle,
@@ -195,3 +210,4 @@ pub fn is_library_available(library: &str) -> bool {
        }
    }
}

@@ -95,7 +95,9 @@ pub fn run() {
            core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
            core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
            core::utils::extensions::inference_llamacpp_extension::server::get_devices,
            core::utils::extensions::inference_llamacpp_extension::server::is_port_available,
            core::utils::extensions::inference_llamacpp_extension::server::get_random_port,
            core::utils::extensions::inference_llamacpp_extension::server::find_session_by_model,
            core::utils::extensions::inference_llamacpp_extension::server::get_loaded_models,
            core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
            core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
        ])
@@ -129,6 +131,12 @@ pub fn run() {
            if let Err(e) = setup::install_extensions(app.handle().clone(), false) {
                log::error!("Failed to install extensions: {}", e);
            }

            #[cfg(any(windows, target_os = "linux"))]
            {
                use tauri_plugin_deep_link::DeepLinkExt;
                app.deep_link().register_all()?;
            }
            setup_mcp(app);
            Ok(())
        })
@@ -6,7 +6,7 @@
  "build": {
    "frontendDist": "../web-app/dist",
    "devUrl": "http://localhost:1420",
    "beforeDevCommand": "cross-env IS_TAURI=true CLEAN=true yarn dev:web",
    "beforeDevCommand": "cross-env IS_TAURI=true yarn dev:web",
    "beforeBuildCommand": "cross-env IS_TAURI=true yarn build:web"
  },
  "app": {
@@ -161,8 +161,14 @@ export function DownloadManagement() {
      console.debug('onFileDownloadError', state)
      removeDownload(state.modelId)
      removeLocalDownloadingModel(state.modelId)
      toast.error(t('common:toast.downloadFailed.title'), {
        id: 'download-failed',
        description: t('common:toast.downloadFailed.description', {
          item: state.modelId,
        }),
      })
    },
    [removeDownload, removeLocalDownloadingModel]
    [removeDownload, removeLocalDownloadingModel, t]
  )

  const onFileDownloadStopped = useCallback(
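Adding `t` to the dependency array is the real fix here: without it the memoized handler closes over the translator from the first render and keeps toasting in a stale language. A reduced illustration of the hazard; the toast import is an assumption and may differ from the app's actual library:

```typescript
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import { toast } from 'sonner' // assumption: swap for the app's toast library

function useDownloadErrorToast() {
  const { t } = useTranslation()
  // If `t` were omitted from the deps, this callback would keep the
  // translator captured at mount, surviving language switches.
  return useCallback(
    (item: string) =>
      toast.error(t('common:toast.downloadFailed.title'), {
        description: t('common:toast.downloadFailed.description', { item }),
      }),
    [t]
  )
}
```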
@@ -71,7 +71,7 @@ export function ModelSetting({
    })

    // Call debounced stopModel only when updating ctx_len or ngl
    if (key === 'ctx_len' || key === 'ngl') {
    if (key === 'ctx_len' || key === 'ngl' || key === 'chat_template') {
      debouncedStopModel(model.id)
    }
  }
@@ -101,9 +101,17 @@ export function ModelSetting({
      <div className="px-4 space-y-6">
        {Object.entries(model.settings || {}).map(([key, value]) => {
          const config = value as ProviderSetting

          return (
            <div key={key} className="space-y-2">
              <div className="flex items-start justify-between gap-8">
              <div
                className={cn(
                  'flex items-start justify-between gap-8 last:mb-2',
                  (key === 'chat_template' ||
                    key === 'override_tensor_buffer_t') &&
                    'flex-col gap-1 w-full'
                )}
              >
                <div className="space-y-1 mb-2">
                  <h3 className="font-medium">{config.title}</h3>
                  <p className="text-main-view-fg/70 text-xs">
@@ -30,14 +30,31 @@ const ThinkingBlock = ({ id, text }: Props) => {
  const { thinkingState, setThinkingState } = useThinkingStore()
  const { streamingContent } = useAppState()
  const { t } = useTranslation()
  const loading = !text.includes('</think>') && streamingContent
  // Check for thinking formats
  const hasThinkTag = text.includes('<think>') && !text.includes('</think>')
  const hasAnalysisChannel = text.includes('<|channel|>analysis<|message|>') && !text.includes('<|start|>assistant<|channel|>final<|message|>')
  const loading = (hasThinkTag || hasAnalysisChannel) && streamingContent
  const isExpanded = thinkingState[id] ?? (loading ? true : false)
  const handleClick = () => {
    const newExpandedState = !isExpanded
    setThinkingState(id, newExpandedState)
  }

  if (!text.replace(/<\/?think>/g, '').trim()) return null
  // Extract thinking content from either format
  const extractThinkingContent = (text: string) => {
    return text
      .replace(/<\/?think>/g, '')
      .replace(/<\|channel\|>analysis<\|message\|>/g, '')
      .replace(/<\|start\|>assistant<\|channel\|>final<\|message\|>/g, '')
      .replace(/assistant<\|channel\|>final<\|message\|>/g, '')
      .replace(/<\|channel\|>/g, '') // remove any remaining channel markers
      .replace(/<\|message\|>/g, '') // remove any remaining message markers
      .replace(/<\|start\|>/g, '') // remove any remaining start markers
      .trim()
  }

  const thinkingContent = extractThinkingContent(text)
  if (!thinkingContent) return null

  return (
    <div
@@ -63,7 +80,7 @@ const ThinkingBlock = ({ id, text }: Props) => {

      {isExpanded && (
        <div className="mt-2 pl-6 pr-4 text-main-view-fg/60">
          <RenderMarkdown content={text.replace(/<\/?think>/g, '').trim()} />
          <RenderMarkdown content={thinkingContent} />
        </div>
      )}
    </div>
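A hedged example of the new cleanup path: running a gpt-oss-style analysis-channel string through the same replacements as `extractThinkingContent` leaves only the reasoning text (the sample string is invented):

```typescript
const raw =
  '<|channel|>analysis<|message|>Weighing both options carefully...' +
  '<|start|>assistant<|channel|>final<|message|>'

// Same replacement chain as extractThinkingContent above.
const thinking = raw
  .replace(/<\/?think>/g, '')
  .replace(/<\|channel\|>analysis<\|message\|>/g, '')
  .replace(/<\|start\|>assistant<\|channel\|>final<\|message\|>/g, '')
  .trim()

console.log(thinking) // "Weighing both options carefully..."
```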
@@ -170,18 +170,33 @@ export const ThreadContent = memo(
    )

    const { reasoningSegment, textSegment } = useMemo(() => {
      const isThinking = text.includes('<think>') && !text.includes('</think>')
      if (isThinking) return { reasoningSegment: text, textSegment: '' }
      // Check for thinking formats
      const hasThinkTag = text.includes('<think>') && !text.includes('</think>')
      const hasAnalysisChannel = text.includes('<|channel|>analysis<|message|>') && !text.includes('<|start|>assistant<|channel|>final<|message|>')

      if (hasThinkTag || hasAnalysisChannel) return { reasoningSegment: text, textSegment: '' }

      const match = text.match(/<think>([\s\S]*?)<\/think>/)
      if (match?.index === undefined)
        return { reasoningSegment: undefined, textSegment: text }

      const splitIndex = match.index + match[0].length
      return {
        reasoningSegment: text.slice(0, splitIndex),
        textSegment: text.slice(splitIndex),
      // Check for completed think tag format
      const thinkMatch = text.match(/<think>([\s\S]*?)<\/think>/)
      if (thinkMatch?.index !== undefined) {
        const splitIndex = thinkMatch.index + thinkMatch[0].length
        return {
          reasoningSegment: text.slice(0, splitIndex),
          textSegment: text.slice(splitIndex),
        }
      }

      // Check for completed analysis channel format
      const analysisMatch = text.match(/<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/)
      if (analysisMatch?.index !== undefined) {
        const splitIndex = analysisMatch.index + analysisMatch[0].length
        return {
          reasoningSegment: text.slice(0, splitIndex),
          textSegment: text.slice(splitIndex),
        }
      }

      return { reasoningSegment: undefined, textSegment: text }
    }, [text])

    const { getMessages, deleteMessage } = useMessages()
@@ -36,7 +36,10 @@ export default function OutOfContextPromiseModal() {

  return (
    <Dialog open={isModalOpen} onOpenChange={handleDialogOpen}>
      <DialogContent>
      <DialogContent
        showCloseButton={false}
        onInteractOutside={(e) => e.preventDefault()}
      >
        <DialogHeader>
          <DialogTitle>{t('model-errors:title')}</DialogTitle>
        </DialogHeader>
@@ -57,7 +60,7 @@ export default function OutOfContextPromiseModal() {
            {t('model-errors:truncateInput')}
          </Button>
          <Button
            asChild
            autoFocus
            onClick={() => {
              handleContextLength()
            }}
@@ -5,30 +5,61 @@ import {
  DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu'

import {
  Tooltip,
  TooltipTrigger,
  TooltipContent,
} from '@/components/ui/tooltip'
import { IconStarFilled } from '@tabler/icons-react'
import { cn } from '@/lib/utils'

// Dropdown component
type DropdownControlProps = {
  value: string
  options?: Array<{ value: number | string; name: string }>
  recommended?: string
  onChange: (value: number | string) => void
}

export function DropdownControl({
  value,
  options = [],
  recommended,
  onChange,
}: DropdownControlProps) {
  const isSelected =
    options.find((option) => option.value === value)?.name || value

  return (
    <DropdownMenu>
      <DropdownMenuTrigger className="bg-main-view-fg/5 hover:bg-main-view-fg/8 px-2 py-1 rounded font-medium cursor-pointer">
        {options.find((option) => option.value === value)?.name || value}
      <DropdownMenuTrigger className="bg-main-view-fg/5 hover:bg-main-view-fg/8 px-3 py-1 rounded-sm font-medium cursor-pointer">
        {isSelected}
      </DropdownMenuTrigger>
      <DropdownMenuContent align="end">
      <DropdownMenuContent align="end" className="max-h-70">
        {options.map((option, optionIndex) => (
          <DropdownMenuItem
            key={optionIndex}
            onClick={() => onChange(option.value)}
            className={cn(
              'flex items-center justify-between my-1',
              isSelected === option.name
                ? 'bg-main-view-fg/6 hover:bg-main-view-fg/6'
                : ''
            )}
          >
            {option.name}
            <span>{option.name}</span>
            {recommended === option.value && (
              <Tooltip>
                <TooltipTrigger asChild>
                  <div className="cursor-pointer">
                    <IconStarFilled className="text-accent" />
                  </div>
                </TooltipTrigger>
                <TooltipContent side="top" sideOffset={8} className="z-50">
                  Recommended
                </TooltipContent>
              </Tooltip>
            )}
          </DropdownMenuItem>
        ))}
      </DropdownMenuContent>
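Usage sketch for the new `recommended` prop; the option values are invented, and the star-and-tooltip rendering comes from the component above:

```tsx
<DropdownControl
  value={selectedBackend}
  options={[
    { value: 'win-avx2-cuda-cu12.0-x64', name: 'CUDA (AVX2)' },
    { value: 'win-noavx-cuda-cu12.0-x64', name: 'CUDA (no AVX)' },
  ]}
  recommended="win-avx2-cuda-cu12.0-x64" // this entry gets the star + tooltip
  onChange={(value) => setSelectedBackend(String(value))}
/>
```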
@@ -28,6 +28,7 @@ type DynamicControllerProps = {
    min?: number
    max?: number
    step?: number
    recommended?: string
  }
  onChange: (value: string | boolean | number) => void
}
@@ -65,6 +66,7 @@ export function DynamicControllerSetting({
      <DropdownControl
        value={controllerProps.value as string}
        options={controllerProps.options}
        recommended={controllerProps.recommended}
        onChange={(newValue) => onChange(newValue)}
      />
    )
@@ -365,8 +365,7 @@ export const useChat = () => {
        if (
          typeof errorMessage === 'string' &&
          errorMessage.includes(OUT_OF_CONTEXT_SIZE) &&
          selectedModel &&
          troubleshooting
          selectedModel
        ) {
          const method = await showIncreaseContextSizeModal()
          if (method === 'ctx_len') {
@@ -25,8 +25,8 @@ type MCPServerStoreState = {
  editServer: (key: string, config: MCPServerConfig) => void
  deleteServer: (key: string) => void
  setServers: (servers: MCPServers) => void
  syncServers: () => void
  syncServersAndRestart: () => void
  syncServers: () => Promise<void>
  syncServersAndRestart: () => Promise<void>
}

export const useMCPServers = create<MCPServerStoreState>()((set, get) => ({
@@ -2,6 +2,7 @@ import { create } from 'zustand'
import { persist, createJSONStorage } from 'zustand/middleware'
import { localStorageKey } from '@/constants/localStorage'
import { sep } from '@tauri-apps/api/path'
import { modelSettings } from '@/lib/predefined'

type ModelProviderState = {
  providers: ModelProvider[]
@@ -211,8 +212,21 @@ export const useModelProvider = create<ModelProviderState>()(
      name: localStorageKey.modelProvider,
      storage: createJSONStorage(() => localStorage),
      migrate: (persistedState: unknown, version: number) => {
        const state = persistedState as ModelProviderState

        const state = persistedState as ModelProviderState & {
          providers: Array<
            ModelProvider & {
              models: Array<
                Model & {
                  settings?: Record<string, unknown> & {
                    chatTemplate?: string
                    chat_template?: string
                  }
                }
              >
            }
          >
        }

        // Migration for cont_batching description update (version 0 -> 1)
        if (version === 0 && state?.providers) {
          state.providers = state.providers.map((provider) => {
@@ -221,7 +235,8 @@ export const useModelProvider = create<ModelProviderState>()(
              if (setting.key === 'cont_batching') {
                return {
                  ...setting,
                  description: 'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.'
                  description:
                    'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.',
                }
              }
              return setting
@@ -230,9 +245,65 @@ export const useModelProvider = create<ModelProviderState>()(
            return provider
          })
        }

        // Migration for chatTemplate key to chat_template (version 1 -> 2)
        if (version === 1 && state?.providers) {
          state.providers.forEach((provider) => {
            if (provider.models) {
              provider.models.forEach((model) => {
                // Initialize settings if it doesn't exist
                if (!model.settings) {
                  model.settings = {}
                }

                // Migrate chatTemplate key to chat_template
                if (model.settings.chatTemplate) {
                  model.settings.chat_template = model.settings.chatTemplate
                  delete model.settings.chatTemplate
                }

                // Add missing chat_template setting if it doesn't exist
                if (!model.settings.chat_template) {
                  model.settings.chat_template = {
                    ...modelSettings.chatTemplate,
                    controller_props: {
                      ...modelSettings.chatTemplate.controller_props,
                    },
                  }
                }
              })
            }
          })
        }

        // Migration for override_tensor_buffer_type key (version 2 -> 3)
        if (version === 2 && state?.providers) {
          state.providers.forEach((provider) => {
            if (provider.models) {
              provider.models.forEach((model) => {
                // Initialize settings if it doesn't exist
                if (!model.settings) {
                  model.settings = {}
                }

                // Add missing override_tensor_buffer_type setting if it doesn't exist
                if (!model.settings.override_tensor_buffer_t) {
                  model.settings.override_tensor_buffer_t = {
                    ...modelSettings.override_tensor_buffer_t,
                    controller_props: {
                      ...modelSettings.override_tensor_buffer_t
                        .controller_props,
                    },
                  }
                }
              })
            }
          })
        }

        return state
      },
      version: 1,
      version: 3,
    }
  )
)

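One caveat worth flagging: zustand's persist calls `migrate` once with the stored version, so the exact `version === 1` and `version === 2` guards above rely on users stepping through every release. Cumulative guards are the more defensive pattern; a sketch under that assumption, with hypothetical helper names standing in for the per-version bodies shown above:

```typescript
type PersistedState = { providers: unknown[] }

// Hypothetical helpers wrapping the per-version bodies shown above.
const updateContBatchingDescription = (_s: PersistedState): void => {}
const renameChatTemplateKey = (_s: PersistedState): void => {}
const addOverrideTensorBufferSetting = (_s: PersistedState): void => {}

// Each guard uses `<`, so a store persisted at version 0 passes through
// every step on its way to version 3.
function migrate(state: PersistedState, version: number): PersistedState {
  if (version < 1) updateContBatchingDescription(state)
  if (version < 2) renameChatTemplateKey(state)
  if (version < 3) addOverrideTensorBufferSetting(state)
  return state
}
```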
@@ -407,6 +407,50 @@
      const result = builder.getMessages()
      expect(result[0].content).toBe('Clean answer')
    })

    it('should remove analysis channel reasoning content', () => {
      const builder = new CompletionMessagesBuilder([])

      builder.addAssistantMessage(
        '<|channel|>analysis<|message|>Let me analyze this step by step...<|start|>assistant<|channel|>final<|message|>The final answer is 42.'
      )

      const result = builder.getMessages()
      expect(result[0].content).toBe('The final answer is 42.')
    })

    it('should handle analysis channel without final message', () => {
      const builder = new CompletionMessagesBuilder([])

      builder.addAssistantMessage(
        '<|channel|>analysis<|message|>Only analysis content here...'
      )

      const result = builder.getMessages()
      expect(result[0].content).toBe('<|channel|>analysis<|message|>Only analysis content here...')
    })

    it('should handle analysis channel with multiline content', () => {
      const builder = new CompletionMessagesBuilder([])

      builder.addAssistantMessage(
        '<|channel|>analysis<|message|>Step 1: First analysis\nStep 2: Second analysis\nStep 3: Final analysis<|start|>assistant<|channel|>final<|message|>Based on my analysis, here is the result.'
      )

      const result = builder.getMessages()
      expect(result[0].content).toBe('Based on my analysis, here is the result.')
    })

    it('should handle both think and analysis channel tags', () => {
      const builder = new CompletionMessagesBuilder([])

      builder.addAssistantMessage(
        '<think>Initial thought</think><|channel|>analysis<|message|>Detailed analysis<|start|>assistant<|channel|>final<|message|>Final response'
      )

      const result = builder.getMessages()
      expect(result[0].content).toBe('Final response')
    })
  })

  describe('integration tests', () => {
@@ -102,6 +102,15 @@ export class CompletionMessagesBuilder {
        content = content.slice(splitIndex).trim()
      }
    }
    if (content.includes('<|channel|>analysis<|message|>')) {
      const match = content.match(
        /<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/
      )
      if (match?.index !== undefined) {
        const splitIndex = match.index + match[0].length
        content = content.slice(splitIndex).trim()
      }
    }
    return content
  }
}
@@ -22,7 +22,6 @@ export const modelSettings = {
      type: 'number',
    },
  },

  temperature: {
    key: 'temperature',
    title: 'Temperature',
@@ -121,4 +120,28 @@ export const modelSettings = {
      type: 'number',
    },
  },
  chatTemplate: {
    key: 'chat_template',
    title: 'Custom Jinja Chat template',
    description: 'Custom Jinja chat_template to be used for the model',
    controller_type: 'textarea',
    controller_props: {
      value: '',
      placeholder:
        'e.g., {% for message in messages %}...{% endfor %} (default is read from GGUF)',
      type: 'text',
      textAlign: 'right',
    },
  },
  override_tensor_buffer_t: {
    key: 'override_tensor_buffer_t',
    title: 'Override Tensor Buffer Type',
    description: 'Override the tensor buffer type for the model',
    controller_type: 'input',
    controller_props: {
      value: '',
      placeholder: 'e.g., layers\\.\\d+\\.ffn_.*=CPU',
      type: 'text',
    },
  },
}
@@ -257,6 +257,10 @@
      "downloadCancelled": {
        "title": "Download Cancelled",
        "description": "The download process was cancelled"
      },
      "downloadFailed": {
        "title": "Download Failed",
        "description": "{{item}} download failed"
      }
    }
  }
}
@@ -62,7 +62,12 @@ export function DataProvider() {

  // Check for app updates
  useEffect(() => {
    checkForUpdate()
    // Only check for updates if the auto updater is not disabled
    // App might be distributed via other package managers
    // or methods that handle updates differently
    if (!AUTO_UPDATER_DISABLED) {
      checkForUpdate()
    }
  }, [checkForUpdate])

  const handleDeepLink = (urls: string[] | null) => {
@@ -79,7 +84,7 @@ export function DataProvider() {
      const resource = params.slice(1).join('/')
      // return { action, provider, resource }
      navigate({
        to: route.hub.index,
        to: route.hub.model,
        search: {
          repo: resource,
        },
@@ -1,5 +1,10 @@
import HeaderPage from '@/containers/HeaderPage'
import { createFileRoute, useParams, useNavigate } from '@tanstack/react-router'
import {
  createFileRoute,
  useParams,
  useNavigate,
  useSearch,
} from '@tanstack/react-router'
import {
  IconArrowLeft,
  IconDownload,
@@ -13,23 +18,38 @@ import { RenderMarkdown } from '@/containers/RenderMarkdown'
import { useEffect, useMemo, useCallback, useState } from 'react'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { pullModel } from '@/services/models'
import {
  CatalogModel,
  convertHfRepoToCatalogModel,
  fetchHuggingFaceRepo,
  pullModel,
} from '@/services/models'
import { Progress } from '@/components/ui/progress'
import { Button } from '@/components/ui/button'
import { cn } from '@/lib/utils'

type SearchParams = {
  repo: string
}

export const Route = createFileRoute('/hub/$modelId')({
  component: HubModelDetail,
  validateSearch: (search: Record<string, unknown>): SearchParams => ({
    repo: search.repo as SearchParams['repo'],
  }),
})

function HubModelDetail() {
  const { modelId } = useParams({ from: Route.id })
  const navigate = useNavigate()
  const { sources, fetchSources } = useModelSources()
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const search = useSearch({ from: Route.id as any })
  const { getProviderByName } = useModelProvider()
  const llamaProvider = getProviderByName('llamacpp')
  const { downloads, localDownloadingModels, addLocalDownloadingModel } =
    useDownloadStore()
  const [repoData, setRepoData] = useState<CatalogModel | undefined>()

  // State for README content
  const [readmeContent, setReadmeContent] = useState<string>('')
@@ -39,10 +59,21 @@ function HubModelDetail() {
    fetchSources()
  }, [fetchSources])

  const fetchRepo = useCallback(async () => {
    const repoInfo = await fetchHuggingFaceRepo(search.repo || modelId)
    if (repoInfo) {
      const repoDetail = convertHfRepoToCatalogModel(repoInfo)
      setRepoData(repoDetail)
    }
  }, [modelId, search])

  useEffect(() => {
    fetchRepo()
  }, [modelId, fetchRepo])
  // Find the model data from sources
  const modelData = useMemo(() => {
    return sources.find((model) => model.model_name === modelId)
  }, [sources, modelId])
    return sources.find((model) => model.model_name === modelId) ?? repoData
  }, [sources, modelId, repoData])

  // Download processes
  const downloadProcesses = useMemo(
@@ -116,7 +147,6 @@ function HubModelDetail() {
    })
  }, [modelData])

  // Fetch README content when modelData.readme is available
  useEffect(() => {
    if (modelData?.readme) {
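fetchHuggingFaceRepo now does the heavy lifting for this page. The service tests near the end of this diff show it accepting either a bare repo id or a full URL and querying the api/models endpoint with blobs=true; a hedged re-sketch of that normalization, with error handling trimmed and the type reduced to the fields used here:

```typescript
// Reduced shape; the real HuggingFaceRepo carries more fields.
interface HuggingFaceRepo {
  modelId: string
  siblings?: Array<{ rfilename: string; size?: number }>
}

// Accepts "microsoft/DialoGPT-medium" or the full huggingface.co URL.
async function fetchHuggingFaceRepo(
  idOrUrl: string
): Promise<HuggingFaceRepo | null> {
  const repoId = idOrUrl.replace(/^https?:\/\/huggingface\.co\//, '')
  const res = await fetch(
    `https://huggingface.co/api/models/${repoId}?blobs=true`
  )
  if (!res.ok) return null
  return (await res.json()) as HuggingFaceRepo
}
```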
@@ -31,7 +31,7 @@ import {
  CatalogModel,
  pullModel,
  fetchHuggingFaceRepo,
  HuggingFaceRepo,
  convertHfRepoToCatalogModel,
} from '@/services/models'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { Progress } from '@/components/ui/progress'
@@ -63,14 +63,16 @@ function Hub() {
    { value: 'newest', name: t('hub:sortNewest') },
    { value: 'most-downloaded', name: t('hub:sortMostDownloaded') },
  ]
  const searchOptions = {
    includeScore: true,
    // Search in `author` and in `tags` array
    keys: ['model_name', 'quants.model_id'],
  }
  const searchOptions = useMemo(() => {
    return {
      includeScore: true,
      // Search in `author` and in `tags` array
      keys: ['model_name', 'quants.model_id'],
    }
  }, [])

  const { sources, addSource, fetchSources, loading } = useModelSources()
  const search = useSearch({ from: route.hub.index as any })

  const [searchValue, setSearchValue] = useState('')
  const [sortSelected, setSortSelected] = useState('newest')
  const [expandedModels, setExpandedModels] = useState<Record<string, boolean>>(
@@ -92,48 +94,6 @@ function Hub() {
  const { getProviderByName } = useModelProvider()
  const llamaProvider = getProviderByName('llamacpp')

  // Convert HuggingFace repository to CatalogModel format
  const convertHfRepoToCatalogModel = useCallback(
    (repo: HuggingFaceRepo): CatalogModel => {
      // Extract GGUF files from the repository siblings
      const ggufFiles =
        repo.siblings?.filter((file) =>
          file.rfilename.toLowerCase().endsWith('.gguf')
        ) || []

      // Convert GGUF files to quants format
      const quants = ggufFiles.map((file) => {
        // Format file size
        const formatFileSize = (size?: number) => {
          if (!size) return 'Unknown size'
          if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB`
          return `${(size / 1024 ** 3).toFixed(1)} GB`
        }

        // Generate model_id from filename (remove .gguf extension, case-insensitive)
        const modelId = file.rfilename.replace(/\.gguf$/i, '')

        return {
          model_id: modelId,
          path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
          file_size: formatFileSize(file.size),
        }
      })

      return {
        model_name: repo.modelId,
        description: `**Metadata:** ${repo.pipeline_tag}\n\n **Tags**: ${repo.tags?.join(', ')}`,
        developer: repo.author,
        downloads: repo.downloads || 0,
        num_quants: quants.length,
        quants: quants,
        created_at: repo.created_at,
        readme: `https://huggingface.co/${repo.modelId}/resolve/main/README.md`,
      }
    },
    []
  )

  const toggleModelExpansion = (modelId: string) => {
    setExpandedModels((prev) => ({
      ...prev,
@@ -141,35 +101,6 @@ function Hub() {
    }))
  }

  useEffect(() => {
    if (search.repo) {
      setSearchValue(search.repo || '')
      setIsSearching(true)

      addModelSourceTimeoutRef.current = setTimeout(async () => {
        try {
          // Fetch HuggingFace repository information
          const repoInfo = await fetchHuggingFaceRepo(search.repo)
          if (repoInfo) {
            const catalogModel = convertHfRepoToCatalogModel(repoInfo)
            if (
              !sources.some((s) => s.model_name === catalogModel.model_name)
            ) {
              setHuggingFaceRepo(catalogModel)
              addSource(catalogModel)
            }
          }

          await fetchSources()
        } catch (error) {
          console.error('Error fetching repository info:', error)
        } finally {
          setIsSearching(false)
        }
      }, 500)
    }
  }, [convertHfRepoToCatalogModel, fetchSources, addSource, search, sources])

  // Sorting functionality
  const sortedModels = useMemo(() => {
    return [...sources].sort((a, b) => {
@@ -264,9 +195,6 @@ function Hub() {
          addSource(catalogModel)
        }
      }

      // Original addSource logic (if needed)
      await fetchSources()
    } catch (error) {
      console.error('Error fetching repository info:', error)
    } finally {
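Wrapping `searchOptions` in useMemo is about identity, not cost: a fresh object literal on every render defeats any dependency check downstream, for example when it feeds a memoized Fuse index. A tiny illustration:

```typescript
import { useMemo } from 'react'

// Stable reference across renders; without useMemo this object would be
// recreated every render and re-trigger anything that depends on it.
const searchOptions = useMemo(
  () => ({
    includeScore: true,
    keys: ['model_name', 'quants.model_id'],
  }),
  []
)
```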
@@ -83,11 +83,7 @@ function MCPServers() {
  }

  const handleSaveServer = async (name: string, config: MCPServerConfig) => {
    try {
      await toggleServer(name, false)
    } catch (error) {
      console.error('Error deactivating server:', error)
    }
    toggleServer(name, false)
    if (editingKey) {
      // If server name changed, delete old one and add new one
      if (editingKey !== name) {
@@ -102,7 +98,7 @@ function MCPServers() {
    }

    syncServers()
    await toggleServer(name, true)
    toggleServer(name, true)
  }

  const handleEdit = (serverKey: string) => {
@@ -147,25 +143,26 @@ function MCPServers() {
  ) => {
    if (jsonServerName) {
      try {
        await toggleServer(jsonServerName, false)
        toggleServer(jsonServerName, false)
      } catch (error) {
        console.error('Error deactivating server:', error)
      }
      // Save single server
      editServer(jsonServerName, data as MCPServerConfig)
      syncServers()
      toggleServer(jsonServerName, true)
      toggleServer(jsonServerName, (data as MCPServerConfig).active || false)
    } else {
      // Save all servers
      // Clear existing servers first
      Object.keys(mcpServers).forEach((key) => {
        deleteServer(key)
      Object.keys(mcpServers).forEach((serverKey) => {
        toggleServer(serverKey, false)
        deleteServer(serverKey)
      })

      // Add all servers from the JSON
      Object.entries(data as Record<string, MCPServerConfig>).forEach(
        ([key, config]) => {
          addServer(key, config)
          toggleServer(key, config.active || false)
        }
      )
    }
@@ -4,8 +4,7 @@ import { UIEventHandler } from 'react'
import debounce from 'lodash.debounce'
import cloneDeep from 'lodash.clonedeep'
import { cn } from '@/lib/utils'
import { ArrowDown } from 'lucide-react'
import { Play } from 'lucide-react'
import { ArrowDown, Play } from 'lucide-react'

import HeaderPage from '@/containers/HeaderPage'
import { useThreads } from '@/hooks/useThreads'
@@ -328,7 +327,7 @@ function ThreadDetail() {
        >
          {showScrollToBottomBtn && (
            <div
              className="bg-main-view-fg/10 px-4 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
              className="bg-main-view-fg/10 px-2 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
              onClick={() => {
                scrollToBottom(true)
                setIsUserScrolling(false)
@@ -340,7 +339,7 @@ function ThreadDetail() {
          )}
          {showGenerateAIResponseBtn && (
            <div
              className="bg-main-view-fg/10 px-4 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
              className="mx-2 bg-main-view-fg/10 px-2 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
              onClick={generateAIResponse}
            >
              <p className="text-xs">{t('common:generateAiResponse')}</p>
@@ -4,6 +4,7 @@ import {
  fetchModels,
  fetchModelCatalog,
  fetchHuggingFaceRepo,
  convertHfRepoToCatalogModel,
  updateModel,
  pullModel,
  abortDownload,
@@ -12,6 +13,8 @@ import {
  stopModel,
  stopAllModels,
  startModel,
  HuggingFaceRepo,
  CatalogModel,
} from '../models'
import { EngineManager, Model } from '@janhq/core'
@ -334,7 +337,9 @@ describe('models service', () => {
      })

      // Test with full URL
-     await fetchHuggingFaceRepo('https://huggingface.co/microsoft/DialoGPT-medium')
+     await fetchHuggingFaceRepo(
+       'https://huggingface.co/microsoft/DialoGPT-medium'
+     )
      expect(fetch).toHaveBeenCalledWith(
        'https://huggingface.co/api/models/microsoft/DialoGPT-medium?blobs=true'
      )
@ -380,7 +385,7 @@ describe('models service', () => {

    it('should handle other HTTP errors', async () => {
      const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {})

      ;(fetch as any).mockResolvedValue({
        ok: false,
        status: 500,
@ -394,13 +399,13 @@ describe('models service', () => {
        'Error fetching HuggingFace repository:',
        expect.any(Error)
      )

      consoleSpy.mockRestore()
    })

    it('should handle network errors', async () => {
      const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {})

      ;(fetch as any).mockRejectedValue(new Error('Network error'))

      const result = await fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
@ -410,7 +415,7 @@ describe('models service', () => {
        'Error fetching HuggingFace repository:',
        expect.any(Error)
      )

      consoleSpy.mockRestore()
    })

@ -524,7 +529,303 @@ describe('models service', () => {

      expect(result).toEqual(mockRepoData)
      // Verify the GGUF file is present in siblings
-     expect(result?.siblings?.some(s => s.rfilename.endsWith('.gguf'))).toBe(true)
+     expect(result?.siblings?.some((s) => s.rfilename.endsWith('.gguf'))).toBe(
+       true
+     )
    })
  })

  describe('convertHfRepoToCatalogModel', () => {
    const mockHuggingFaceRepo: HuggingFaceRepo = {
      id: 'microsoft/DialoGPT-medium',
      modelId: 'microsoft/DialoGPT-medium',
      sha: 'abc123',
      downloads: 1500,
      likes: 75,
      tags: ['pytorch', 'transformers', 'text-generation'],
      pipeline_tag: 'text-generation',
      created_at: '2021-01-01T00:00:00Z',
      last_modified: '2021-12-01T00:00:00Z',
      private: false,
      disabled: false,
      gated: false,
      author: 'microsoft',
      siblings: [
        {
          rfilename: 'model-q4_0.gguf',
          size: 2 * 1024 * 1024 * 1024, // 2GB
          blobId: 'blob123',
        },
        {
          rfilename: 'model-q8_0.GGUF', // Test case-insensitive matching
          size: 4 * 1024 * 1024 * 1024, // 4GB
          blobId: 'blob456',
        },
        {
          rfilename: 'tokenizer.json', // Non-GGUF file (should be filtered out)
          size: 1024 * 1024, // 1MB
          blobId: 'blob789',
        },
      ],
    }

    it('should convert HuggingFace repo to catalog model format', () => {
      const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo)

      const expected: CatalogModel = {
        model_name: 'microsoft/DialoGPT-medium',
        description: '**Tags**: pytorch, transformers, text-generation',
        developer: 'microsoft',
        downloads: 1500,
        num_quants: 2,
        quants: [
          {
            model_id: 'model-q4_0',
            path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf',
            file_size: '2.0 GB',
          },
          {
            model_id: 'model-q8_0',
            path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q8_0.GGUF',
            file_size: '4.0 GB',
          },
        ],
        created_at: '2021-01-01T00:00:00Z',
        readme:
          'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/README.md',
      }

      expect(result).toEqual(expected)
    })

    it('should handle repository with no GGUF files', () => {
      const repoWithoutGGUF: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: [
          {
            rfilename: 'tokenizer.json',
            size: 1024 * 1024,
            blobId: 'blob789',
          },
          {
            rfilename: 'config.json',
            size: 2048,
            blobId: 'blob101',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(repoWithoutGGUF)

      expect(result.num_quants).toBe(0)
      expect(result.quants).toEqual([])
    })

    it('should handle repository with no siblings', () => {
      const repoWithoutSiblings: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: undefined,
      }

      const result = convertHfRepoToCatalogModel(repoWithoutSiblings)

      expect(result.num_quants).toBe(0)
      expect(result.quants).toEqual([])
    })

    it('should format file sizes correctly', () => {
      const repoWithVariousFileSizes: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: [
          {
            rfilename: 'small-model.gguf',
            size: 500 * 1024 * 1024, // 500MB
            blobId: 'blob1',
          },
          {
            rfilename: 'large-model.gguf',
            size: 3.5 * 1024 * 1024 * 1024, // 3.5GB
            blobId: 'blob2',
          },
          {
            rfilename: 'unknown-size.gguf',
            // No size property
            blobId: 'blob3',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(repoWithVariousFileSizes)

      expect(result.quants[0].file_size).toBe('500.0 MB')
      expect(result.quants[1].file_size).toBe('3.5 GB')
      expect(result.quants[2].file_size).toBe('Unknown size')
    })

    it('should handle empty or undefined tags', () => {
      const repoWithEmptyTags: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        tags: [],
      }

      const result = convertHfRepoToCatalogModel(repoWithEmptyTags)

      expect(result.description).toBe('**Tags**: ')
    })

    it('should handle missing downloads count', () => {
      const repoWithoutDownloads: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        downloads: undefined as any,
      }

      const result = convertHfRepoToCatalogModel(repoWithoutDownloads)

      expect(result.downloads).toBe(0)
    })

    it('should correctly remove .gguf extension from model IDs', () => {
      const repoWithVariousGGUF: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: [
          {
            rfilename: 'model.gguf',
            size: 1024,
            blobId: 'blob1',
          },
          {
            rfilename: 'MODEL.GGUF',
            size: 1024,
            blobId: 'blob2',
          },
          {
            rfilename: 'complex-model-name.gguf',
            size: 1024,
            blobId: 'blob3',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(repoWithVariousGGUF)

      expect(result.quants[0].model_id).toBe('model')
      expect(result.quants[1].model_id).toBe('MODEL')
      expect(result.quants[2].model_id).toBe('complex-model-name')
    })

    it('should generate correct download paths', () => {
      const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo)

      expect(result.quants[0].path).toBe(
        'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf'
      )
      expect(result.quants[1].path).toBe(
        'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q8_0.GGUF'
      )
    })

    it('should generate correct readme URL', () => {
      const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo)

      expect(result.readme).toBe(
        'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/README.md'
      )
    })

    it('should handle GGUF files with case-insensitive extension matching', () => {
      const repoWithMixedCase: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: [
          {
            rfilename: 'model-1.gguf',
            size: 1024,
            blobId: 'blob1',
          },
          {
            rfilename: 'model-2.GGUF',
            size: 1024,
            blobId: 'blob2',
          },
          {
            rfilename: 'model-3.GgUf',
            size: 1024,
            blobId: 'blob3',
          },
          {
            rfilename: 'not-a-model.txt',
            size: 1024,
            blobId: 'blob4',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(repoWithMixedCase)

      expect(result.num_quants).toBe(3)
      expect(result.quants).toHaveLength(3)
      expect(result.quants[0].model_id).toBe('model-1')
      expect(result.quants[1].model_id).toBe('model-2')
      expect(result.quants[2].model_id).toBe('model-3')
    })

    it('should handle edge cases with file size formatting', () => {
      const repoWithEdgeCases: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: [
          {
            rfilename: 'tiny.gguf',
            size: 512, // < 1MB
            blobId: 'blob1',
          },
          {
            rfilename: 'exactly-1gb.gguf',
            size: 1024 * 1024 * 1024, // Exactly 1GB
            blobId: 'blob2',
          },
          {
            rfilename: 'zero-size.gguf',
            size: 0,
            blobId: 'blob3',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(repoWithEdgeCases)

      expect(result.quants[0].file_size).toBe('0.0 MB')
      expect(result.quants[1].file_size).toBe('1.0 GB')
      expect(result.quants[2].file_size).toBe('Unknown size') // 0 is falsy, so it returns 'Unknown size'
    })

    it('should handle missing optional fields gracefully', () => {
      const minimalRepo: HuggingFaceRepo = {
        id: 'minimal/repo',
        modelId: 'minimal/repo',
        sha: 'abc123',
        downloads: 0,
        likes: 0,
        tags: [],
        created_at: '2021-01-01T00:00:00Z',
        last_modified: '2021-12-01T00:00:00Z',
        private: false,
        disabled: false,
        gated: false,
        author: 'minimal',
        siblings: [
          {
            rfilename: 'model.gguf',
            blobId: 'blob1',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(minimalRepo)

      expect(result.model_name).toBe('minimal/repo')
      expect(result.developer).toBe('minimal')
      expect(result.downloads).toBe(0)
      expect(result.description).toBe('**Tags**: ')
      expect(result.quants[0].file_size).toBe('Unknown size')
    })
  })
})
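The edge-case expectations above follow directly from the byte thresholds in the `formatFileSize` helper (shown in the implementation hunk below): anything under 1024³ bytes is rendered in MB, and any falsy size short-circuits to 'Unknown size'. A minimal sketch of that rule with the tested inputs worked out (the helper body mirrors the implementation that follows):

// Worked examples of the size-formatting rule the edge-case tests exercise.
const formatFileSize = (size?: number): string => {
  if (!size) return 'Unknown size' // 0 and undefined are both falsy
  if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB`
  return `${(size / 1024 ** 3).toFixed(1)} GB`
}

formatFileSize(512)       // '0.0 MB'  (512 / 1024² ≈ 0.0005, rounded to one decimal)
formatFileSize(1024 ** 3) // '1.0 GB'  (exactly 1 GiB is not below the threshold)
formatFileSize(0)         // 'Unknown size' (0 is falsy, same as a missing size)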
@ -134,6 +134,47 @@ export const fetchHuggingFaceRepo = async (
  }
}

// Convert HuggingFace repository to CatalogModel format
export const convertHfRepoToCatalogModel = (
  repo: HuggingFaceRepo
): CatalogModel => {
  // Extract GGUF files from the repository siblings
  const ggufFiles =
    repo.siblings?.filter((file) =>
      file.rfilename.toLowerCase().endsWith('.gguf')
    ) || []

  // Convert GGUF files to quants format
  const quants = ggufFiles.map((file) => {
    // Format file size
    const formatFileSize = (size?: number) => {
      if (!size) return 'Unknown size'
      if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB`
      return `${(size / 1024 ** 3).toFixed(1)} GB`
    }

    // Generate model_id from filename (remove .gguf extension, case-insensitive)
    const modelId = file.rfilename.replace(/\.gguf$/i, '')

    return {
      model_id: modelId,
      path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
      file_size: formatFileSize(file.size),
    }
  })

  return {
    model_name: repo.modelId,
    description: `**Tags**: ${repo.tags?.join(', ')}`,
    developer: repo.author,
    downloads: repo.downloads || 0,
    num_quants: quants.length,
    quants: quants,
    created_at: repo.created_at,
    readme: `https://huggingface.co/${repo.modelId}/resolve/main/README.md`,
  }
}

/**
 * Updates a model.
 * @param model The model to update.
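Taken together with `fetchHuggingFaceRepo`, this lets the app surface any GGUF repository as if it were a catalog entry. A minimal usage sketch, assuming the same `'../models'` module path used by the tests above and an ESM context where top-level await is available:

// Sketch: fetch a repo and list its downloadable GGUF quantizations.
import { fetchHuggingFaceRepo, convertHfRepoToCatalogModel } from '../models'

const repo = await fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
if (repo) {
  const catalogModel = convertHfRepoToCatalogModel(repo)
  for (const quant of catalogModel.quants) {
    console.log(`${quant.model_id} (${quant.file_size}): ${quant.path}`)
  }
}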
1
web-app/src/types/global.d.ts
vendored
@ -19,6 +19,7 @@ declare global {
  declare const POSTHOG_KEY: string
  declare const POSTHOG_HOST: string
  declare const MODEL_CATALOG_URL: string
  declare const AUTO_UPDATER_DISABLED: boolean
  interface Window {
    core: AppCore | undefined
  }
@ -33,19 +33,19 @@ export default defineConfig(({ mode }) => {
    define: {
      IS_TAURI: JSON.stringify(process.env.IS_TAURI),
      IS_MACOS: JSON.stringify(
-       process.env.TAURI_ENV_PLATFORM?.includes('darwin') ?? 'false'
+       process.env.TAURI_ENV_PLATFORM?.includes('darwin') ?? false
      ),
      IS_WINDOWS: JSON.stringify(
-       process.env.TAURI_ENV_PLATFORM?.includes('windows') ?? 'false'
+       process.env.TAURI_ENV_PLATFORM?.includes('windows') ?? false
      ),
      IS_LINUX: JSON.stringify(
-       process.env.TAURI_ENV_PLATFORM?.includes('linux') ?? 'false'
+       process.env.TAURI_ENV_PLATFORM?.includes('linux') ?? false
      ),
      IS_IOS: JSON.stringify(
-       process.env.TAURI_ENV_PLATFORM?.includes('ios') ?? 'false'
+       process.env.TAURI_ENV_PLATFORM?.includes('ios') ?? false
      ),
      IS_ANDROID: JSON.stringify(
-       process.env.TAURI_ENV_PLATFORM?.includes('android') ?? 'false'
+       process.env.TAURI_ENV_PLATFORM?.includes('android') ?? false
      ),
      PLATFORM: JSON.stringify(process.env.TAURI_ENV_PLATFORM),
@ -56,6 +56,9 @@ export default defineConfig(({ mode }) => {
      MODEL_CATALOG_URL: JSON.stringify(
        'https://raw.githubusercontent.com/menloresearch/model-catalog/main/model_catalog.json'
      ),
      AUTO_UPDATER_DISABLED: JSON.stringify(
        env.AUTO_UPDATER_DISABLED === 'true'
      ),
    },

    // Vite options tailored for Tauri development and only applied in `tauri dev` or `tauri build`
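The `?? 'false'` to `?? false` change fixes a real footgun: Vite's `define` substitutes the JSON-encoded value as source text, so when `TAURI_ENV_PLATFORM` was unset the old string fallback injected `"false"`, a truthy value, while the new boolean fallback injects the literal `false`. A short sketch of the difference:

// Why the nullish fallback must be a boolean, not the string 'false':
JSON.stringify(false)   // 'false'   -> IS_MACOS is replaced by the literal false
JSON.stringify('false') // '"false"' -> IS_MACOS is replaced by the string "false"

if ('false') {
  // Always reached: a non-empty string is truthy, so the old fallback made
  // flags like IS_MACOS look enabled whenever the env var was unset.
}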
@ -38,5 +38,6 @@ export default defineConfig({
    VERSION: JSON.stringify('test'),
    POSTHOG_KEY: JSON.stringify(''),
    POSTHOG_HOST: JSON.stringify(''),
    AUTO_UPDATER_DISABLED: JSON.stringify('false'),
  },
})
21
website/.gitignore
vendored
Normal file
@ -0,0 +1,21 @@
# build output
dist/
# generated types
.astro/

# dependencies
node_modules/

# logs
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*


# environment variables
.env
.env.production

# macOS-specific files
.DS_Store
28
website/README.md
Normal file
@ -0,0 +1,28 @@
# Jan's Website

This website is [built with Starlight](https://starlight.astro.build).


Starlight looks for `.md` or `.mdx` files in the `src/content/docs/` directory. Each file is exposed
as a route based on its file name.

Images can be added to `src/assets/` and embedded in Markdown with a relative link.

Static assets, like favicons, can be placed in the `public/` directory.

If you want to add new pages, they can go in the `src/pages/` directory. Because of the topics plugin
we are using ([starlight sidebar topics](https://starlight-sidebar-topics.netlify.app/docs/guides/excluded-pages/)),
you will need to exclude them from the sidebar by adding them to the exclude list in `astro.config.mjs`, e.g., `exclude: ['/example'],` (see the sketch below).
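For instance, to keep a hypothetical page at `src/pages/example.astro` out of the sidebar, the second argument to `starlightSidebarTopics` would gain an entry like this (a minimal sketch; the full exclude list lives in this repo's `astro.config.mjs`, shown further below):

```js
// astro.config.mjs (excerpt): options object passed to starlightSidebarTopics
starlightSidebarTopics(
  [
    // ...topic definitions...
  ],
  {
    // Pages under src/pages/ that should not appear in any topic sidebar
    exclude: ['/example'],
  }
)
```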
## 🧞 Commands

All commands are run from the root of the project, from a terminal:

| Command                   | Action                                            |
| :------------------------ | :------------------------------------------------ |
| `bun install`             | Installs dependencies                             |
| `bun dev`                 | Starts local dev server at `localhost:4321`       |
| `bun run build`           | Builds your production site to `./dist/`          |
| `bun preview`             | Previews your build locally, before deploying     |
| `bun astro ...`           | Run CLI commands like `astro add`, `astro check`  |
| `bun astro -- --help`     | Get help using the Astro CLI                      |
191
website/astro.config.mjs
Normal file
@ -0,0 +1,191 @@
// @ts-check
import { defineConfig } from 'astro/config'
import starlight from '@astrojs/starlight'
import starlightThemeRapide from 'starlight-theme-rapide'
import starlightSidebarTopics from 'starlight-sidebar-topics'
import mermaid from 'astro-mermaid'

// https://astro.build/config
export default defineConfig({
  // Deploy to the new v2 subdomain
  site: 'https://v2.jan.ai',
  // No 'base' property is needed, as this will be deployed to the root of the subdomain.
  integrations: [
    mermaid({
      theme: 'default',
      autoTheme: true,
    }),
    starlight({
      title: '👋 Jan',
      favicon: 'jan2.png',
      plugins: [
        starlightThemeRapide(),
        starlightSidebarTopics(
          [
            {
              label: 'Jan Desktop',
              link: '/',
              icon: 'rocket',
              items: [
                {
                  label: 'HOW TO',
                  items: [
                    {
                      label: 'Install 👋 Jan',
                      collapsed: false,
                      autogenerate: { directory: 'jan/installation' },
                    },
                    { label: 'Start Chatting', slug: 'jan/threads' },
                    {
                      label: 'Use Jan Models',
                      collapsed: true,
                      autogenerate: { directory: 'jan/jan-models' },
                    },
                    { label: 'Assistants', slug: 'jan/assistants' },
                  ],
                },
                {
                  label: 'Cloud Providers',
                  items: [
                    { label: 'Anthropic', slug: 'jan/remote-models/anthropic' },
                    { label: 'OpenAI', slug: 'jan/remote-models/openai' },
                    { label: 'Gemini', slug: 'jan/remote-models/google' },
                    {
                      label: 'OpenRouter',
                      slug: 'jan/remote-models/openrouter',
                    },
                    { label: 'Cohere', slug: 'jan/remote-models/cohere' },
                    { label: 'Mistral', slug: 'jan/remote-models/mistralai' },
                    { label: 'Groq', slug: 'jan/remote-models/groq' },
                  ],
                },
                {
                  label: 'EXPLANATION',
                  items: [
                    {
                      label: 'Local AI Engine',
                      slug: 'jan/explanation/llama-cpp',
                    },
                    {
                      label: 'Model Parameters',
                      slug: 'jan/explanation/model-parameters',
                    },
                  ],
                },
                {
                  label: 'ADVANCED',
                  items: [
                    { label: 'Manage Models', slug: 'jan/manage-models' },
                    { label: 'Model Context Protocol', slug: 'jan/mcp' },
                    {
                      label: 'MCP Examples',
                      collapsed: true,
                      items: [
                        {
                          label: 'Browser Control (Browserbase)',
                          slug: 'jan/mcp-examples/browser/browserbase',
                        },
                        {
                          label: 'Code Sandbox (E2B)',
                          slug: 'jan/mcp-examples/data-analysis/e2b',
                        },
                        {
                          label: 'Design Creation (Canva)',
                          slug: 'jan/mcp-examples/design/canva',
                        },
                        {
                          label: 'Deep Research (Octagon)',
                          slug: 'jan/mcp-examples/deepresearch/octagon',
                        },
                        {
                          label: 'Web Search with Exa',
                          slug: 'jan/mcp-examples/search/exa',
                        },
                      ],
                    },
                  ],
                },
                {
                  label: 'Local Server',
                  items: [
                    { label: 'Introduction', link: '/local-server/' },
                    { label: 'Server Setup', slug: 'local-server/api-server' },
                    {
                      label: 'Jan Data Folder',
                      slug: 'local-server/data-folder',
                    },
                    { label: 'Server Settings', slug: 'local-server/settings' },
                    {
                      label: 'Llama.cpp Server',
                      slug: 'local-server/llama-cpp',
                    },
                    {
                      label: 'Server Troubleshooting',
                      slug: 'local-server/troubleshooting',
                    },
                    {
                      label: 'Integrations',
                      collapsed: true,
                      autogenerate: { directory: 'local-server/integrations' },
                    },
                  ],
                },
                {
                  label: 'REFERENCE',
                  items: [
                    { label: 'Settings', slug: 'jan/settings' },
                    { label: 'Jan Data Folder', slug: 'jan/data-folder' },
                    { label: 'Troubleshooting', slug: 'jan/troubleshooting' },
                    { label: 'Privacy Policy', slug: 'jan/privacy' },
                  ],
                },
              ],
            },
            {
              label: 'Jan Mobile',
              link: '/mobile/',
              badge: { text: 'Coming Soon', variant: 'caution' },
              icon: 'phone',
              items: [{ label: 'Overview', slug: 'mobile' }],
            },
            {
              label: 'Jan Server',
              link: '/server/',
              badge: { text: 'Coming Soon', variant: 'caution' },
              icon: 'forward-slash',
              items: [{ label: 'Overview', slug: 'server' }],
            },
          ],
          {
            exclude: [
              '/prods',
              '/api-reference',
              '/products',
              '/products/**/*',
            ],
          }
        ),
      ],
      social: [
        {
          icon: 'github',
          label: 'GitHub',
          href: 'https://github.com/menloresearch/jan',
        },
        {
          icon: 'x.com',
          label: 'X',
          href: 'https://twitter.com/jandotai',
        },
        {
          icon: 'discord',
          label: 'Discord',
          href: 'https://discord.com/invite/FTk2MvZwJH',
        },
      ],
      components: {
        Header: './src/components/CustomNav.astro',
      },
    }),
  ],
})
1299
website/bun.lock
Normal file
27
website/package.json
Normal file
@ -0,0 +1,27 @@
{
  "name": "website",
  "type": "module",
  "version": "0.0.1",
  "scripts": {
    "dev": "astro dev",
    "start": "astro dev",
    "build": "astro build",
    "preview": "astro preview",
    "astro": "astro"
  },
  "dependencies": {
    "@astrojs/starlight": "^0.35.1",
    "@lorenzo_lewis/starlight-utils": "^0.3.2",
    "astro": "^5.6.1",
    "astro-mermaid": "^1.0.4",
    "gsap": "^3.13.0",
    "mermaid": "^11.9.0",
    "phosphor-astro": "^2.1.0",
    "sharp": "^0.34.3",
    "starlight-openapi": "^0.19.1",
    "starlight-sidebar-topics": "^0.6.0",
    "starlight-theme-rapide": "^0.5.1",
    "starlight-videos": "^0.3.0"
  },
  "packageManager": "yarn@1.22.22"
}
1
website/public/favicon.svg
Normal file
@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 128 128"><path fill-rule="evenodd" d="M81 36 64 0 47 36l-1 2-9-10a6 6 0 0 0-9 9l10 10h-2L0 64l36 17h2L28 91a6 6 0 1 0 9 9l9-10 1 2 17 36 17-36v-2l9 10a6 6 0 1 0 9-9l-9-9 2-1 36-17-36-17-2-1 9-9a6 6 0 1 0-9-9l-9 10v-2Zm-17 2-2 5c-4 8-11 15-19 19l-5 2 5 2c8 4 15 11 19 19l2 5 2-5c4-8 11-15 19-19l5-2-5-2c-8-4-15-11-19-19l-2-5Z" clip-rule="evenodd"/><path d="M118 19a6 6 0 0 0-9-9l-3 3a6 6 0 1 0 9 9l3-3Zm-96 4c-2 2-6 2-9 0l-3-3a6 6 0 1 1 9-9l3 3c3 2 3 6 0 9Zm0 82c-2-2-6-2-9 0l-3 3a6 6 0 1 0 9 9l3-3c3-2 3-6 0-9Zm96 4a6 6 0 0 1-9 9l-3-3a6 6 0 1 1 9-9l3 3Z"/><style>path{fill:#000}@media (prefers-color-scheme:dark){path{fill:#fff}}</style></svg>
Size: 696 B
BIN
website/public/gifs/jan-nano-demo.gif
Normal file
Size: 22 MiB
BIN
website/public/jan.png
Normal file
Size: 1.5 MiB
BIN
website/public/jan2.png
Normal file
Size: 1.8 MiB
1459
website/public/openapi/openapi.json
Normal file
BIN
website/public/videos/jan-nano-demo.mp4
Normal file
BIN
website/src/assets/add_assistant.png
Normal file
Size: 163 KiB
BIN
website/src/assets/anthropic.png
Normal file
Size: 149 KiB
BIN
website/src/assets/api-server-logs.png
Normal file
Size: 1.4 MiB
BIN
website/src/assets/api-server-ui.png
Normal file
Size: 562 KiB
BIN
website/src/assets/api-server.png
Normal file
Size: 598 KiB
BIN
website/src/assets/api-server2.png
Normal file
Size: 306 KiB
BIN
website/src/assets/assistant-add-dialog.png
Normal file
Size: 85 KiB
BIN
website/src/assets/assistant-dropdown-updated.png
Normal file
Size: 120 KiB
BIN
website/src/assets/assistant-dropdown.png
Normal file
Size: 450 KiB
BIN
website/src/assets/assistant-edit-dialog.png
Normal file
Size: 118 KiB
BIN
website/src/assets/assistants-ui-overview.png
Normal file
Size: 453 KiB
BIN
website/src/assets/browserbase.png
Normal file
Size: 714 KiB
BIN
website/src/assets/browserbase2.png
Normal file
Size: 554 KiB
BIN
website/src/assets/browserbase3.png
Normal file
Size: 377 KiB
BIN
website/src/assets/browserbase4.png
Normal file
Size: 453 KiB
BIN
website/src/assets/browserbase5.png
Normal file
Size: 616 KiB
BIN
website/src/assets/browserbase6.png
Normal file
Size: 742 KiB
BIN
website/src/assets/browserbase7.png
Normal file
Size: 544 KiB
BIN
website/src/assets/canva.png
Normal file
Size: 404 KiB
BIN
website/src/assets/canva2.png
Normal file
Size: 4.4 MiB
BIN
website/src/assets/canva3.png
Normal file
Size: 432 KiB
BIN
website/src/assets/canva4.png
Normal file
Size: 499 KiB
After Width: | Height: | Size: 499 KiB |