Merge branch 'dev' into dev-web

Dinh Long Nguyen 2025-09-26 15:55:14 +07:00
commit efdd1b3971
140 changed files with 5806 additions and 5186 deletions

View File

@@ -53,6 +53,9 @@ jobs:
- name: Install dependencies
working-directory: docs
run: yarn install
- name: Clean output directory
working-directory: docs
run: rm -rf out/* .next/*
- name: Build website
working-directory: docs
run: export NODE_ENV=production && yarn build && cp _redirects out/_redirects && cp _headers out/_headers

View File

@@ -15,7 +15,6 @@ on:
- 'pre-install/**'
- 'Makefile'
- 'package.json'
- 'mise.toml'
jobs:
get-update-version:

View File

@@ -35,7 +35,6 @@ on:
- 'pre-install/**'
- 'Makefile'
- 'package.json'
- 'mise.toml'
jobs:

View File

@@ -79,8 +79,33 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
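The jq/ctoml bump above is repeated verbatim for each Tauri plugin, and again in the other build workflows below. A minimal shell sketch of the pattern, assuming only the two plugin directories shown in this diff and a placeholder value in place of `${{ inputs.new_version }}`:

```bash
# Sketch only (not part of the workflow): apply the same version bump to every plugin.
NEW_VERSION="0.6.0"  # stands in for ${{ inputs.new_version }}
for plugin in tauri-plugin-hardware tauri-plugin-llamacpp; do
  dir="./src-tauri/plugins/$plugin"
  jq --arg version "$NEW_VERSION" '.version = $version' "$dir/package.json" > /tmp/package.json
  mv /tmp/package.json "$dir/package.json"
  ctoml "$dir/Cargo.toml" package.version "$NEW_VERSION"
done
```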

View File

@@ -100,13 +100,36 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Change app name for beta and nightly builds
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json

View File

@@ -53,7 +53,7 @@ on:
value: ${{ jobs.build-linux-x64.outputs.APPIMAGE_FILE_NAME }}
jobs:
build-linux-x64:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
outputs:
DEB_SIG: ${{ steps.packageinfo.outputs.DEB_SIG }}
APPIMAGE_SIG: ${{ steps.packageinfo.outputs.APPIMAGE_SIG }}
@@ -117,11 +117,34 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Change app name for beta and nightly builds

View File

@@ -42,31 +42,6 @@ jobs:
run: |
cargo install ctoml
- name: Create bun and uv universal
run: |
mkdir -p ./src-tauri/resources/bin/
cd ./src-tauri/resources/bin/
curl -L -o bun-darwin-x64.zip https://github.com/oven-sh/bun/releases/download/bun-v1.2.10/bun-darwin-x64.zip
curl -L -o bun-darwin-aarch64.zip https://github.com/oven-sh/bun/releases/download/bun-v1.2.10/bun-darwin-aarch64.zip
unzip bun-darwin-x64.zip
unzip bun-darwin-aarch64.zip
lipo -create -output bun-universal-apple-darwin bun-darwin-x64/bun bun-darwin-aarch64/bun
cp -f bun-darwin-aarch64/bun bun-aarch64-apple-darwin
cp -f bun-darwin-x64/bun bun-x86_64-apple-darwin
cp -f bun-universal-apple-darwin bun
curl -L -o uv-x86_64.tar.gz https://github.com/astral-sh/uv/releases/download/0.6.17/uv-x86_64-apple-darwin.tar.gz
curl -L -o uv-arm64.tar.gz https://github.com/astral-sh/uv/releases/download/0.6.17/uv-aarch64-apple-darwin.tar.gz
tar -xzf uv-x86_64.tar.gz
tar -xzf uv-arm64.tar.gz
mv uv-x86_64-apple-darwin uv-x86_64
mv uv-aarch64-apple-darwin uv-aarch64
lipo -create -output uv-universal-apple-darwin uv-x86_64/uv uv-aarch64/uv
cp -f uv-x86_64/uv uv-x86_64-apple-darwin
cp -f uv-aarch64/uv uv-aarch64-apple-darwin
cp -f uv-universal-apple-darwin uv
ls -la
- name: Update app version
run: |
echo "Version: ${{ inputs.new_version }}"
@@ -74,8 +49,35 @@ jobs:
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json

View File

@@ -101,7 +101,30 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build

View File

@@ -49,68 +49,61 @@ jobs:
# Update tauri.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version | .bundle.createUpdaterArtifacts = false' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
jq '.bundle.windows.nsis.template = "tauri.bundle.windows.nsis.template"' ./src-tauri/tauri.windows.conf.json > /tmp/tauri.windows.conf.json
mv /tmp/tauri.windows.conf.json ./src-tauri/tauri.windows.conf.json
jq '.bundle.windows.signCommand = "echo External build - skipping signature: %1"' ./src-tauri/tauri.windows.conf.json > /tmp/tauri.windows.conf.json
mv /tmp/tauri.windows.conf.json ./src-tauri/tauri.windows.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
generate_build_version() {
### Example
### input 0.5.6 output will be 0.5.6 and 0.5.6.0
### input 0.5.6-rc2-beta output will be 0.5.6 and 0.5.6.2
### input 0.5.6-1213 output will be 0.5.6 and 0.5.6.1213
local new_version="$1"
local base_version
local t_value
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
# Check if it has a "-"
if [[ "$new_version" == *-* ]]; then
base_version="${new_version%%-*}" # part before -
suffix="${new_version#*-}" # part after -
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
# Check if it is rcX-beta
if [[ "$suffix" =~ ^rc([0-9]+)-beta$ ]]; then
t_value="${BASH_REMATCH[1]}"
else
t_value="$suffix"
fi
else
base_version="$new_version"
t_value="0"
fi
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
# Export two values
new_base_version="$base_version"
new_build_version="${base_version}.${t_value}"
}
generate_build_version ${{ inputs.new_version }}
sed -i "s/jan_version/$new_base_version/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_build/$new_build_version/g" ./src-tauri/tauri.bundle.windows.nsis.template
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
# Update product name
jq --arg name "Jan-${{ inputs.channel }}" '.productName = $name' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
chmod +x .github/scripts/rename-tauri-app.sh
.github/scripts/rename-tauri-app.sh ./src-tauri/tauri.conf.json ${{ inputs.channel }}
echo "---------tauri.conf.json---------"
cat ./src-tauri/tauri.conf.json
# Update Cargo.toml
ctoml ./src-tauri/Cargo.toml package.name "Jan-${{ inputs.channel }}"
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
echo "------------------"
cat ./src-tauri/Cargo.toml
chmod +x .github/scripts/rename-workspace.sh
.github/scripts/rename-workspace.sh ./package.json ${{ inputs.channel }}
sed -i "s/jan_productname/Jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_mainbinaryname/jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template
else
sed -i "s/jan_productname/Jan/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_mainbinaryname/jan/g" ./src-tauri/tauri.bundle.windows.nsis.template
cat ./package.json
fi
echo "---------nsis.template---------"
cat ./src-tauri/tauri.bundle.windows.nsis.template
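For reference, the `generate_build_version` helper above maps the input tag to the two NSIS placeholders exactly as its comments describe; an illustrative check (not part of the workflow):

```bash
# Illustrative only: expected values of the two variables set by generate_build_version.
generate_build_version "0.5.6"           # new_base_version=0.5.6  new_build_version=0.5.6.0
generate_build_version "0.5.6-rc2-beta"  # new_base_version=0.5.6  new_build_version=0.5.6.2
generate_build_version "0.5.6-1213"      # new_base_version=0.5.6  new_build_version=0.5.6.1213
echo "$new_base_version -> $new_build_version"
```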
- name: Build app
shell: bash
run: |

View File

@@ -95,47 +95,41 @@ jobs:
# Update tauri.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version | .bundle.createUpdaterArtifacts = true' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
jq '.bundle.windows.nsis.template = "tauri.bundle.windows.nsis.template"' ./src-tauri/tauri.windows.conf.json > /tmp/tauri.windows.conf.json
mv /tmp/tauri.windows.conf.json ./src-tauri/tauri.windows.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------Cargo.toml---------"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
generate_build_version() {
### Example
### input 0.5.6 output will be 0.5.6 and 0.5.6.0
### input 0.5.6-rc2-beta output will be 0.5.6 and 0.5.6.2
### input 0.5.6-1213 output will be 0.5.6 and 0.5.6.1213
local new_version="$1"
local base_version
local t_value
# Check if it has a "-"
if [[ "$new_version" == *-* ]]; then
base_version="${new_version%%-*}" # part before -
suffix="${new_version#*-}" # part after -
# Check if it is rcX-beta
if [[ "$suffix" =~ ^rc([0-9]+)-beta$ ]]; then
t_value="${BASH_REMATCH[1]}"
else
t_value="$suffix"
fi
else
base_version="$new_version"
t_value="0"
fi
# Export two values
new_base_version="$base_version"
new_build_version="${base_version}.${t_value}"
}
generate_build_version ${{ inputs.new_version }}
sed -i "s/jan_version/$new_base_version/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_build/$new_build_version/g" ./src-tauri/tauri.bundle.windows.nsis.template
# Add sign commands to tauri.windows.conf.json
jq '.bundle.windows.signCommand = "powershell -ExecutionPolicy Bypass -File ./sign.ps1 %1"' ./src-tauri/tauri.windows.conf.json > /tmp/tauri.windows.conf.json
mv /tmp/tauri.windows.conf.json ./src-tauri/tauri.windows.conf.json
echo "---------tauri.windows.conf.json---------"
cat ./src-tauri/tauri.windows.conf.json
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
@@ -143,8 +137,13 @@ jobs:
# Change app name for beta and nightly builds
if [ "${{ inputs.channel }}" != "stable" ]; then
# Update updater endpoint
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
# Update product name
jq --arg name "Jan-${{ inputs.channel }}" '.productName = $name' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
chmod +x .github/scripts/rename-tauri-app.sh
.github/scripts/rename-tauri-app.sh ./src-tauri/tauri.conf.json ${{ inputs.channel }}
@@ -161,15 +160,7 @@ jobs:
chmod +x .github/scripts/rename-workspace.sh
.github/scripts/rename-workspace.sh ./package.json ${{ inputs.channel }}
cat ./package.json
sed -i "s/jan_productname/Jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_mainbinaryname/jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template
else
sed -i "s/jan_productname/Jan/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_mainbinaryname/jan/g" ./src-tauri/tauri.bundle.windows.nsis.template
fi
echo "---------nsis.template---------"
cat ./src-tauri/tauri.bundle.windows.nsis.template
- name: Install AzureSignTool
run: |

View File

@@ -126,8 +126,7 @@ jan/
├── scripts/ # Build utilities
├── package.json # Root workspace configuration
├── Makefile # Build automation commands
├── mise.toml # Mise tool configuration
├── Makefile # Build automation commands
├── LICENSE # Apache 2.0 license
└── README.md # Project overview
```
@@ -149,19 +148,6 @@ cd jan
make dev
```
**Option 2: The Easier Way (Mise)**
```bash
git clone https://github.com/menloresearch/jan
cd jan
# Install mise
curl https://mise.run | sh
# Let mise handle everything
mise install # installs Node.js, Rust, and other tools
mise dev # runs the full development setup
```
## How Can I Contribute?
### Reporting Bugs

View File

@@ -72,6 +72,9 @@ lint: install-and-build
test: lint
yarn download:bin
yarn download:lib
ifeq ($(OS),Windows_NT)
yarn download:windows-installer
endif
yarn test
yarn copy:assets:tauri
yarn build:icon

View File

@@ -93,29 +93,6 @@ This handles everything: installs dependencies, builds core components, and laun
- `make test` - Run tests and linting
- `make clean` - Delete everything and start fresh
### Run with Mise (easier)
You can also run with [mise](https://mise.jdx.dev/), which is a bit easier as it ensures Node.js, Rust, and other dependency versions are automatically managed:
```bash
git clone https://github.com/menloresearch/jan
cd jan
# Install mise (if not already installed)
curl https://mise.run | sh
# Install tools and start development
mise install # installs Node.js, Rust, and other tools
mise dev # runs the full development setup
```
**Available mise commands:**
- `mise dev` - Full development setup and launch
- `mise build` - Production build
- `mise test` - Run tests and linting
- `mise clean` - Delete everything and start fresh
- `mise tasks` - List all available tasks
### Manual Commands
```bash

View File

@@ -240,6 +240,12 @@ export abstract class AIEngine extends BaseExtension {
EngineManager.instance().register(this)
}
/**
* Gets model info
* @param modelId
*/
abstract get(modelId: string): Promise<modelInfo | undefined>
/**
* Lists available models
*/
@@ -268,6 +274,10 @@ export abstract class AIEngine extends BaseExtension {
*/
abstract delete(modelId: string): Promise<void>
/**
* Updates a model
*/
abstract update(modelId: string, model: Partial<modelInfo>): Promise<void>
/**
* Imports a model
*/
@@ -283,11 +293,6 @@ export abstract class AIEngine extends BaseExtension {
*/
abstract getLoadedModels(): Promise<string[]>
/**
* Optional method to get the underlying chat client
*/
getChatClient?(sessionId: string): any
/**
* Check if a tool is supported by the model
* @param modelId
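With `get` and `update` now abstract, every engine extension has to provide them. A minimal sketch of how an existing `AIEngine` subclass might implement the two new members (the in-memory `models` map is an assumption for illustration):

```typescript
// Sketch only: possible implementations of the new abstract members in an AIEngine subclass.
private models = new Map<string, modelInfo>()

async get(modelId: string): Promise<modelInfo | undefined> {
  // Return the stored record, or undefined when the model is unknown
  return this.models.get(modelId)
}

async update(modelId: string, model: Partial<modelInfo>): Promise<void> {
  const existing = this.models.get(modelId)
  if (!existing) throw new Error(`Model ${modelId} not found`)
  this.models.set(modelId, { ...existing, ...model })
}
```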

View File

@@ -43,6 +43,12 @@ const mkdir = (...args: any[]) => globalThis.core.api?.mkdir({ args })
*/
const rm = (...args: any[]) => globalThis.core.api?.rm({ args })
/**
* Moves a file from the source path to the destination path.
* @returns {Promise<any>} A Promise that resolves when the file is moved successfully.
*/
const mv = (...args: any[]) => globalThis.core.api?.mv({ args })
/**
* Deletes a file from the local file system.
* @param {string} path - The path of the file to delete.
@@ -92,6 +98,7 @@ export const fs = {
readdirSync,
mkdir,
rm,
mv,
unlinkSync,
appendFileSync,
copyFile,
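Callers can now move files through the same wrapper API. A hedged usage sketch (paths are illustrative; like the other helpers, `mv` simply forwards its arguments to `core.api.mv`):

```typescript
// Sketch only: relocate a file with the new fs.mv wrapper; paths are illustrative.
await fs.mv('/old/location/model.gguf', '/new/location/model.gguf')
```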

View File

@@ -91,6 +91,7 @@ export enum FileSystemRoute {
existsSync = 'existsSync',
readdirSync = 'readdirSync',
rm = 'rm',
mv = 'mv',
mkdir = 'mkdir',
readFileSync = 'readFileSync',
writeFileSync = 'writeFileSync',

View File

@@ -1,699 +1,133 @@
/team /about/team 302
/about/teams /about/team 302
/about/faq /docs 302
/about/acknowledgements /docs 302
/about/community /about 302
/guides /docs 302
/docs/troubleshooting/failed-to-fetch /docs/troubleshooting 302
/guides/troubleshooting/gpu-not-used /docs/troubleshooting#troubleshooting-nvidia-gpu 302
/guides/troubleshooting /docs/troubleshooting 302
/docs/troubleshooting/stuck-on-broken-build /docs/troubleshooting 302
/docs/troubleshooting/somethings-amiss /docs/troubleshooting 302
/docs/troubleshooting/how-to-get-error-logs /docs/troubleshooting 302
/docs/troubleshooting/permission-denied /docs/troubleshooting 302
/docs/troubleshooting/unexpected-token /docs/troubleshooting 302
/docs/troubleshooting/undefined-issue /docs/troubleshooting 302
/getting-started/troubleshooting /docs/troubleshooting 302
/docs/troubleshooting/gpu-not-used /docs/troubleshooting 302
/guides/integrations/openrouter /docs/remote-models/openrouter 302
/guides/integrations/continue /integrations/coding/continue-dev 302
/docs/extension-capabilities /docs/extensions 302
/guides/using-extensions /docs/extensions 302
/docs/extension-guides /docs/extensions 302
/features/extensions /docs/extensions 302
/integrations/tensorrt /docs/built-in/tensorrt-llm 302
/guides/using-models/integrate-with-remote-server /docs/remote-inference/generic-openai 302
/guides/using-models/customize-engine-settings /docs/built-in/llama-cpp 302
/developers/plugins/azure-openai /docs/remote-models/openai 302
/docs/api-reference/assistants /api-reference#tag/assistants 302
/docs/api-reference/models/list /api-reference#tag/models 302
/docs/api-reference/threads /api-reference#tag/chat 302
/docs/api-reference/messages /api-reference#tag/messages 302
/docs/api-reference/models /api-reference#tag/models 302
/chat /docs/threads 302
/guides/chatting/manage-history /docs/threads/ 302
/guides/chatting/start-thread /docs/threads/ 302
/guides/using-server /docs/local-api/ 302
/guides/using-server/server /docs/local-api#step-2-srt-and-use-the-built-in-api-server 302
/docs/get-started /docs 302
/guides/how-jan-works /about/how-we-work 302
/acknowledgements /about/acknowledgements 302
/community /about/community 302
/faq /about/faq 302
/how-we-work /about/how-we-work 302
/wall-of-love /about/wall-of-love 302
/guides/troubleshooting/failed-to-fetch /docs/troubleshooting 302
/docs/troubleshooting/gpu-not-used /docs/troubleshooting 302
/docs/troubleshooting/failed-to-fetch /docs/troubleshooting 302
/guides/ /docs 302
/guides/quickstart/ /docs/quickstart 302
/guides/models/ /docs/models 302
/guides/threads/ /docs/threads 302
/guides/local-api/ /docs/local-api 302
/guides/advanced/ /docs/settings 302
/guides/engines/llamacpp/ /docs/built-in/llama-cpp 302
/guides/engines/tensorrt-llm/ /docs/built-in/tensorrt-llm 302
/guides/engines/lmstudio/ /docs/local-models/lmstudio 302
/guides/engines/ollama/ /docs/built-in/llama-cpp 302
/guides/engines/groq/ /docs/remote-models/groq 302
/guides/engines/mistral/ /docs/remote-models/mistralai 302
/guides/engines/openai/ /docs/remote-models/openai 302
/guides/engines/remote-server/ /docs/remote-inference/generic-openai 302
/extensions/ /docs/extensions 302
/integrations/discord/ /integrations/messaging/llmcord 302
/discord https://discord.gg/FTk2MvZwJH 301
/integrations/interpreter/ /integrations/function-calling/interpreter 302
/integrations/raycast/ /integrations/workflow-automation/raycast 302
/docs/integrations/raycast /integrations/workflow-automation/raycast 302
/docs/integrations /integrations 302
/docs/engineering/files/ /docs 302
/integrations/openrouter/ /docs/remote-models/openrouter 302
/integrations/continue/ /integrations/coding/continue-dev 302
/troubleshooting/ /docs/troubleshooting 302
/changelog/changelog-v0.4.9/ /changelog 302
/changelog/changelog-v0.4.8/ /changelog 302
/changelog/changelog-v0.4.7/ /changelog 302
/changelog/changelog-v0.4.6/ /changelog 302
/changelog/changelog-v0.4.5/ /changelog 302
/changelog/changelog-v0.4.4/ /changelog 302
/changelog/changelog-v0.4.3/ /changelog 302
/changelog/changelog-v0.4.2/ /changelog 302
/changelog/changelog-v0.4.1/ /changelog 302
/changelog/changelog-v0.4.0/ /changelog 302
/changelog/changelog-v0.3.3/ /changelog 302
/changelog/changelog-v0.3.2/ /changelog 302
/changelog/changelog-v0.3.1/ /changelog 302
/changelog/changelog-v0.3.0/ /changelog 302
/changelog/changelog-v0.2.3/ /changelog 302
/changelog/changelog-v0.2.2/ /changelog 302
/changelog/changelog-v0.2.1/ /changelog 302
/changelog/changelog-v0.2.0/ /changelog 302
/team/ /about/team 302
/team/contributor-program/ /about/team 302
/team/join-us/ /about/team 302
/how-we-work/ /about/how-we-work 302
/how-we-work/strategy/ /about/how-we-work/strategy 302
/how-we-work/project-management/ /about/how-we-work/project-management 302
/engineering/ /about/how-we-work/engineering 302
/engineering/ci-cd/ /about/how-we-work/engineering/ci-cd 302
/engineering/qa/ /about/how-we-work/engineering/qa 302
/how-we-work/product-design/ /about 302
/about/how-we-work/product-design /about 302
/how-we-work/analytics/ /about/how-we-work/analytics 302
/how-we-work/website-docs/ /about/how-we-work/website-docs 302
/blog/postmortems/january-10-2024-bitdefender-false-positive-flag/ /post/bitdefender 302
/guides/error-codes/something-amiss/ /docs/troubleshooting#somethings-amiss 302
/guides/error-codes/how-to-get-error-logs/ /docs/troubleshooting#how-to-get-error-logs 302
/guides/chatting/ /docs/threads 302
/guides/integration/openinterpreter/ /integrations/function-calling/interpreter 302
/developer/build-assistant/ /docs/assistants 302
/guides/integrations/ /integrations 302
/specs/hub/ /docs 302
/install/windows/ /docs/desktop/windows 302
/install/linux/ /docs/desktop/linux 302
/install/nightly/ /docs/desktop/windows 302
/docs/engineering/fine-tuning/ /docs 302
/developer/assistant/ /docs/assistants 302
/guides/common-error/broken-build/ /docs/troubleshooting#broken-build 302
/guides/using-server/using-server/ /docs/local-api 302
/guides/integrations/azure-openai-service/ /docs/remote-models/openai 302
/specs/messages/ /docs/threads 302
/docs/engineering/models/ /docs/models 302
/docs/specs/assistants/ /docs/assistants 302
/docs/engineering/chats/ /docs/threads 302
/guides/using-extensions/extension-settings/ /docs/extensions 302
/guides/models/customize-engine/ /docs/models 302
/guides/integration/mistral/ /docs/remote-models/mistralai 302
/guides/common-error/ /docs/troubleshooting 302
/guides/integrations/ollama/ /docs/local-models/ollama 302
/server-suite/ /api-reference 302
/guides/integrations/lmstudio/ /docs/local-models/lmstudio 302
/guides/integrations/mistral-ai/ /docs/remote-models/mistralai 302
/guides/start-server/ /docs/local-api 302
/guides/changelog/ /changelog 302
/guides/models-list/ /docs/models 302
/guides/thread/ /docs/threads 302
/docs/engineering/messages/ /docs/threads 302
/guides/faqs/ /about/faq 302
/docs/integrations/openrouter/ /docs/remote-models/openrouter 302
/guides/windows /docs/desktop/windows 302
/docs/integrations/ollama/ /docs/local-models/ollama 302
/api/overview/ /api-reference 302
/docs/extension-guides/ /docs/extensions 302
/specs/settings/ /docs 302
/docs/UI/ /docs 302
/guides/using-models/import-models-using-absolute-filepath/ /docs/models 302
/install/docker/ /docs/desktop 302
/install/ /docs/desktop 302
/install/from-source/ /docs/desktop 302
/docs/installation/server /docs/desktop 302
/v1/models /docs/models 302
/guides/advanced-settings/ /docs/settings 302
/guides/using-models/install-from-hub/ /docs/models/manage-models#download-from-jan-hub 302
/guides/using-models/import-manually/ /docs/models 302
/docs/team/contributor-program/ /about/team 302
/docs/modules/models /docs/models 302
/getting-started/install/linux /docs/desktop/linux 302
/guides/chatting/start-thread/ /docs/threads 302
/api/files/ /docs 302
/specs/threads/ /docs/threads 302
/about/brand-assets /about 302
/guides/chatting/upload-images/ /docs/threads 302
/guides/using-models/customize-models/ /docs/models 302
/docs/modules/models/ /docs/models 302
/getting-started/install/linux/ /docs/desktop/linux 302
/specs/chats/ /docs/threads 302
/specs/engine/ /docs 302
/specs/data-structures /docs 302
/docs/extension-capabilities/ /docs/extensions 302
/docs/get-started/use-local-server/ /docs/local-api 302
/guides/how-jan-works/ /about/how-we-work 302
/guides/install/cloud-native /docs/desktop 302
/guides/windows/ /docs/desktop/windows 302
/specs/ /docs 302
/docs/get-started/build-extension/ /docs/extensions 302
/specs/files/ /docs 302
/guides/using-models/package-models/ /docs/models 302
/install/overview/ /docs/desktop/windows 302
/docs/get-started/extension-anatomy/ /docs/extensions 302
/docs/get-started/ /docs 302
/guides/mac/ /docs/desktop/mac 302
/intro/ /about 302
/specs/fine-tuning/ /docs 302
/guides/server/ /docs/desktop 302
/specs/file-based/ /docs 302
/docs/extension-guides/monitoring/ /docs/extensions 302
/api/ /api-reference 302
/getting-started/build-an-app /docs/quickstart 302
/features/ai-models/ /docs/models 302
/reference/store/ /api-reference 302
/tutorials/build-chat-app /docs/quickstart 302
/features/acceleration /docs/built-in/llama-cpp 302
/getting-started/install/mac /docs/desktop/mac 302
docs/guides/fine-tuning/what-models-can-be-fine-tuned /docs 302
/docs/specs/threads /docs/threads 302
/docs/api-reference/fine-tuning /api-reference 302
/docs/guides/speech-to-text/prompting /docs/quickstart 302
/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model /docs 302
/getting-started/install/windows /docs/desktop/windows 302
/docs/modules/assistants /docs/assistants 302
/docs/modules/chats /docs/threads 302
/docs/specs/chats /docs/threads 302
/docs/modules/files /docs 302
/tutorials/build-rag-app /docs/tools/retrieval 302
/docs/models/model-endpoint-compatibility /docs/models 302
/docs/guides/legacy-fine-tuning/creating-training-data /docs 302
/docs/specs/models /docs/models 302
/docs/guides/safety-best-practices/end-user-ids /docs/quickstart 302
/docs/modules/assistants/ /docs/assistants 302
/docs/models/overview /docs/models 302
/docs/api-reference/files /api-reference 302
/docs/models/tts /docs/models 302
/docs/guides/fine-tuning /docs 302
/docs/specs/files /docs 302
/docs/modules/threads /docs/threads 302
/guides/linux /docs/desktop/linux 302
/developer/build-engine/engine-anatomy/ /docs 302
/developer/engine/ /docs 302
/docs/product/system-monitor/ /docs 302
/docs/product/settings/ /docs 302
/developer/build-assistant/your-first-assistant/ /docs 302
/engineering/research/ /docs 302
/guides/troubleshooting/gpu-not-used/ /docs/troubleshooting#troubleshooting-nvidia-gpu 302
/troubleshooting/gpu-not-used/ /docs/troubleshooting#troubleshooting-nvidia-gpu 302
/docs/integrations/langchain/ /integrations 302
/onboarding/ /docs/quickstart 302
/cortex/docs https://cortex.so/ 301
/installation/hardware/ /docs/desktop/windows 302
/docs/features/load-unload /docs 302
/guides/chatting/upload-docs/ /docs/threads 302
/developer/build-extension/package-your-assistant/ /docs 302
/blog/hello-world /blog 302
/docs/get-started/build-on-mobile/ /docs/quickstart 302
/ai/anything-v4 /docs 302
/nitro /docs 302
/tokenizer /docs 302
/hardware/examples/3090x1-@dan-jan /docs 302
/guides/concepts/ /about 302
/platform/ /docs 302
/hardware/examples/AMAZON-LINK-HERE /docs 302
/guides/threads/?productId=openai&prompt=What /docs 302
/guides/threads/?productId=openjourney&prompt=realistic%20portrait%20of%20an%20gray%20dog,%20bright%20eyes,%20radiant%20and%20ethereal%20intricately%20detailed%20photography,%20cinematic%20lighting,%2050mm%20lens%20with%20bokeh /docs 302
/guides/threads/?productId=openjourney&prompt=old,%20female%20robot,%20metal,%20rust,%20wisible%20wires,%20destroyed,%20sad,%20dark,%20dirty,%20looking%20at%20viewer,%20portrait,%20photography,%20detailed%20skin,%20realistic,%20photo-realistic,%208k,%20highly%20detailed,%20full%20length%20frame,%20High%20detail%20RAW%20color%20art,%20piercing,%20diffused%20soft%20lighting,%20shallow%20depth%20of%20field,%20sharp%20focus,%20hyperrealism,%20cinematic%20lighting /docs 302
/guides/threads/?productId=openjourney&prompt=a%20young%20caucasian%20man%20holding%20his%20chin.pablo%20picasso%20style,%20acrylic%20painting,%20trending%20on%20pixiv%20fanbox,%20palette%20knife%20and%20brush.%20strokes /docs 302
/guides/threads/?productId=airoboros&prompt=Let%27s%20role%20play.%20You%20are%20a%20robot%20in%20a%20post-apocalyptic%20world. /docs 302
/chat?productId=pirsus-epic-realism /docs 302
/chat?productId=ether-blu-mix /docs 302
/chat?productId=deliberate /docs 302
/chat?productId=wizard_vicuna /docs 302
/chat?productId=disneypixar /docs 302
/chat?productId=meina-mix /docs 302
/chat?productId=anything-v4 /docs 302
/chat?productId=airoboros /docs 302
/chat?productId=ghost-mix /docs 302
/ai/toonyou /docs 302
/chat?productId=xrica-mix /docs 302
/ai/openai /docs 302
/chat?productId=been-you /docs 302
/chat?productId=toonyou /docs 302
/handbook/product-and-community/ /about/community 302
/handbook/contributing-to-jan/how-to-get-involved-and-faq/ /about 302
/handbook/engineering-exellence/one-the-tools-what-we-use-and-why/ /about 302
/handbook/from-spaghetti-flinging-to-strategy/how-we-gtm/ /about/how-we-work/strategy 302
/handbook/product-and-community/our-okrs/ /about 302
/products-and-innovations/philosophy-behind-product-development/ /about 302
/handbook/core-contributors/ /about/team 302
/handbook/contributing-to-jan/feedback-channels/ /about/how-we-work 302
/handbook/meet-jan/ /docs 302
/handbook/engineering-exellence/ /about 302
/blog/tags/hello/ /blog 302
/about/community/events/nvidia-llm-day-nov-23/ /about 302
/guides/gpus-and-vram /docs 302
/careers/ /about/team 302
/handbook/engineering/ /about/team 302
/handbook/products-and-innovations/ /about 302
/handbook/contributing-to-jan/ /about 302
/handbook/meet-jan/vision-and-mission/ /about 302
/handbook/products-and-innovations/roadmap-present-and-future-directions/ /about 302
/handbook/what-we-do/ /about/team 302
/handbook/onboarding/ /docs 302
/handbook/products-and-innovations/overview-of-jan-framework-and-its-applications/ /docs 302
/handbook/product/ /docs 302
/running /docs 302
/running?model=Open%20Journey%20SD /docs 302
/ai/been-you /about 302
/tokenizer?view=bpe /docs 302
/docs/engineering/ /docs 302
/developer/install-and-prerequisites#system-requirements /docs/desktop/windows 302
/guides/quickstart /docs/quickstart 302
/guides/models /docs/models 302
/guides/threads /docs/threads 302
/guides/local-api /docs/local-api 302
/guides/advanced /docs/settings 302
/guides/engines/llamacpp /docs/built-in/llama-cpp 302
/guides/engines/tensorrt-llm /docs/built-in/tensorrt-llm 302
/guides/engines/lmstudio /docs/local-models/lmstudio 302
/guides/engines/ollama /docs/local-models/ollama 302
/guides/engines/groq /docs/remote-models/groq 302
/guides/engines/mistral /docs/remote-models/mistralai 302
/guides/engines/openai /docs/remote-models/openai 302
/guides/engines/remote-server /docs/remote-inference/generic-openai 302
/extensions /docs/extensions 302
/integrations/discord /integrations/messaging/llmcord 302
/docs/integrations/discord /integrations/messaging/llmcord 302
/integrations/interpreter /integrations/function-calling/interpreter 302
/integrations/raycast /integrations/workflow-automation/raycast 302
/integrations/openrouter /docs/remote-models/openrouter 302
/integrations/continue /integrations/coding/continue-dev 302
/troubleshooting /docs/troubleshooting 302
/changelog/changelog-v0.4.9 /changelog 302
/changelog/changelog-v0.4.8 /changelog 302
/changelog/changelog-v0.4.7 /changelog 302
/changelog/changelog-v0.4.6 /changelog 302
/changelog/changelog-v0.4.5 /changelog 302
/changelog/changelog-v0.4.4 /changelog 302
/changelog/changelog-v0.4.3 /changelog 302
/changelog/changelog-v0.4.2 /changelog 302
/changelog/changelog-v0.4.1 /changelog 302
/changelog/changelog-v0.4.0 /changelog 302
/changelog/changelog-v0.3.3 /changelog 302
/changelog/changelog-v0.3.2 /changelog 302
/changelog/changelog-v0.3.1 /changelog 302
/changelog/changelog-v0.3.0 /changelog 302
/changelog/changelog-v0.2.3 /changelog 302
/changelog/changelog-v0.2.2 /changelog 302
/changelog/changelog-v0.2.1 /changelog 302
/changelog/changelog-v0.2.0 /changelog 302
/guides/troubleshooting/ /docs/troubleshooting 302
/docs/troubleshooting/failed-to-fetch/ /docs/troubleshooting 302
/docs/troubleshooting/stuck-on-broken-build/ /docs/troubleshooting 302
/docs/troubleshooting/somethings-amiss/ /docs/troubleshooting 302
/docs/troubleshooting/how-to-get-error-logs/ /docs/troubleshooting 302
/docs/troubleshooting/permission-denied/ /docs/troubleshooting 302
/docs/troubleshooting/unexpected-token/ /docs/troubleshooting 302
/docs/troubleshooting/undefined-issue/ /docs/troubleshooting 302
/getting-started/troubleshooting/ /docs/troubleshooting 302
/docs/troubleshooting/gpu-not-used/ /docs/troubleshooting#troubleshooting-nvidia-gpu 302
/guides/integrations/openrouter/ /docs/remote-models/openrouter 302
/guides/integrations/continue/ /integrations/coding/continue-dev 302
/guides/using-extensions/ /docs/extensions 302
/features/extensions/ /docs/extensions 302
/integrations/tensorrt /docs/built-in/tensorrt-llm 302
/integrations/tensorrt/ /docs/built-in/tensorrt-llm 302
/guides/using-models/integrate-with-remote-server/ /docs/remote-inference/generic-openai 302
/guides/using-models/customize-engine-settings/ /docs/built-in/llama-cpp 302
/developers/plugins/azure-openai/ /docs/remote-models/openai 302
/docs/api-reference/assistants/ /api-reference#tag/assistants 302
/docs/api-reference/models/list/ /api-reference#tag/models 302
/docs/api-reference/threads/ /api-reference#tag/chat 302
/docs/api-reference/messages/ /api-reference#tag/messages 302
/docs/api-reference/models/ /api-reference#tag/models 302
/chat/ /docs/threads 302
/guides/chatting/manage-history/ /docs/threads/ 302
/guides/using-server/ /docs/local-api 302
/guides/using-server/server /docs/local-api 302
/guides/server /docs/desktop 302
/acknowledgements/ /about/acknowledgements 302
/community/ /about/community 302
/faq/ /about/faq 302
/wall-of-love/ /about/wall-of-love 302
/guides/troubleshooting/failed-to-fetch/ /docs/troubleshooting 302
/docs/troubleshooting/gpu-not-used/ /docs/troubleshooting#troubleshooting-nvidia-gpu 302
/docs/troubleshooting/failed-to-fetch/ /docs/troubleshooting 302
/team/contributor-program /about/team 302
/team/join-us /about/team 302
/how-we-work/strategy /about/how-we-work/strategy 302
/how-we-work/strategy/ /about/how-we-work/strategy 302
/how-we-work/project-management /about/how-we-work/project-management 302
/engineering /about/how-we-work/engineering 302
/engineering/ci-cd /about/how-we-work/engineering/ci-cd 302
/engineering/qa /about/how-we-work/engineering/qa 302
/how-we-work/product-design /about 302
/how-we-work/analytics /about/how-we-work/analytics 302
/how-we-work/website-docs /about/how-we-work/website-docs 302
/blog/postmortems/january-10-2024-bitdefender-false-positive-flag /post/bitdefender 302
/guides/error-codes/something-amiss /docs/troubleshooting#somethings-amiss 302
/guides/error-codes/how-to-get-error-logs /docs/troubleshooting#how-to-get-error-logs 302
/guides/chatting /docs/threads 302
/guides/integration/openinterpreter /integrations/function-calling/interpreter 302
/developer/build-assistant /docs/assistants 302
/guides/integrations /integrations 302
/specs/hub /docs 302
/install/windows /docs/desktop/windows 302
/install/linux /docs/desktop/linux 302
/install/nightly /docs/desktop/windows 302
/docs/engineering/fine-tuning /docs 302
/developer/assistant /docs/assistants 302
/guides/common-error/broken-build /docs/troubleshooting#broken-build 302
/guides/using-server/using-server /docs/local-api 302
/guides/integrations/azure-openai-service /docs/remote-models/openai 302
/specs/messages /docs/threads 302
/docs/engineering/models /docs/models 302
/docs/specs/assistants /docs/assistants 302
/docs/engineering/chats /docs/threads 302
/guides/using-extensions/extension-settings /docs/extensions 302
/guides/models/customize-engine /docs/models 302
/guides/integration/mistral /docs/remote-models/mistralai 302
/guides/common-error /docs/troubleshooting 302
/guides/integrations/ollama /docs/local-models/ollama 302
/server-suite /api-reference 302
/guides/integrations/lmstudio /docs/local-models/lmstudio 302
/guides/integrations/mistral-ai /docs/remote-models/mistralai 302
/guides/start-server /docs/local-api 302
/guides/changelog /changelog 302
/guides/models-list /docs/models 302
/guides/thread /docs/threads 302
/docs/engineering/messages /docs/threads 302
/guides/faqs /about/faq 302
/docs/integrations/openrouter /docs/remote-models/openrouter 302
/docs/integrations/ollama/ /docs/local-models/ollama 302
/api/overview /api-reference 302
/docs/extension-guides /docs/extensions 302
/specs/settings /docs 302
/docs/UI /docs 302
/guides/using-models/import-models-using-absolute-filepath /docs/models 302
/install/docker /docs/desktop 302
/v1/models/ /docs/models 302
/guides/using-models/import-manually /docs/models 302
/docs/team/contributor-program /about/team 302
/guides/chatting/start-thread /docs/threads 302
/api/files /docs 302
/specs/threads /docs/threads 302
/about/brand-assets/ /about 302
/guides/chatting/upload-images /docs/threads 302
/guides/using-models/customize-models /docs/models 302
/specs/chats /docs/threads 302
/specs/engine /docs 302
/specs/data-structures/ /docs 302
/docs/extension-capabilities /docs/extensions 302
/docs/get-started/use-local-server /docs/local-api 302
/guides/install/cloud-native/ /docs/desktop 302
/guides/install/ /docs/desktop 302
/docs/installation/desktop /docs/desktop 302
/specs /docs 302
/docs/get-started/build-extension /docs/extensions 302
/specs/files /docs 302
/guides/using-models/package-models /docs/models 302
/guides/using-models/ /docs/models 302
/install/overview /docs/desktop/windows 302
/developer/prereq/ /docs 302
/docs/get-started/extension-anatomy /docs/extensions 302
/guides/mac /docs/desktop/mac 302
/intro /about 302
/specs/fine-tuning /docs 302
/specs/file-based /docs 302
/docs/extension-guides/monitoring /docs/extensions 302
/api /api-reference 302
/getting-started/build-an-app/ /docs/quickstart 302
/features/ai-models /docs/models 302
/reference/store /api-reference 302
/tutorials/build-chat-app/ /docs/quickstart 302
/features/acceleration/ /docs/built-in/llama-cpp 302
/getting-started/install/mac/ /docs/desktop/mac 302
docs/guides/fine-tuning/what-models-can-be-fine-tuned/ /docs 302
/docs/specs/threads/ /docs/threads 302
/docs/api-reference/fine-tuning/ /api-reference 302
/docs/guides/speech-to-text/prompting/ /docs/quickstart 302
/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model/ /docs 302
/getting-started/install/windows/ /docs/desktop/windows 302
/docs/modules/chats/ /docs/threads 302
/docs/specs/chats/ /docs/threads 302
/docs/modules/files/ /docs 302
/tutorials/build-rag-app/ /docs/tools/retrieval 302
/docs/models/model-endpoint-compatibility/ /docs/models 302
/docs/guides/legacy-fine-tuning/creating-training-data/ /docs 302
/docs/specs/models/ /docs/models 302
/docs/guides/safety-best-practices/end-user-ids/ /docs/quickstart 302
/docs/models/overview/ /docs/models 302
/docs/api-reference/files/ /api-reference 302
/docs/models/tts/ /docs/models 302
/docs/guides/fine-tuning/ /docs 302
/docs/specs/files/ /docs 302
/docs/modules/threads/ /docs/threads 302
/guides/linux/ /docs/desktop/linux 302
/developer/build-engine/engine-anatomy /docs 302
/developer/engine /docs 302
/docs/product/system-monitor /docs 302
/docs/product/settings /docs 302
/developer/build-assistant/your-first-assistant /docs 302
/engineering/research /docs 302
/docs/integrations/langchain /integrations 302
/onboarding /docs/quickstart 302
/installation/hardware /docs/desktop/windows 302
/docs/features/load-unload/ /docs 302
/guides/chatting/upload-docs /docs/threads 302
/developer/build-extension/package-your-assistant /docs 302
/blog/hello-world/ /blog 302
/docs/get-started/build-on-mobile /docs/quickstart 302
/ai/anything-v4/ /docs 302
/nitro/ /docs 302
/tokenizer/ /docs 302
/hardware/examples/3090x1-@dan-jan/ /docs 302
/guides/concepts /about 302
/platform /docs 302
/hardware/examples/AMAZON-LINK-HERE/ /docs 302
/guides/threads/?productId=openai&prompt=What/ /docs 302
/guides/threads/?productId=openjourney&prompt=realistic%20portrait%20of%20an%20gray%20dog,%20bright%20eyes,%20radiant%20and%20ethereal%20intricately%20detailed%20photography,%20cinematic%20lighting,%2050mm%20lens%20with%20bokeh/ /docs 302
/guides/threads/?productId=openjourney&prompt=old,%20female%20robot,%20metal,%20rust,%20wisible%20wires,%20destroyed,%20sad,%20dark,%20dirty,%20looking%20at%20viewer,%20portrait,%20photography,%20detailed%20skin,%20realistic,%20photo-realistic,%208k,%20highly%20detailed,%20full%20length%20frame,%20High%20detail%20RAW%20color%20art,%20piercing,%20diffused%20soft%20lighting,%20shallow%20depth%20of%20field,%20sharp%20focus,%20hyperrealism,%20cinematic%20lighting/ /docs 302
/guides/threads/?productId=openjourney&prompt=a%20young%20caucasian%20man%20holding%20his%20chin.pablo%20picasso%20style,%20acrylic%20painting,%20trending%20on%20pixiv%20fanbox,%20palette%20knife%20and%20brush.%20strokes/ /docs 302
/guides/threads/?productId=airoboros&prompt=Let%27s%20role%20play.%20You%20are%20a%20robot%20in%20a%20post-apocalyptic%20world./ /docs 302
/chat?productId=pirsus-epic-realism/ /docs 302
/chat?productId=ether-blu-mix/ /docs 302
/chat?productId=deliberate/ /docs 302
/chat?productId=wizard_vicuna/ /docs 302
/chat?productId=disneypixar/ /docs 302
/chat?productId=meina-mix/ /docs 302
/chat?productId=anything-v4/ /docs 302
/chat?productId=airoboros/ /docs 302
/chat?productId=ghost-mix/ /docs 302
/ai/toonyou/ /docs 302
/chat?productId=xrica-mix/ /docs 302
/ai/openai/ /docs 302
/chat?productId=been-you/ /docs 302
/chat?productId=toonyou/ /docs 302
/handbook/product-and-community /about/community 302
/handbook/contributing-to-jan/how-to-get-involved-and-faq /about 302
/handbook/engineering-exellence/one-the-tools-what-we-use-and-why /about 302
/handbook/from-spaghetti-flinging-to-strategy/how-we-gtm /about/how-we-work/strategy 302
/handbook/product-and-community/our-okrs /about 302
/products-and-innovations/philosophy-behind-product-development /about 302
/handbook/core-contributors /about/team 302
/handbook/contributing-to-jan/feedback-channels /about/how-we-work 302
/handbook/meet-jan /docs 302
/handbook/engineering-exellence /about 302
/blog/tags/hello /blog 302
/about/community/events/nvidia-llm-day-nov-23 /about 302
/guides/gpus-and-vram/ /docs 302
/careers /about/team 302
/handbook/engineering /about/team 302
/handbook/products-and-innovations /about 302
/handbook/contributing-to-jan /about 302
/handbook/meet-jan/vision-and-mission /about 302
/handbook/products-and-innovations/roadmap-present-and-future-directions /about 302
/handbook/what-we-do /about/team 302
/handbook/onboarding /docs 302
/handbook/products-and-innovations/overview-of-jan-framework-and-its-applications /docs 302
/handbook/product /docs 302
/running/ /docs 302
/running?model=Open%20Journey%20SD/ /docs 302
/ai/been-you/ /about 302
/tokenizer?view=bpe/ /docs 302
/docs/engineering /docs 302
/developer /docs 302
/developer/ /docs 302
/developer/architecture /docs/architecture 302
/developer/architecture/ /docs/architecture 302
/developer/file-based /docs 302
/developer/file-based/ /docs 302
/developer/framework /docs 302
/developer/framework/ /docs 302
/developer/framework/engineering /docs 302
/developer/framework/engineering/ /docs 302
/developer/framework/engineering/assistants /docs/assistants 302
/developer/framework/engineering/assistants/ /docs/assistants 302
/developer/framework/engineering/chats /docs/threads 302
/developer/framework/engineering/chats/ /docs/threads 302
/developer/framework/engineering/engine /docs 302
/developer/framework/engineering/engine/ /docs 302
/developer/framework/engineering/files /docs 302
/developer/framework/engineering/files/ /docs 302
/developer/framework/engineering/fine-tuning /docs 302
/developer/framework/engineering/fine-tuning/ /docs 302
/developer/framework/engineering/messages /docs/threads 302
/developer/framework/engineering/messages/ /docs/threads 302
/developer/framework/engineering/models /docs/models 302
/developer/framework/engineering/models/ /docs/models 302
/developer/framework/engineering/prompts /docs 302
/developer/framework/engineering/prompts/ /docs 302
/developer/framework/engineering/threads /docs/threads 302
/developer/framework/engineering/threads/ /docs/threads 302
/developer/framework/product /docs 302
/developer/framework/product/ /docs 302
/developer/framework/product/chat /docs/threads 302
/developer/framework/product/chat/ /docs/threads 302
/developer/framework/product/hub /docs 302
/developer/framework/product/hub/ /docs 302
/developer/framework/product/jan /about 302
/developer/framework/product/jan/ /about 302
/developer/framework/product/settings /docs/settings 302
/developer/framework/product/settings/ /docs/settings 302
/developer/framework/product/system-monitor /docs 302
/developer/framework/product/system-monitor/ /docs 302
/developer/user-interface /docs 302
/developer/user-interface/ /docs 302
/docs/desktop /docs/desktop/windows 302
/docs/desktop/ /docs/desktop/windows 302
/docs/inferences/groq /docs/remote-models/groq 302
/docs/inferences/groq/ /docs/remote-models/groq 302
/docs/inferences/llamacpp /docs/built-in/llama-cpp 302
/docs/inferences/llamacpp/ /docs/built-in/llama-cpp 302
/docs/inferences/lmstudio /docs/local-models/lmstudio 302
/docs/inferences/lmstudio/ /docs/local-models/lmstudio 302
/docs/inferences/mistralai /docs/remote-models/mistralai 302
/docs/inferences/mistralai/ /docs/remote-models/mistralai 302
/docs/inferences/ollama /docs/local-models/ollama 302
/docs/inferences/ollama/ /docs/local-models/ollama 302
/docs/inferences/openai /docs/remote-models/openai 302
/docs/inferences/openai/ /docs/remote-models/openai 302
/docs/inferences/remote-server-integration /docs/remote-inference/generic-openai 302
/docs/inferences/remote-server-integration/ /docs/remote-inference/generic-openai 302
/docs/inferences/tensorrtllm /docs/built-in/tensorrt-llm 302
/docs/inferences/tensorrtllm/ /docs/built-in/tensorrt-llm 302
/docs/integrations/router /docs/remote-models/openrouter 302
/docs/integrations/router/ /docs/remote-models/openrouter 302
/docs/server /docs/local-api 302
/docs/server/ /docs/local-api 302
/features/ /docs 302
/features /docs 302
/features/local/ /docs/local-api 302
/features/local /docs/local-api 302
/guides/providers/tensorrt-llm /docs/built-in/tensorrt-llm 302
/guides/providers/tensorrt-llm/ /docs/built-in/tensorrt-llm 302
/hardware/recommendations/by-model/ /docs 302
/hardware/recommendations/by-hardware/ /docs 302
/product /docs 302
/product/features /docs 302
/product/features/agents-framework /docs 302
/product/features/api-server /docs/local-api 302
/product/features/data-security /docs 302
/product/features/extensions-framework /docs/extensions 302
/product/features/local /docs 302
/product/features/remote /docs 302
/product/home-server /docs/local-api 302
/guides/providers/tensorrt-llm/ /docs/built-in/tensorrt-llm 302
/docs/tools /docs/tools/retrieval 302
/docs/local-inference/llamacpp /docs/built-in/llama-cpp 302
/docs/local-inference/tensorrtllm /docs/built-in/tensorrt-llm 302
/guides/using-server/server/ /docs/local-api 302
/integrations/coding/vscode /integrations/coding/continue-dev 302
/docs/integrations/interpreter /integrations/function-calling/interpreter 302
/cortex/built-in/llama-cpp /docs 302
/docs/desktop-installation/linux /docs/desktop/linux 302
/docs/desktop-installation/windows /docs/desktop/windows 302
/docs/desktop-installation/mac /docs/desktop/mac 302
/desktop/ /docs/desktop 302
/developer/ui/ /docs 302
/docs/local-inference/lmstudio /docs/local-models/lmstudio 302
/docs/local-inference/ollama /docs/local-models/ollama 302
/docs/remote-inference/openai /docs/remote-models/openai 302
/docs/remote-inference/groq /docs/remote-models/groq 302
/docs/remote-inference/mistralai /docs/remote-models/mistralai 302
/docs/remote-inference/openrouter /docs/remote-models/openrouter 302
/docs/remote-inference/generic-openai /docs/remote-models/generic-openai 302
/docs/desktop-installation /docs/desktop 302
/hardware/concepts/gpu-and-vram/ /docs 302
/hardware/recommendations/by-usecase/ /docs 302
/about/how-we-work/strategy /about 302
/docs/engineering/assistants/ /docs 302
/cortex https://cortex.so/docs/ 301
/cortex/quickstart https://cortex.so/docs/quickstart 301
/cortex/hardware https://cortex.so/docs/hardware 301
/cortex/installation https://cortex.so/docs/category/installation 301
/cortex/installation/mac https://cortex.so/docs/instalation/mac 301
/cortex/installation/windows https://cortex.so/docs/instalation/windows 301
/cortex/installation/linux https://cortex.so/docs/instalation/linux 301
/cortex/command-line https://cortex.so/docs/command-line 301
/cortex/ts-library https://cortex.so/docs/ts-library 301
/cortex/py-library https://cortex.so/docs/py-library 301
/cortex/server https://cortex.so/docs/server 301
/cortex/text-generation https://cortex.so/docs/text-generation 301
/cortex/cli https://cortex.so/docs/cli/ 301
/cortex/cli/init https://cortex.so/docs/cli/init 301
/cortex/cli/pull https://cortex.so/docs/cli/pull 301
/cortex/cli/run https://cortex.so/docs/cli/run 301
/cortex/cli/models https://cortex.so/docs/cli/models/ 301
/cortex/cli/models/download https://cortex.so/docs/cli/models/download 301
/cortex/cli/models/list https://cortex.so/docs/cli/models/list 301
/cortex/cli/models/get https://cortex.so/docs/cli/models/get 301
/cortex/cli/models/update https://cortex.so/docs/cli/models/update 301
/cortex/cli/models/start https://cortex.so/docs/cli/models/start 301
/cortex/cli/models/stop https://cortex.so/docs/cli/models/stop 301
/cortex/cli/models/remove https://cortex.so/docs/cli/models/remove 301
/cortex/cli/ps https://cortex.so/docs/cli/ps 301
/cortex/cli/chat https://cortex.so/docs/cli/chat 301
/cortex/cli/kill https://cortex.so/docs/cli/kill 301
/cortex/cli/serve https://cortex.so/docs/cli/serve 301
/cortex/architecture https://cortex.so/docs/architecture 301
/cortex/cortex-cpp https://cortex.so/docs/cortex-cpp 301
/cortex/cortex-llamacpp https://cortex.so/docs/cortex-llamacpp 301
/api-reference https://cortex.so/api-reference 301
/docs/assistants /docs 302
/docs/server-installation/ /docs/desktop 302
/docs/server-installation/onprem /docs/desktop 302
/docs/server-installation/aws /docs/desktop 302
/docs/server-installation/gcp /docs/desktop 302
/docs/server-installation/azure /docs/desktop 302
/about /docs 302
/api-server /docs/api-server 302
/cdn-cgi/l/email-protection 302
/docs/built-in/tensorrt-llm 302
/docs/desktop/beta /docs 302
/docs/docs/data-folder /docs/data-folder 302
/docs/docs/desktop/linux /docs/desktop/linux 302
/docs/docs/troubleshooting /docs/troubleshooting 302
/docs/local-engines/llama-cpp 302
/docs/models/model-parameters 302
/mcp /docs/mcp 302
/quickstart /docs/quickstart 302
/server-examples/continue-dev /docs/server-examples/continue-dev 302
/about /handbook 302
/about/ /handbook 302
/about/community /handbook 302
/about/handbook /handbook 302
/about/handbook/analytics /handbook 302
/about/handbook/project-management /handbook 302
/about/handbook/strategy /handbook 302
/about/handbook/website-docs /handbook 302
/about/how-we-work/product-design /handbook 302
/about/how-we-work/strategy /handbook 302
/about/investors /handbook 302
/about/roadmap /handbook 302
/about/team /handbook 302
/about/vision /handbook 302
/about/wall-of-love /handbook 302
/handbook/contributing-to-jan/ /handbook 302
/handbook/core-contributors/how-we-hire/ /handbook 302
/handbook/engineering-excellence/ /handbook 302
/handbook/engineering/ /handbook 302
/handbook/product-and-community/ /handbook 302
/handbook/products-and-innovations/ /handbook 302
/handbook/what-we-do/our-approach-to-design/ /handbook 302
/how-we-work/product-design /handbook 302
/handbook/product-and-community/approaches-to-beta-testing-and-user-engagement/ /handbook 302
/cortex/assistants /docs/ 302
/cortex/build-extension /docs/ 302
/cortex/built-in/tensorrt-llm /docs/ 302
/cortex/cli/kill /docs/ 302
/cortex/command-line /docs/ 302
/cortex/cortex-openvino /docs/ 302
/cortex/cortex-python /docs/ 302
/cortex/cortex-tensorrt-llm /docs/ 302
/cortex/desktop-installation/linux /docs/ 302
/cortex/embeddings /docs/ 302
/cortex/ext-architecture /docs/ 302
/cortex/fine-tuning /docs/ 302
/cortex/fine-tuning/overview /docs/ 302
/cortex/function-calling /docs/ 302
/cortex/installation/linux /docs/ 302
/cortex/installation/mac /docs/ 302
/cortex/model-operations /docs/ 302
/cortex/model-operations/overview /docs/ 302
/cortex/rag/overview /docs/ 302
/cortex/server /docs/ 302
/docs/tools/retrieval /docs/ 302
/developer/framework/engineering/chats /docs/ 302
/developer/framework/engineering/threads/ /docs/ 302
/developer/framework/product/chat /docs/ 302
/docs/extensions /docs/ 302
/docs/shortcuts /docs/ 302
/docs/models /docs/ 302
/integrations/function-calling/interpreter /docs/ 302
/docs/desktop/built-in/tensorrt-llm /docs 302
/docs/desktop/beta /docs/desktop 302
/platforms /docs/desktop 302
/docs/built-in/llama-cpp /docs/desktop/llama-cpp 302
/docs/install-engines /docs/desktop/llama-cpp 302
/docs/local-api /docs/desktop/api-server 302
/docs/local-engines/llama-cpp /docs/desktop/llama-cpp 302
/docs/api-server /docs/desktop/api-server 302
/docs/assistants /docs/desktop/assistants 302
/docs/models/manage-models /docs/desktop/manage-models 302
/docs/data-folder /docs/desktop/data-folder 302
/cortex/vision /handbook/open-superintelligence 302
/docs/models/model-parameters /docs/desktop/model-parameters 302
/docs/remote-models/generic-openai /docs/desktop/remote-models/openai 302
/docs/threads /changelog/2024-01-16-settings-options-right-panel 302
/docs/desktop/docs/data-folder /docs/desktop/data-folder 302
/docs/desktop/docs/desktop/install/linux /docs/desktop/install/linux 302
/docs/desktop/docs/desktop/troubleshooting /docs/desktop/troubleshooting 302
/docs/desktop/linux /docs/desktop/install/linux 302
/docs/desktop/local-engines/llama-cpp /docs/desktop/llama-cpp-server 302
/docs/desktop/models/model-parameters /docs/desktop/model-parameters 302
/docs/desktop/windows /docs/desktop/install/windows 302
/docs/docs/data-folder /docs/desktop/data-folder 302
/docs/docs/desktop/linux /docs/desktop/install/linux 302
/docs/docs/troubleshooting /docs/desktop/troubleshooting 302
/docs/jan-models/jan-nano-32 /docs/desktop/jan-models/jan-nano-32 302
/docs/jan-models/jan-v1 /docs/desktop/jan-models/jan-v1 302
/docs/jan-models/lucy /docs/desktop/jan-models/lucy 302
/docs/llama-cpp /docs/desktop/llama-cpp 302
/docs/manage-models /docs/desktop/manage-models 302
/docs/mcp /docs/desktop/mcp 302
/docs/mcp-examples/data-analysis/e2b /docs/desktop/mcp-examples/data-analysis/e2b 302
/docs/mcp-examples/deepresearch/octagon /docs/desktop/mcp-examples/deepresearch/octagon 302
/docs/mcp-examples/design/canva /docs/desktop/mcp-examples/design/canva 302
/docs/mcp-examples/productivity/linear /docs/desktop/mcp-examples/productivity/linear 302
/docs/mcp-examples/search/exa /docs/desktop/mcp-examples/search/exa 302
/docs/model-parameters /docs/desktop/model-parameters 302
/docs/remote-models/cohere /docs/desktop/remote-models/cohere 302
/docs/remote-models/google /docs/desktop/remote-models/google 302
/docs/remote-models/groq /docs/desktop/remote-models/groq 302
/docs/remote-models/huggingface /docs/desktop/remote-models/huggingface 302
/docs/remote-models/mistralai /docs/desktop/remote-models/mistralai 302
/docs/remote-models/openai /docs/desktop/remote-models/openai 302
/docs/server-examples/continue-dev /docs/desktop/server-examples/continue-dev 302
/docs/server-examples/n8n /docs/desktop/server-examples/n8n 302
/docs/server-troubleshooting /docs/desktop/troubleshooting 302
/docs/privacy-policy /privacy 302
/docs/server-settings /docs/desktop/server-settings 302
/docs/settings /docs/desktop/settings 302
/docs/llama-cpp-server /docs/desktop/llama-cpp-server 302
/docs/install/linux /docs/desktop/install/linux 302
/docs/install/macos /docs/desktop/install/mac 302
/docs/install/windows /docs/desktop/install/windows 302
/docs/mcp-examples/browser/browserbase /docs/desktop/mcp-examples/browser/browserbase 302
/docs/jan-models/jan-nano-128 /docs/desktop/jan-models/jan-nano-128 302
/docs/mcp-examples/search/serper /docs/desktop/mcp-examples/search/serper 302
/docs/mcp-examples/data-analysis/jupyter /docs/desktop/mcp-examples/data-analysis/jupyter 302
/docs/mcp-examples/productivity/todoist /docs/desktop/mcp-examples/productivity/todoist 302
/docs/remote-models/anthropic /docs/desktop/remote-models/anthropic 302
/docs/remote-models/openrouter /docs/desktop/remote-models/openrouter 302
/docs/server-examples/llmcord /docs/desktop/server-examples/llmcord 302
/docs/server-examples/tabby /docs/desktop/server-examples/tabby 302
/guides/integrations/continue/ /docs/desktop/server-examples/continue-dev 302
/continue-dev /docs/desktop/server-examples/continue-dev 302
/integrations /docs/desktop/server-examples/continue-dev 302
/integrations/coding/continue-dev /docs/desktop/server-examples/continue-dev 302
/integrations/continue/ /docs/desktop/server-examples/continue-dev 302
/integrations/coding/tabby /docs/desktop/server-examples/tabby 302
/integrations/messaging/llmcord /docs/desktop/server-examples/llmcord 302
/integrations/workflow-automation/n8n /docs/desktop/server-examples/n8n 302
/local-server/continue-dev /docs/desktop/server-examples/continue-dev 302
/local-server/data-folder /docs/desktop/desktop/data-folder 302
/local-server/llama-cpp /docs/desktop/desktop/llama-cpp 302
/local-server/n8n /docs/desktop/server-examples/n8n 302
/local-server/settings /docs/desktop/server-settings 302
/local-server/tabby /docs/desktop/server-examples/tabby 302
/local-server/troubleshooting /docs/desktop/troubleshooting 302
/mcp /docs/desktop/mcp 302
/quickstart /docs/desktop/quickstart 302
/server-examples/continue-dev /docs/desktop/server-examples/continue-dev 302
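
Each rule above follows the `_redirects` convention of `source destination status`. As a quick sanity check against a preview deployment, a small parser/prober along these lines could confirm every source answers with the expected redirect (a sketch only — the helper names and base URL are illustrative and not part of this change; Node 18+ is assumed for the built-in `fetch`):

```typescript
// Sketch (assumption): parse `source destination status` rules and probe a
// deployment to confirm each source answers with the expected redirect.
interface RedirectRule {
  source: string;
  destination: string; // empty when a rule only carries a status code
  status: number;
}

function parseRules(text: string): RedirectRule[] {
  const rules: RedirectRule[] = [];
  for (const raw of text.split('\n')) {
    const line = raw.trim();
    if (!line || line.startsWith('#')) continue;
    const parts = line.split(/\s+/);
    const last = parts[parts.length - 1];
    const hasStatus = /^\d{3}!?$/.test(last);
    const status = hasStatus ? parseInt(last, 10) : 301;
    const body = hasStatus ? parts.slice(0, -1) : parts;
    const [source, destination = ''] = body;
    rules.push({ source, destination, status });
  }
  return rules;
}

async function checkRedirect(base: string, rule: RedirectRule): Promise<boolean> {
  // redirect: 'manual' keeps the 301/302 response instead of following it.
  const res = await fetch(new URL(rule.source, base), { redirect: 'manual' });
  const location = res.headers.get('location') ?? '';
  return res.status === rule.status && location.startsWith(rule.destination);
}
```

Run over the rules above, a check like this would also surface entries such as `/cdn-cgi/l/email-protection 302`, which currently list no destination.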

View File

@@ -1,148 +1,125 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:news="http://www.google.com/schemas/sitemap-news/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:mobile="http://www.google.com/schemas/sitemap-mobile/1.0" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
<url><loc>https://jan.ai</loc><lastmod>2025-03-10T05:06:47.876Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/analytics</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/engineering</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/engineering/ci-cd</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/engineering/qa</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/product-design</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/project-management</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/strategy</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/website-docs</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/investors</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/team</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/vision</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/wall-of-love</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/blog</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2023-12-21-faster-inference-across-platform</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-01-16-settings-options-right-panel</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-01-29-local-api-server</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-05-jan-data-folder</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-10-jan-is-more-stable</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-26-home-servers-with-helm</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-06-ui-revamp-settings</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-11-import-models</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-19-nitro-tensorrt-llm-extension</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-02-groq-api-integration</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-15-new-mistral-extension</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-25-llama3-command-r-hugginface</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-05-20-llamacpp-upgrade-new-remote-models</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-05-28-cohere-aya-23-8b-35b-phi-3-medium</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-06-21-nvidia-nim-support</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-07-15-claude-3-5-support</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-09-01-llama3-1-gemma2-support</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-09-17-improved-cpu-performance</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-10-24-jan-stable</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-11-22-jan-bugs</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-11.14-jan-supports-qwen-coder</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-03-jan-is-faster</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-05-jan-hot-fix-mac</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-30-jan-new-privacy</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-01-06-key-issues-resolved</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-01-23-deepseek-r1-jan</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/architecture</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/assistants</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/build-extension</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/chat</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/init</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/kill</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/download</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/get</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/list</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/remove</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/start</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/stop</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/update</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/ps</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/pull</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/run</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/serve</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/command-line</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cortex-cpp</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cortex-llamacpp</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cortex-openvino</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cortex-python</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cortex-tensorrt-llm</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/embeddings</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/embeddings/overview</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/error-codes</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/ext-architecture</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/fine-tuning</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/fine-tuning/overview</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/function-calling</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/hardware</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/installation</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/installation/linux</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/installation/mac</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/installation/windows</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/model-operations</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/model-operations/overview</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/py-library</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/quickstart</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/rag</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/rag/overview</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/server</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/text-generation</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/ts-library</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/vision</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/vision/overview</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/api-server</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/assistants</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/configure-extensions</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/data-folder</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/linux</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mac</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/windows</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/error-codes</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/extensions</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/extensions-settings/model-management</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/extensions-settings/system-monitoring</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/install-engines</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/install-extensions</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/local-engines/llama-cpp</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/models</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/models/manage-models</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/models/model-parameters</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/privacy</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/privacy-policy</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/quickstart</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/anthropic</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/cohere</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/deepseek</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/google</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/groq</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/martian</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/mistralai</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/nvidia-nim</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/openai</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/openrouter</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/triton</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/settings</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/threads</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/tools/retrieval</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/troubleshooting</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/download</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations/coding/continue-dev</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations/coding/tabby</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations/function-calling/interpreter</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations/messaging/llmcord</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations/workflow-automation/n8n</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/benchmarking-nvidia-tensorrt-llm</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/bitdefender</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/data-is-moat</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/deepseek-r1-locally</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/offline-chatgpt-alternative</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/rag-is-not-enough</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/run-ai-models-locally</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/privacy</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/support</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai</loc><lastmod>2025-09-24T03:40:05.491Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference/api-reference</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference/architecture</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference/configuration</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference/development</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference/installation</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/blog</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2023-12-21-faster-inference-across-platform</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-01-16-settings-options-right-panel</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-01-29-local-api-server</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-05-jan-data-folder</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-10-jan-is-more-stable</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-26-home-servers-with-helm</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-06-ui-revamp-settings</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-11-import-models</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-19-nitro-tensorrt-llm-extension</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-02-groq-api-integration</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-15-new-mistral-extension</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-25-llama3-command-r-hugginface</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-05-20-llamacpp-upgrade-new-remote-models</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-05-28-cohere-aya-23-8b-35b-phi-3-medium</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-06-21-nvidia-nim-support</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-07-15-claude-3-5-support</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-09-01-llama3-1-gemma2-support</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-09-17-improved-cpu-performance</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-10-24-jan-stable</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-11-22-jan-bugs</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-11.14-jan-supports-qwen-coder</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-03-jan-is-faster</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-05-jan-hot-fix-mac</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-30-jan-new-privacy</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-01-06-key-issues-resolved</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-01-23-deepseek-r1-jan</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-02-18-advanced-llama.cpp-settings</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-03-14-jan-security-patch</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-05-14-jan-qwen3-patch</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-06-19-jan-ui-revamp</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-06-26-jan-nano-mcp</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-07-17-responsive-ui</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-07-31-llamacpp-tutorials</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-08-07-gpt-oss</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-08-14-general-improvs</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-08-28-image-support</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-09-18-auto-optimize-vision-imports</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/api-server</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/assistants</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/data-folder</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/install/linux</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/install/mac</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/install/windows</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/jan-models/jan-nano-128</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/jan-models/jan-nano-32</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/jan-models/jan-v1</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/jan-models/lucy</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/llama-cpp</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/llama-cpp-server</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/manage-models</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/browser/browserbase</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/data-analysis/e2b</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/data-analysis/jupyter</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/deepresearch/octagon</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/design/canva</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/productivity/linear</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/productivity/todoist</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/search/exa</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/search/serper</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/model-parameters</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/privacy</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/privacy-policy</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/quickstart</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/anthropic</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/cohere</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/google</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/groq</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/huggingface</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/mistralai</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/openai</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/openrouter</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-examples/continue-dev</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-examples/llmcord</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-examples/n8n</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-examples/tabby</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-settings</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-troubleshooting</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/settings</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/troubleshooting</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-administration</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-authentication</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-chat</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-chat-conversations</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-conversations</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-jan-responses</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-jan-server</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/architecture</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/configuration</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/development</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/installation</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/overview</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/download</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/handbook</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/handbook/betting-on-open-source</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/handbook/open-superintelligence</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/benchmarking-nvidia-tensorrt-llm</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/bitdefender</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/data-is-moat</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/deepresearch</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/deepseek-r1-locally</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/jan-v1-for-research</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/offline-chatgpt-alternative</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/qwen3-settings</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/rag-is-not-enough</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/run-ai-models-locally</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/run-gpt-oss-locally</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/privacy</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/support</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
</urlset>
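
For reference, each `<url>` entry above is plain sitemaps.org markup; a generator for it can be as small as the sketch below (the `SitemapEntry` shape and `buildSitemap` helper are illustrative assumptions, not the docs site's actual generator):

```typescript
// Sketch (assumption): emit <url> entries in the same shape as the file above.
interface SitemapEntry {
  loc: string;
  lastmod: string;   // ISO 8601 timestamp
  changefreq?: 'daily' | 'weekly' | 'monthly';
  priority?: number; // 0.0 – 1.0
}

function buildSitemap(entries: SitemapEntry[]): string {
  const urls = entries
    .map(
      (e) =>
        `<url><loc>${e.loc}</loc><lastmod>${e.lastmod}</lastmod>` +
        `<changefreq>${e.changefreq ?? 'daily'}</changefreq>` +
        `<priority>${e.priority ?? 1}</priority></url>`
    )
    .join('\n');
  return (
    '<?xml version="1.0" encoding="UTF-8"?>\n' +
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n' +
    urls +
    '\n</urlset>'
  );
}
```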

View File

@@ -77,9 +77,9 @@ export default function Footer() {
return (
<footer className="py-4 w-full">
<div className="mx-auto">
<div className="grid grid-cols-1 md:grid-cols-6 gap-8">
<div className="grid grid-cols-1 lg:grid-cols-6 gap-8">
{/* Jan Logo and Newsletter */}
<div className="md:col-span-2">
<div className="md:col-span-1 lg:col-span-2">
<h2 className="text-[52px] font-bold mb-6">Jan</h2>
<div>
<div className="flex items-center gap-2 mb-3">
@@ -138,7 +138,7 @@ export default function Footer() {
{/* Menu Columns */}
{FOOTER_MENUS.map((menu) => (
<div key={menu.title} className="">
<h3 className="text-lg mb-4 font-bold">{menu.title}</h3>
<h3 className="text-base mb-4 font-bold">{menu.title}</h3>
<ul className="space-y-2">
{menu.links.map((link) => (
<li key={link.name}>

View File

@@ -95,7 +95,7 @@ const Home = () => {
<div className="container mx-auto relative z-10">
<div className="flex justify-center items-center mt-14 lg:mt-20 px-4">
<a
href=""
href={`https://github.com/menloresearch/jan/releases/tag/${lastVersion}`}
target="_blank"
rel="noopener noreferrer"
className="bg-black/40 px-3 lg:px-4 rounded-full h-10 inline-flex items-center max-w-full animate-fade-in delay-100"
@@ -109,7 +109,7 @@ const Home = () => {
</span>
</a>
</div>
<div className="mt-10">
<div className="mt-4">
<div className="text-center relative lg:w-1/2 mx-auto">
<div className="flex flex-col lg:flex-row items-center justify-center gap-4 animate-fade-in-up delay-300">
<span>
@@ -124,15 +124,20 @@
</h1>
</div>
<p className="px-4 lg:px-0 mt-2 text-lg lg:text-2xl font-medium leading-relaxed text-white animate-fade-in-up delay-500 -tracking-[0.6px]">
Jan is the open-source ChatGPT replacement.
The best of open-source AI in an easy-to-use product.
</p>
</div>
<div className="flex px-4 flex-col lg:flex-row items-center gap-4 w-full justify-center text-center animate-fade-in-up delay-600 mt-8 lg:mt-10">
<DropdownButton
size="xxl"
className="w-full !rounded-[20px] lg:w-auto"
lastRelease={lastRelease}
/>
<div className="flex px-4 flex-col lg:flex-row items-start gap-4 w-full justify-center text-center animate-fade-in-up delay-600 mt-8 lg:mt-10">
<div>
<DropdownButton
size="xxl"
className="w-full !rounded-[20px] lg:w-auto"
lastRelease={lastRelease}
/>
<div className="font-medium text-center mt-2 text-white">
+{totalDownload(release)} downloads
</div>
</div>
<a
href="https://discord.com/invite/FTk2MvZwJH"
target="_blank"
@@ -189,7 +194,8 @@ const Home = () => {
</defs>
</svg>
<span className="text-sm">
{formatCompactNumber(discordWidget.presence_count)}
15k+
{/* {formatCompactNumber(discordWidget.presence_count)} */}
</span>
</div>
</Button>
@@ -198,7 +204,7 @@ const Home = () => {
</div>
</div>
<div className="absolute w-full bottom-0 left-0 flex justify-center">
<div className="absolute w-full -bottom-10 left-0 flex justify-center">
<img
className="abs animate-float scale-[175%] md:scale-100"
src={CuteRobotFlyingPNG.src}
@@ -448,9 +454,10 @@ const Home = () => {
<div className="flex items-center gap-1 ml-3">
<IoMdPeople className="size-5" />
<span className="text-sm">
{formatCompactNumber(
15k+
{/* {formatCompactNumber(
discordWidget.presence_count
)}
)} */}
</span>
</div>
</Button>
@@ -483,9 +490,10 @@ const Home = () => {
<div className="flex items-center gap-1 ml-3">
<IoMdPeople className="size-5" />
<span className="text-sm">
{formatCompactNumber(
15k+
{/* {formatCompactNumber(
discordWidget.presence_count
)}
)} */}
</span>
</div>
</Button>
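
The `15k+` literal stands in for the live Discord presence count that `formatCompactNumber(discordWidget.presence_count)` used to render. For context, a compact formatter of that kind typically wraps `Intl.NumberFormat` (a sketch under that assumption; the repo's actual helper may differ):

```typescript
// Sketch (assumption): compact-number formatting like the commented-out
// formatCompactNumber(discordWidget.presence_count) call above.
function formatCompactNumber(value: number): string {
  return new Intl.NumberFormat('en', {
    notation: 'compact',
    maximumFractionDigits: 1,
  }).format(value);
}

console.log(formatCompactNumber(15234)); // "15.2K"
```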

View File

@@ -155,7 +155,7 @@ Debugging headquarters (`/logs/app.txt`):
The silicon brain collection. Each model has its own `model.json`.
<Callout type="info">
Full parameters: [here](/docs/model-parameters)
Full parameters: [here](/docs/desktop/model-parameters)
</Callout>
### `threads/`
@ -216,5 +216,5 @@ Chat archive. Each thread (`/threads/jan_unixstamp/`) contains:
## Delete Jan Data
Uninstall guides: [Mac](/docs/desktop/mac#step-2-clean-up-data-optional),
[Windows](/docs/desktop/windows#step-2-handle-jan-data), or [Linux](docs/desktop/linux#uninstall-jan).
Uninstall guides: [Mac](/docs/desktop/install/mac#step-2-clean-up-data-optional),
[Windows](/docs/desktop/install/windows#step-2-handle-jan-data), or [Linux](docs/desktop/install/linux#uninstall-jan).

View File

@ -184,9 +184,9 @@ Jan is built on the shoulders of giants:
<FAQBox title="Is Jan compatible with my system?">
**Supported OS**:
- [Windows 10+](/docs/desktop/windows#compatibility)
- [macOS 12+](/docs/desktop/mac#compatibility)
- [Linux (Ubuntu 20.04+)](/docs/desktop/linux)
- [Windows 10+](/docs/desktop/install/windows#compatibility)
- [macOS 12+](/docs/desktop/install/mac#compatibility)
- [Linux (Ubuntu 20.04+)](/docs/desktop/install/linux)
**Hardware**:
- Minimum: 8GB RAM, 10GB storage
@ -216,7 +216,7 @@ Jan is built on the shoulders of giants:
<FAQBox title="How does Jan protect privacy?">
- Runs 100% offline once models are downloaded
- All data stored locally in [Jan Data Folder](/docs/data-folder)
- All data stored locally in [Jan Data Folder](/docs/desktop/data-folder)
- No telemetry without explicit consent
- Open source code you can audit

View File

@ -193,7 +193,7 @@ $XDG_CONFIG_HOME = /home/username/custom_config
~/.config/Jan/data
```
See [Jan Data Folder](/docs/data-folder) for details.
See [Jan Data Folder](/docs/desktop/data-folder) for details.
## GPU Acceleration
@ -244,7 +244,7 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
### Step 2: Enable GPU Acceleration
1. Navigate to **Settings** (<Settings width={16} height={16} style={{display:"inline"}}/>) > **Local Engine** > **Llama.cpp**
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/local-engines/llama-cpp).
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/desktop/local-engines/llama-cpp).
<Callout type="info">
CUDA offers better performance than Vulkan.
@ -258,7 +258,7 @@ CUDA offers better performance than Vulkan.
Requires Vulkan support.
1. Navigate to **Settings** (<Settings width={16} height={16} style={{display:"inline"}}/>) > **Hardware** > **GPUs**
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/local-engines/llama-cpp).
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/desktop/local-engines/llama-cpp).
</Tabs.Tab>
@ -266,7 +266,7 @@ Requires Vulkan support.
Requires Vulkan support.
1. Navigate to **Settings** (<Settings width={16} height={16} style={{display:"inline"}}/>) > **Hardware** > **GPUs**
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/local-engines/llama-cpp).
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/desktop/local-engines/llama-cpp).
</Tabs.Tab>
</Tabs>

View File

@ -111,7 +111,7 @@ Default location:
# Default installation directory
~/Library/Application\ Support/Jan/data
```
See [Jan Data Folder](/docs/data-folder) for details.
See [Jan Data Folder](/docs/desktop/data-folder) for details.
## Uninstall Jan
@ -158,7 +158,7 @@ No, it cannot be restored once you delete the Jan data folder during uninstallat
</FAQBox>
<Callout type="info">
💡 Warning: If you have any trouble during installation, please see our [Troubleshooting](/docs/troubleshooting)
💡 Warning: If you have any trouble during installation, please see our [Troubleshooting](/docs/desktop/troubleshooting)
guide to resolve your problem.
</Callout>

View File

@ -119,7 +119,7 @@ Default installation path:
~\Users\<YourUsername>\AppData\Roaming\Jan\data
```
See [Jan Data Folder](/docs/data-folder) for complete folder structure details.
See [Jan Data Folder](/docs/desktop/data-folder) for complete folder structure details.
## GPU Acceleration

View File

@ -24,7 +24,7 @@ import { Settings } from 'lucide-react'
`llama.cpp` is the core **inference engine** Jan uses to run AI models locally on your computer. This section covers the settings for the engine itself, which control *how* a model processes information on your hardware.
<Callout>
Looking for API server settings (like port, host, CORS)? They have been moved to the dedicated [**Local API Server**](/docs/api-server) page.
Looking for API server settings (like port, host, CORS)? They have been moved to the dedicated [**Local API Server**](/docs/desktop/api-server) page.
</Callout>
## Accessing Engine Settings

View File

@ -30,9 +30,9 @@ This guide shows you how to add, customize, and delete models within Jan.
Local models are managed through [Llama.cpp](https://github.com/ggerganov/llama.cpp), and these models are in a
format called GGUF. When you run them locally, they will use your computer's memory (RAM) and processing power, so
please make sure that you download models that match the hardware specifications for your operating system:
- [Mac](/docs/desktop/mac#compatibility)
- [Windows](/docs/desktop/windows#compatibility)
- [Linux](/docs/desktop/linux#compatibility).
- [Mac](/docs/desktop/install/mac#compatibility)
- [Windows](/docs/desktop/install/windows#compatibility)
- [Linux](/docs/desktop/install/linux#compatibility).
### Adding Models
@ -156,7 +156,7 @@ For advanced users who want to add a specific model that is not available within
Key fields to configure:
1. The **Settings** array is where you can set the path or location of your model in your computer, the context
length allowed, and the chat template expected by your model.
2. The [**Parameters**](/docs/model-parameters) are the adjustable settings that affect how your model operates or
2. The [**Parameters**](/docs/desktop/model-parameters) are the adjustable settings that affect how your model operates or
processes the data. The fields in the parameters array are typically general and can be used across different
models. Here is an example of model parameters:
@ -186,7 +186,7 @@ models. Here is an example of model parameters:
<Callout type="info">
When using cloud models, be aware of any associated costs and rate limits from the providers. See detailed guide for
each cloud model provider [here](/docs/remote-models/anthropic).
each cloud model provider [here](/docs/desktop/remote-models/anthropic).
</Callout>
Jan supports connecting to various AI cloud providers that are OpenAI API-compatible, including: OpenAI (GPT-4o, o3,...),

View File

@ -100,7 +100,7 @@ making your workflows more modular and adaptable over time.
<Callout type="info">
To use MCP effectively, ensure your AI model supports tool calling capabilities:
- For cloud models (like Claude or GPT-4): Verify tool calling is enabled in your API settings
- For local models: Enable tool calling in the model parameters [click the edit button in Model Capabilities](/docs/model-parameters#model-capabilities-edit-button)
- For local models: Enable tool calling in the model parameters [click the edit button in Model Capabilities](/docs/desktop/model-parameters#model-capabilities-edit-button)
- Check the model's documentation to confirm MCP compatibility
</Callout>

View File

@ -26,7 +26,7 @@ import { Callout } from 'nextra/components'
Jan is your AI. Period. Here's what we do with data.
<Callout>
Full privacy policy lives [here](/docs/privacy-policy), if you're into that sort of thing.
Full privacy policy lives [here](/docs/desktop/privacy-policy), if you're into that sort of thing.
</Callout>
<Callout type="info">

View File

@ -27,7 +27,7 @@ Get up and running with Jan in minutes. This guide will help you install Jan, do
### Step 1: Install Jan
1. [Download Jan](/download)
2. Install the app ([Mac](/docs/desktop/mac), [Windows](/docs/desktop/windows), [Linux](/docs/desktop/linux))
2. Install the app ([Mac](/docs/desktop/install/mac), [Windows](/docs/desktop/install/windows), [Linux](/docs/desktop/install/linux))
3. Launch Jan
### Step 2: Download Jan v1
@ -61,7 +61,7 @@ Try asking Jan v1 questions like:
- "What are the pros and cons of electric vehicles?"
<Callout type="tip">
**Want to give Jan v1 access to current web information?** Check out our [Serper MCP tutorial](/docs/mcp-examples/search/serper) to enable real-time web search with 2,500 free searches!
**Want to give Jan v1 access to current web information?** Check out our [Serper MCP tutorial](/docs/desktop/mcp-examples/search/serper) to enable real-time web search with 2,500 free searches!
</Callout>
</Steps>
@ -138,4 +138,4 @@ Connect to OpenAI, Anthropic, Groq, Mistral, and others:
![Connect Remote APIs](./_assets/quick-start-03.png)
For detailed setup, see [Remote APIs](/docs/remote-models/openai).
For detailed setup, see [Remote APIs](/docs/desktop/remote-models/openai).

View File

@ -56,7 +56,7 @@ Ensure your API key has sufficient credits
## Available Anthropic Models
Jan automatically includes Anthropic's available models. In case you want to use a specific Anthropic model
that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/manage-models#add-models-1):
that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/desktop/manage-models#add-models-1):
- See list of available models in [Anthropic Models](https://docs.anthropic.com/claude/docs/models-overview).
- The `id` property must match the model name in the list. For example, `claude-opus-4@20250514`, `claude-sonnet-4@20250514`, or `claude-3-5-haiku@20241022`.
@ -72,7 +72,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify Anthropic's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -55,7 +55,7 @@ Ensure your API key has sufficient credits.
## Available Cohere Models
Jan automatically includes Cohere's available models. In case you want to use a specific
Cohere model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/manage-models):
Cohere model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/desktop/manage-models):
- See list of available models in [Cohere Documentation](https://docs.cohere.com/v2/docs/models).
- The `id` property must match the model name in the list. For example, `command-nightly` or `command-light`.
@ -71,7 +71,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify Cohere's [system status](https://status.cohere.com/)
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -53,7 +53,7 @@ Ensure your API key has sufficient credits
## Available Google Models
Jan automatically includes Google's available models like Gemini series. In case you want to use a specific
Gemini model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/manage-models#add-models-1):
Gemini model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/desktop/manage-models#add-models-1):
- See list of available models in [Google Models](https://ai.google.dev/gemini-api/docs/models/gemini).
- The `id` property must match the model name in the list. For example, `gemini-1.5-pro` or `gemini-2.0-flash-lite-preview`.
@ -69,7 +69,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify [Gemini's system status](https://www.google.com/appsstatus/dashboard/)
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -54,7 +54,7 @@ Ensure your API key has sufficient credits
## Available Models Through Groq
Jan automatically includes Groq's available models. In case you want to use a specific Groq model that
you cannot find in **Jan**, follow the instructions in the [Add Cloud Models](/docs/manage-models#add-models-1):
you cannot find in **Jan**, follow the instructions in the [Add Cloud Models](/docs/desktop/manage-models#add-models-1):
- See list of available models in [Groq Documentation](https://console.groq.com/docs/models).
- The `id` property must match the model name in the list. For example, if you want to use Llama3.3 70B, you must set the `id` property to `llama-3.3-70b-versatile`.
@ -70,7 +70,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify Groq's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -141,7 +141,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify Hugging Face's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -56,7 +56,7 @@ Ensure your API key has sufficient credits
## Available Mistral Models
Jan automatically includes Mistral's available models. In case you want to use a specific Mistral model
that you cannot find in **Jan**, follow the instructions in [Add Cloud Models](/docs/manage-models#add-models-1):
that you cannot find in **Jan**, follow the instructions in [Add Cloud Models](/docs/desktop/manage-models#add-models-1):
- See list of available models in [Mistral AI Documentation](https://docs.mistral.ai/platform/endpoints).
- The `id` property must match the model name in the list. For example, if you want to use
Mistral Large, you must set the `id` property to `mistral-large-latest`
@ -73,7 +73,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify Mistral AI's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -58,7 +58,7 @@ Start chatting
## Available OpenAI Models
Jan automatically includes popular OpenAI models. In case you want to use a specific model that you
cannot find in Jan, follow instructions in [Add Cloud Models](/docs/manage-models#add-models-1):
cannot find in Jan, follow instructions in [Add Cloud Models](/docs/desktop/manage-models#add-models-1):
- See list of available models in [OpenAI Platform](https://platform.openai.com/docs/models/overview).
- The id property must match the model name in the list. For example, if you want to use the
[GPT-4.5](https://platform.openai.com/docs/models/), you must set the id property
@ -76,7 +76,7 @@ Common issues and solutions:
2. Connection Problems
- Check your internet connection
- Verify OpenAI's [system status](https://status.openai.com)
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
3. Model Unavailable
- Confirm your API key has access to the model

View File

@ -88,7 +88,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify OpenRouter's [system status](https://status.openrouter.ai)
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm the model is currently available on OpenRouter

View File

@ -69,7 +69,7 @@ Click the gear icon next to any model to adjust how it behaves:
- **Presence Penalty**: Encourages the model to use varied vocabulary
<Callout type="info">
For detailed explanations of these parameters, see our [Model Parameters Guide](/docs/model-parameters).
For detailed explanations of these parameters, see our [Model Parameters Guide](/docs/desktop/model-parameters).
</Callout>
## Hardware Monitoring
@ -117,7 +117,7 @@ Access privacy settings at **Settings** > **Privacy**:
- Change this setting anytime
<Callout type="info">
See exactly what we collect (with your permission) in our [Privacy Policy](/docs/privacy).
See exactly what we collect (with your permission) in our [Privacy Policy](/docs/desktop/privacy).
</Callout>
![Analytics](./_assets/settings-07.png)
@ -174,7 +174,7 @@ This includes configuration for:
- CORS (Cross-Origin Resource Sharing)
- Verbose Logging
[**Go to Local API Server Settings &rarr;**](/docs/api-server)
[**Go to Local API Server Settings &rarr;**](/docs/desktop/api-server)
## Emergency Options

View File

@ -226,7 +226,7 @@ When models won't respond or show these errors:
- **RAM:** Use models under 80% of available memory
- 8GB system: Use models under 6GB
- 16GB system: Use models under 13GB
- **Hardware:** Verify your system meets [minimum requirements](/docs/troubleshooting#step-1-verify-hardware-and-system-requirements)
- **Hardware:** Verify your system meets [minimum requirements](/docs/desktop/troubleshooting#step-1-verify-hardware-and-system-requirements)
**2. Adjust Model Settings**
- Open model settings in the chat sidebar
@ -318,8 +318,8 @@ If these solutions don't work:
- Include your logs and system info
**3. Check Resources:**
- [System requirements](/docs/troubleshooting#step-1-verify-hardware-and-system-requirements)
- [Model compatibility guides](/docs/manage-models)
- [System requirements](/docs/desktop/troubleshooting#step-1-verify-hardware-and-system-requirements)
- [Model compatibility guides](/docs/desktop/manage-models)
- [Hardware setup guides](/docs/desktop/)
<Callout type="info">

View File

@ -68,7 +68,7 @@ Click the gear icon next to a model to configure advanced settings:
- **Repeat Penalty**: Controls how strongly the model avoids repeating phrases (higher values reduce repetition)
- **Presence Penalty**: Discourages reusing words that already appeared in the text (helps with variety)
_See [Model Parameters](/docs/model-parameters) for a more detailed explanation._
_See [Model Parameters](/docs/desktop/model-parameters) for a more detailed explanation._
## Hardware
@ -108,7 +108,7 @@ You can help improve Jan by sharing anonymous usage data:
2. You can change this setting at any time
<Callout type="info">
Read more about that we collect with opt-in users at [Privacy](/docs/privacy).
Read more about that we collect with opt-in users at [Privacy](/docs/desktop/privacy).
</Callout>
<br/>

View File

@ -328,19 +328,19 @@ This command ensures that the necessary permissions are granted for Jan's instal
When you start a chat with a model and encounter a **Failed to Fetch** or **Something's Amiss** error, here are some possible solutions to resolve it:
**1. Check System & Hardware Requirements**
- Hardware dependencies: Ensure your device meets all [hardware requirements](docs/troubleshooting#step-1-verify-hardware-and-system-requirements)
- OS: Ensure your operating system meets the minimum requirements ([Mac](/docs/desktop/mac#minimum-requirements), [Windows](/docs/desktop/windows#compatibility), [Linux](docs/desktop/linux#compatibility))
- Hardware dependencies: Ensure your device meets all [hardware requirements](docs/desktop/troubleshooting#step-1-verify-hardware-and-system-requirements)
- OS: Ensure your operating system meets the minimum requirements ([Mac](/docs/desktop/install/mac#minimum-requirements), [Windows](/docs/desktop/install/windows#compatibility), [Linux](/docs/desktop/install/linux#compatibility))
- RAM: Choose models that use less than 80% of your available RAM
- For 8GB systems: Use models under 6GB
- For 16GB systems: Use models under 13GB
**2. Check Model Parameters**
- In **Engine Settings** in right sidebar, check your `ngl` ([number of GPU layers](/docs/models/model-parameters#engine-parameters)) setting to see if it's too high
- In **Engine Settings** in right sidebar, check your `ngl` ([number of GPU layers](/docs/desktop/models/model-parameters#engine-parameters)) setting to see if it's too high
- Start with a lower NGL value and increase gradually based on your GPU memory
**3. Port Conflicts**
If you check your [app logs](/docs/troubleshooting#how-to-get-error-logs) & see "Bind address failed at 127.0.0.1:39291", check port availability:
If you check your [app logs](/docs/desktop/troubleshooting#how-to-get-error-logs) & see "Bind address failed at 127.0.0.1:39291", check port availability:
```
# Mac
netstat -an | grep 39291
@ -371,7 +371,7 @@ This will delete all chat history, models, and settings.
</Callout>
**5. Try a clean installation**
- Uninstall Jan & clean Jan data folders ([Mac](/docs/desktop/mac#uninstall-jan), [Windows](/docs/desktop/windows#uninstall-jan), [Linux](docs/desktop/linux#uninstall-jan))
- Uninstall Jan & clean Jan data folders ([Mac](/docs/desktop/install/mac#uninstall-jan), [Windows](/docs/desktop/install/windows#uninstall-jan), [Linux](/docs/desktop/install/linux#uninstall-jan))
- Install the latest [stable release](/download)
<Callout type="warning">
@ -392,7 +392,7 @@ The "Unexpected token" error usually relates to OpenAI API authentication or reg
## Need Further Support?
If you can't find what you need in our troubleshooting guide, feel free reach out to us for extra help:
- **Copy** your [app logs](/docs/troubleshooting#how-to-get-error-logs)
- **Copy** your [app logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
- Go to our [Discord](https://discord.com/invite/FTk2MvZwJH) & send it to **#🆘|jan-help** channel for further support.

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 634 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 154 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

View File

@ -17,7 +17,7 @@ Jan now supports [NVIDIA TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) i
We've been excited for TensorRT-LLM for a while, and [had a lot of fun implementing it](https://github.com/menloresearch/nitro-tensorrt-llm). As part of the process, we've run some benchmarks, to see how TensorRT-LLM fares on consumer hardware (e.g. [4090s](https://www.nvidia.com/en-us/geforce/graphics-cards/40-series/), [3090s](https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/)) we commonly see in the [Jan's hardware community](https://discord.com/channels/1107178041848909847/1201834752206974996).
<Callout type="info" >
**Give it a try!** Jan's [TensorRT-LLM extension](/docs/built-in/tensorrt-llm) is available in Jan v0.4.9 and up ([see more](/docs/built-in/tensorrt-llm)). We precompiled some TensorRT-LLM models for you to try: `Mistral 7b`, `TinyLlama-1.1b`, `TinyJensen-1.1b` 😂
**Give it a try!** Jan's [TensorRT-LLM extension](/docs/desktop/built-in/tensorrt-llm) is available in Jan v0.4.9 and up ([see more](/docs/desktop/built-in/tensorrt-llm)). We precompiled some TensorRT-LLM models for you to try: `Mistral 7b`, `TinyLlama-1.1b`, `TinyJensen-1.1b` 😂
Bugs or feedback? Let us know on [GitHub](https://github.com/menloresearch/jan) or via [Discord](https://discord.com/channels/1107178041848909847/1201832734704795688).
</Callout>

View File

@ -126,7 +126,7 @@ any version with Model Context Protocol in it (>`v0.6.3`).
**The Key: Assistants + Tools**
Running deep research in Jan can be accomplished by combining [custom assistants](https://jan.ai/docs/assistants)
with [MCP search tools](https://jan.ai/docs/mcp-examples/search/exa). This pairing allows any model—local or
with [MCP search tools](https://jan.ai/docs/desktop/mcp-examples/search/exa). This pairing allows any model—local or
cloud—to follow a systematic research workflow, to create a report similar to that of other providers, with some
visible limitations (for now).

View File

@ -0,0 +1,268 @@
---
title: "How we (try to) benchmark GPU kernels accurately"
description: "We present the process behind how we decided to benchmark GPU kernels and iteratively improved our benchmarking pipeline"
tags: ""
categories: research
ogImage: "./_assets/cover-kernel-benchmarking.png"
date: 2025-09-17
---
# How we (try to) benchmark GPU kernels accurately
If you've read other kernel benchmarking articles before, then a lot of the information in this blog post might be second nature to you already. In fact, we would like to start off by thanking the blood, sweat, and tears poured into the many kernel benchmarking guides written before ours, which helped us write better benchmarking code as well as this blog post.
Over here at Menlo, we recently acquired some [RTX PRO 6000 Blackwell Workstation Edition](https://www.nvidia.com/en-sg/products/workstations/professional-desktop-gpus/rtx-pro-6000/), and we are trying to make LLM inference engines like [vLLM](https://github.com/vllm-project/vllm) run faster on it. We've been writing our own kernels specifically for the RTX PRO 6000, and seeing if we can improve inference times on our hardware.
This blog details how our ML Efficiency team identified problems in our benchmarking code and how we iterated on it, following the various cool benchmarking guides out there! Without further ado, let's start benchmarking, from simple programs to GPU kernels.
## Introduction to kernels and benchmarking
For those new to GPU programming, a [kernel](https://modal.com/gpu-glossary/device-software/kernel) is a piece of CUDA code that programmers write to execute a desired sequence of operations on the GPU. A kernel is launched once and executed by many threads running concurrently; we typically launch kernels as a [thread block grid](https://modal.com/gpu-glossary/device-software/thread-block-grid), which spreads the work across multiple [Streaming Multiprocessors (SMs)](https://modal.com/gpu-glossary/device-hardware/streaming-multiprocessor) on the GPU.
Benchmarking is a fundamental aspect of high-performance computing. It enables us to quantitatively compare kernel performance across different problem sizes and understand how various hyperparameters impact execution speed. For GPU kernel development, benchmarking lets us iteratively optimize our kernels so they utilize the GPU better.
That being said, **accurate kernel benchmarking** matters even more: benchmarking kernels that run on the GPU can become very complex, and there are many traps to fall into if not enough care is taken when writing benchmarking scripts. A great alternative is to use the tools NVIDIA offers via the [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit), such as the [Nsight CUDA Profiling Tools Interface](https://developer.nvidia.com/cupti) (CUPTI) or the [Nsight Compute CLI](https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html) (`ncu`), which provide accurate measurements of various kernel characteristics. We wanted to use Python because it made it convenient to sweep across different problem shapes and kernels quickly, but that meant we also had to learn how to benchmark kernels properly from scratch.
We will showcase some examples of how to benchmark kernels on the GPU. We chose Python for most of our benchmarking code, as most of our own codebase is in Python, which makes the benchmarks simple to integrate.
## Benchmarking CUDA programs
PyTorch provides a basic API to help time `torch` programs, demonstrated in this [tutorial](https://docs.pytorch.org/tutorials/recipes/recipes/benchmark.html).
A basic implementation can be as simple as:
```python
import torch
import torch.utils.benchmark as benchmark

# Example input tensor (shape chosen for illustration)
x = torch.randn(10000, 64)

def batched_dot_mul_sum(a, b):
    '''Computes batched dot by multiplying and summing'''
    return a.mul(b).sum(-1)

num_threads = torch.get_num_threads()
print(f'Benchmarking on {num_threads} threads')

t0 = benchmark.Timer(
    stmt='batched_dot_mul_sum(x, x)',
    setup='from __main__ import batched_dot_mul_sum',
    globals={'x': x},
    num_threads=num_threads,
    label='Multithreaded batch dot',
    sub_label='Implemented using mul and sum')

print(t0.timeit(100))
```
When benchmarking kernels, there are a few practices we should follow to ensure our measurements are accurate.
### 1. Always benchmark the code not with settings from your machine, but with **settings the user will see**.
Benchmarking how fast your kernels run on a 3090 is meaningless if you are serving your models on an H100 DGX node. It is always a good idea to benchmark your kernels on the hardware you plan to serve on.
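As a small guard, here is a minimal sketch (the target GPU name and the check itself are just illustrative assumptions) that fails fast when a benchmark is accidentally launched on the wrong machine:
```python
import torch

# Hypothetical deployment target; replace with the GPU you actually serve on.
TARGET_GPU_SUBSTRING = "H100"

def assert_target_gpu() -> None:
    name = torch.cuda.get_device_name(0)
    if TARGET_GPU_SUBSTRING not in name:
        raise RuntimeError(
            f"Benchmarking on '{name}', but results are only meaningful on {TARGET_GPU_SUBSTRING} hardware."
        )

assert_target_gpu()
```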
### 2. **Warmup your kernels**
Take a look at this snippet from the tutorial.
```
mul_sum(x, x): 27.6 μs
mul_sum(x, x): 25.3 μs
bmm(x, x): 2775.5 μs
bmm(x, x): 22.4 μs
```
The first `bmm` call takes far longer to run, because most of that time is spent loading [cuBLAS](https://developer.nvidia.com/cublas) kernels on the first run.
Warming up your kernels can be as simple as running the kernel before timing it. This loads those kernels up front, so that we only measure how long the kernel itself takes to run.
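As a rough sketch (assuming `run_kernel()` stands for whatever operation you want to time), a warmup can look like this; the CUDA-event approach in the next section is what you'd use for precise numbers:
```python
import time
import torch

def bench_with_warmup(run_kernel, n_warmup: int = 10, n_repeats: int = 100) -> float:
    # Warmup: pay one-off costs (e.g. loading cuBLAS kernels) before timing.
    for _ in range(n_warmup):
        run_kernel()
    torch.cuda.synchronize()

    start = time.perf_counter()
    for _ in range(n_repeats):
        run_kernel()
    torch.cuda.synchronize()  # wait for the GPU to finish before reading the clock
    return (time.perf_counter() - start) / n_repeats  # seconds per call
```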
### 3. `torch.cuda.synchronize` and CUDA Events
Now we'll introduce a new API, which is the standard way to benchmark kernels. [CUDA events](https://docs.pytorch.org/docs/stable/generated/torch.cuda.Event.html) are awesome for a variety of reasons. The simplest is that they measure time from the perspective of the GPU, whereas `time.time()` and `time.perf_counter()` measure time from the perspective of the CPU.
Moreover, their simple API lets you write benchmarking code like this:
```python
steps = 10
start_events = [torch.cuda.Event(enable_timing=True) for _ in range(steps)]
end_events = [torch.cuda.Event(enable_timing=True) for _ in range(steps)]
for i in range(steps):
    start_events[i].record()
    run_kernel()
    end_events[i].record()
torch.cuda.synchronize()
times = [s.elapsed_time(e) for s, e in zip(start_events, end_events)]
```
`torch.cuda.synchronize()` tells the CPU to wait for the work on the GPU to finish, so that the elapsed time can be calculated after synchronization, as visualised here:
![image](./_assets/speechmatics-events.svg)
_Figure 1: Illustration taken from https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch_
### 4. Flush your L2 Cache
#### What is the L2 Cache
When data is read from or written to [HBM or GDDR](https://www.exxactcorp.com/blog/hpc/gddr6-vs-hbm-gpu-memory), it goes through the [L2 cache](https://docs.nvidia.com/cuda/cuda-c-programming-guide/#architecture) first, which is shared by all [streaming multiprocessors (SMs)](https://modal.com/gpu-glossary/device-hardware/streaming-multiprocessor). The L2 cache caches accesses to local and global memory, and helps reuse data instead of loading it into shared memory again (which can be slow!).
Moreover, unlike the L1 cache, which is present on each SM, all SMs share the same L2 cache!
#### Why we need to flush the L2 Cache
Following this [guide](https://guillesanbri.com/CUDA-Benchmarks/#caches), if you have warmed up or run the kernel before, some of its intermediate data might still be stored in the L2 cache, which can make the kernel look deceptively fast.
However, in a real-world setting, you want to measure how long the kernel realistically takes to run, and more often than not, when running large models, you will be running more than one kernel. This means your cache will probably thrash often and will not keep the data from any specific kernel around for reuse. Thus, to simulate this behaviour, we flush the L2 cache beforehand to eliminate any "help" from it.
Moreover, this also makes it much easier to reason about data reuse for the kernel, as any L2 cache usage is now independent of other kernels or runs.
#### Example of not flushing L2 cache
When we initially benchmarked our kernels, we made the small mistake of not flushing the L2 cache.
![image](./_assets/exceed-sol.png)
_Figure 2: Our SOL % (which is a percentage of our observed maximum speed) is over 100% for the row for shape [2, 19456, 2560]._
#### How to flush the L2 Cache
To flush it, we should add the following lines:
```python
l2_size = torch.cuda.get_device_properties().L2_cache_size
cache = torch.empty(l2_size, dtype=torch.uint8, device="cuda")
#<your benchmarking code here>
cache.zero_() # flush L2 cache
# You should flush your L2 cache within the benchmarking code if you're repeating the same process multiple times
```
This allocates a buffer the size of the L2 cache, and zeroing it in place issues a write operation that goes through the L2 cache and flushes it.
After flushing the L2 cache, we get a more sensible result here:
![image](./_assets/fixed-l2.png)
_Figure 3: New SOL% has all values under 100% now after flushing L2 cache._
### 5. Timing short-lived kernels
Initially, we used [Triton's](https://triton-lang.org/main/getting-started/installation.html) [`do_bench`](https://triton-lang.org/main/python-api/generated/triton.testing.do_bench.html) for benchmarking, as it already does everything we have mentioned above: warmup, CUDA events, and flushing the L2 cache. However, we observed an issue with accurately benchmarking our kernels on smaller shapes: the kernel can be so fast that it finishes before the CPU has issued the CUDA end event from Python.
![image](./_assets/speechmatics-too-fast.png)
_Figure 4: Taken from [Speechmatics](https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch): the kernel finishes before the CUDA end event is launched, and therefore the true kernel timing is not recorded._
This results in kernels that look very slow:
![image](./_assets/small-timed-bug.png)
_Figure 5: Side-by-side comparison of Python benchmark latencies vs `ncu`'s timing (right) for shape [2, 19456, 2560]. `ncu` records a much faster duration of 71.36 μs compared to Python's 103.9 μs._
To fix this, we wrote a custom `do_bench_cuda()` that inserts a dummy, untimed FP32 matmul before benchmarking each shape, so that the CPU has enough time to enqueue the CUDA end event.
This led to more accurate latencies for our small-M kernels.
![image](./_assets/fixed-l2.png)
_Figure 6: There is a significant improvement in SOL% after inserting the dummy matmul._
We also repeat the benchmark function for each shape on 5 copies of the input/output data, to make the CUDA event duration longer; a sketch of the idea follows.
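Here is a minimal sketch of that trick (assuming the kernel is exposed as `f(x)` and `x_shape` is the problem shape; both names are illustrative):
```python
import torch

def make_benchmark_fn(f, x_shape, n_copies: int = 5):
    # Allocate several independent input copies up front (outside the timed region)
    # so the benchmarked call runs long enough for the CUDA end event, and no
    # single buffer stays hot in cache.
    inputs = [torch.randn(*x_shape, device="cuda") for _ in range(n_copies)]

    def run():
        for x in inputs:
            f(x)

    return run
```
The returned callable is then what gets timed, e.g. `do_bench_cuda(make_benchmark_fn(f, shape))` with the function below.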
In the end, this is the `do_bench_cuda` function we used to benchmark our kernels:
```python
import statistics
import torch
def do_bench_cuda(f, n_warmup: int = 10, n_repeats: int = 20):
    l2_size = torch.cuda.get_device_properties().L2_cache_size
    cache = torch.empty(l2_size, dtype=torch.uint8, device="cuda")

    # Matmul in case of short-lived CUDA Events
    A = torch.randn(4096, 4096, dtype=torch.float32, device="cuda")
    B = torch.randn(4096, 4096, dtype=torch.float32, device="cuda")
    A @ B

    # L2 cache flush + Warmup
    for _ in range(n_warmup):
        cache.zero_()
        f()

    start_list = [torch.cuda.Event(enable_timing=True) for _ in range(n_repeats)]
    end_list = [torch.cuda.Event(enable_timing=True) for _ in range(n_repeats)]

    torch.cuda.synchronize()
    for start, end in zip(start_list, end_list):
        cache.zero_()  # flush L2 cache
        A @ B  # add a heavy task to fill GPU pipeline
        start.record()
        f()
        end.record()
    torch.cuda.synchronize()

    timings = [start.elapsed_time(end) for start, end in zip(start_list, end_list)]
    return statistics.median(timings)
```
### 6. Clock Speed
This was a silent problem, and it was very hard to discover that it was causing issues in our measurements. We initially found discrepancies between `ncu`'s latency (676.64 μs) and `do_bench_cuda`'s latency (535 μs) when profiling the shape [2048, 19456, 2560]: `do_bench` was reporting ~140 μs faster timings than `ncu`.
Although most of our benchmarking codebase is in Python, developer errors can still occur, and it is always good to have a point of reference for the true kernel timing. The Nsight Compute CLI (`ncu` for short) can measure kernel latency accurately, and the values it reports are a good figure against which to sanity-check our own benchmarking code.
#### 6.1 Clock Speed
Firstly, we suspected that clock speed could play a part in the discrepancy between `ncu`'s timings and our own benchmarking code. Clock speed affects benchmark times because it is the rate at which the GPU's processing units operate; a higher clock speed translates to more operations per second, which can either speed up or slow down a kernel depending on how it was implemented.
![image](./_assets/clock-speed-effect.png)
_Figure 7: Taken from [GPU Mode Lecture 56](https://www.youtube.com/watch?v=CtrqBmYtSEk). We can see clock speed affects kernel performance. For problem shape of 1024, it got faster after increasing clock speed, while for problem shape of 384, it became slower after clock speed increased._
Looking at this [forum post](https://forums.developer.nvidia.com/t/nsight-compute-clock-speed-during-profiling/208646/3), we realised that part of the discrepancy came from `ncu` locking the clock to the GPU's base clock speed by default. We investigated by locking the clock to the base clock speed, and also tried locking it to the max clock speed using `nvidia-smi -ac=<memClk>,<smClk>`. According to the GPU Mode lecture, neither is a proper solution, for the following reasons:
- Locking to the max clock speed doesn't help, as it only sets the ceiling of GPU performance; the GPU can always fall back to its base clock speed of ~2287 MHz instead of the boosted clock speed of 2617 MHz.
- Locking to the base clock speed is not meaningful either, as it does not reflect the performance users will actually get from our kernels, which at best will run at the boosted clock speed.
However, we did find that we should set `ncu`'s `--clock-control` option to `none`, so that it does not limit itself to the base clock speed. This improved the latency reported by `ncu` from 676.64 μs to 575 μs on the same problem shape of [2048, 19456, 2560].
#### 6.2 Discrepancies after `clock-control`
At the time of writing, we have observed that `ncu` sometimes gives different latency results for the same benchmarking code and problem shapes. The cause is that when we set `--clock-control` to `none`, the GPU clock speed becomes stochastic, which in turn affects the measured kernel latency. A more holistic approach would be to also benchmark kernels across several fixed clock speeds.
![image](./_assets/ncu-compare.png)
_Figure 8: On the same benchmarking code and problem shapes, we can see vast deviations in duration, which is caused by the differences in SM Frequency. This resonates with the graphs shown in Figure 7._
As a result, there can be some discrepancy between `ncu`'s timings and our own. To figure out whether a discrepancy is caused by the SM frequency, you can use the relationship that FLOPS is directly proportional to the SM clock, so durations are inversely proportional to it.
In our case:
`544 μs × 2.28 GHz (SM freq of the 544 μs run) / 2.14 GHz (SM freq of the 575 μs run) ≈ 579 μs`, which is close to the measured 575 μs, so most of the discrepancy was coming from the difference in SM frequency.
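A tiny sketch of that sanity check, using the numbers above:
```python
def rescale_duration_us(duration_us: float, measured_sm_ghz: float, target_sm_ghz: float) -> float:
    # Kernel duration is (roughly) inversely proportional to the SM clock frequency.
    return duration_us * measured_sm_ghz / target_sm_ghz

# 544 us measured at 2.28 GHz, predicted duration at 2.14 GHz:
print(rescale_duration_us(544, 2.28, 2.14))  # ~579 us, close to the 575 us ncu reported
```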
The final command we used was:
`ncu -s 5 -k $kernel_name --clock-control none python3 benchmarks/bench_mm.py --profile 2048 19456 2560`
Explanation of arguments:
- `-s`: number of kernel launches to skip before profiling
- `-k`: name of the kernel to profile
- `--clock-control`: whether `ncu` controls the clock speed
Below is a side-by-side comparison of `ncu`'s benchmarked latency and our script's, after all the adjustments above.
![image](./_assets/ncu-bench.png)
_Figure 9: Side-by-side comparison of the above `ncu` command (left, measuring shape [2048, 19456, 2560]) with our own Python benchmarking script (right). There is at most a ~10 μs difference between the `Duration` in `ncu` and our benchmarking script's `Latency (us)` measurement._
## Conclusion and TL;DR
TL;DR, when benchmarking:
1. Make sure to use the hardware you intend to deploy on
2. Warmup before benchmarking your kernels
3. Use CUDA events
4. Flush your L2 Cache
5. Use a dummy matmul to make timings more accurate for short-lived kernels
6. Ensure your clock speed doesn't cause inconsistent readings
We hope this helps anyone who is interested in benchmarking their own kernels, or in how GPU kernels are benchmarked. Happy benchmarking!
### Acknowledgements and Related Resources:
We would like to thank and credit the many resources and guides that we used in our own journey of figuring out how best to benchmark our kernels on our GPUs; a lot of this work would probably not have been possible without these amazing guides.
- GPU Mode Lecture 56, presented by Georgii Evtushenko: https://www.youtube.com/watch?v=CtrqBmYtSEk
- https://www.spatters.ca/mma-matmul (Benchmarking using ncu for matrix multiplications)
- https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch (CUDA Events)
- https://guillesanbri.com/CUDA-Benchmarks/ (Good resource for introduction to benchmarking)
- https://modal.com/gpu-glossary/device-hardware/cuda-device-architecture (Glossary of Architecture in general)
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/#global-memory-5-x (L2 cache explanation)
- https://cvw.cac.cornell.edu/gpu-architecture/gpu-memory/memory_types (L1 vs L2 cache)

View File

@ -22,16 +22,16 @@
},
"devDependencies": {
"@janhq/core": "workspace:*",
"typescript": "^5.3.3",
"vite": "^5.0.0",
"vitest": "^2.0.0",
"zustand": "^5.0.8"
"typescript": "5.9.2",
"vite": "5.4.20",
"vitest": "2.1.9",
"zustand": "5.0.8"
},
"peerDependencies": {
"@janhq/core": "*",
"zustand": "^5.0.0"
"zustand": "5.0.3"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.17.5"
"@modelcontextprotocol/sdk": "1.17.5"
}
}

View File

@ -16,14 +16,20 @@ import {
import { janApiClient, JanChatMessage } from './api'
import { janProviderStore } from './store'
// Jan models support tools via MCP
const JAN_MODEL_CAPABILITIES = ['tools'] as const
export default class JanProviderWeb extends AIEngine {
readonly provider = 'jan'
private activeSessions: Map<string, SessionInfo> = new Map()
override async onLoad() {
console.log('Loading Jan Provider Extension...')
try {
// Check and clear invalid Jan models (capabilities mismatch)
this.validateJanModelsLocalStorage()
// Initialize authentication and fetch models
await janApiClient.initialize()
console.log('Jan Provider Extension loaded successfully')
@ -35,22 +41,93 @@ export default class JanProviderWeb extends AIEngine {
super.onLoad()
}
// Verify Jan models capabilities in localStorage
private validateJanModelsLocalStorage() {
try {
console.log("Validating Jan models in localStorage...")
const storageKey = 'model-provider'
const data = localStorage.getItem(storageKey)
if (!data) return
const parsed = JSON.parse(data)
if (!parsed?.state?.providers) return
// Check if any Jan model has incorrect capabilities
let hasInvalidModel = false
for (const provider of parsed.state.providers) {
if (provider.provider === 'jan' && provider.models) {
for (const model of provider.models) {
console.log(`Checking Jan model: ${model.id}`, model.capabilities)
if (JSON.stringify(model.capabilities) !== JSON.stringify(JAN_MODEL_CAPABILITIES)) {
hasInvalidModel = true
console.log(`Found invalid Jan model: ${model.id}, clearing localStorage`)
break
}
}
}
if (hasInvalidModel) break
}
// If any invalid model found, just clear the storage
if (hasInvalidModel) {
// Force clear the storage
localStorage.removeItem(storageKey)
// Verify it's actually removed
const afterRemoval = localStorage.getItem(storageKey)
// If still present, try setting to empty state
if (afterRemoval) {
// Try alternative clearing method
localStorage.setItem(storageKey, JSON.stringify({ state: { providers: [] }, version: parsed.version || 3 }))
}
console.log('Cleared model-provider from localStorage due to invalid Jan capabilities')
// Force a page reload to ensure clean state
window.location.reload()
}
} catch (error) {
console.error('Failed to check Jan models:', error)
}
}
override async onUnload() {
console.log('Unloading Jan Provider Extension...')
// Clear all sessions
for (const sessionId of this.activeSessions.keys()) {
await this.unload(sessionId)
}
janProviderStore.reset()
console.log('Jan Provider Extension unloaded')
}
async get(modelId: string): Promise<modelInfo | undefined> {
return janApiClient
.getModels()
.then((list) => list.find((e) => e.id === modelId))
.then((model) =>
model
? {
id: model.id,
name: model.id, // Use ID as name for now
quant_type: undefined,
providerId: this.provider,
port: 443, // HTTPS port for API
sizeBytes: 0, // Size not provided by Jan API
tags: [],
path: undefined, // Remote model, no local path
owned_by: model.owned_by,
object: model.object,
capabilities: [...JAN_MODEL_CAPABILITIES],
}
: undefined
)
}
async list(): Promise<modelInfo[]> {
try {
const janModels = await janApiClient.getModels()
return janModels.map((model) => ({
id: model.id,
name: model.id, // Use ID as name for now
@ -62,7 +139,7 @@ export default class JanProviderWeb extends AIEngine {
path: undefined, // Remote model, no local path
owned_by: model.owned_by,
object: model.object,
capabilities: ['tools'], // Jan models support both tools via MCP
capabilities: [...JAN_MODEL_CAPABILITIES],
}))
} catch (error) {
console.error('Failed to list Jan models:', error)
@ -75,7 +152,7 @@ export default class JanProviderWeb extends AIEngine {
// For Jan API, we don't actually "load" models in the traditional sense
// We just create a session reference for tracking
const sessionId = `jan-${modelId}-${Date.now()}`
const sessionInfo: SessionInfo = {
pid: Date.now(), // Use timestamp as pseudo-PID
port: 443, // HTTPS port
@ -85,8 +162,10 @@ export default class JanProviderWeb extends AIEngine {
}
this.activeSessions.set(sessionId, sessionInfo)
console.log(`Jan model session created: ${sessionId} for model ${modelId}`)
console.log(
`Jan model session created: ${sessionId} for model ${modelId}`
)
return sessionInfo
} catch (error) {
console.error(`Failed to load Jan model ${modelId}:`, error)
@ -97,23 +176,23 @@ export default class JanProviderWeb extends AIEngine {
async unload(sessionId: string): Promise<UnloadResult> {
try {
const session = this.activeSessions.get(sessionId)
if (!session) {
return {
success: false,
error: `Session ${sessionId} not found`
error: `Session ${sessionId} not found`,
}
}
this.activeSessions.delete(sessionId)
console.log(`Jan model session unloaded: ${sessionId}`)
return { success: true }
} catch (error) {
console.error(`Failed to unload Jan session ${sessionId}:`, error)
return {
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
error: error instanceof Error ? error.message : 'Unknown error',
}
}
}
@ -136,9 +215,12 @@ export default class JanProviderWeb extends AIEngine {
}
// Convert core chat completion request to Jan API format
const janMessages: JanChatMessage[] = opts.messages.map(msg => ({
const janMessages: JanChatMessage[] = opts.messages.map((msg) => ({
role: msg.role as 'system' | 'user' | 'assistant',
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
content:
typeof msg.content === 'string'
? msg.content
: JSON.stringify(msg.content),
}))
const janRequest = {
@ -162,18 +244,18 @@ export default class JanProviderWeb extends AIEngine {
} else {
// Return single response
const response = await janApiClient.createChatCompletion(janRequest)
// Check if aborted after completion
if (abortController?.signal?.aborted) {
throw new Error('Request was aborted')
}
return {
id: response.id,
object: 'chat.completion' as const,
created: response.created,
model: response.model,
choices: response.choices.map(choice => ({
choices: response.choices.map((choice) => ({
index: choice.index,
message: {
role: choice.message.role,
@ -182,7 +264,12 @@ export default class JanProviderWeb extends AIEngine {
reasoning_content: choice.message.reasoning_content,
tool_calls: choice.message.tool_calls,
},
finish_reason: (choice.finish_reason || 'stop') as 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call',
finish_reason: (choice.finish_reason || 'stop') as
| 'stop'
| 'length'
| 'tool_calls'
| 'content_filter'
| 'function_call',
})),
usage: response.usage,
}
@ -193,7 +280,10 @@ export default class JanProviderWeb extends AIEngine {
}
}
private async *createStreamingGenerator(janRequest: any, abortController?: AbortController) {
private async *createStreamingGenerator(
janRequest: any,
abortController?: AbortController
) {
let resolve: () => void
let reject: (error: Error) => void
const chunks: any[] = []
@ -231,7 +321,7 @@ export default class JanProviderWeb extends AIEngine {
object: chunk.object,
created: chunk.created,
model: chunk.model,
choices: chunk.choices.map(choice => ({
choices: chunk.choices.map((choice) => ({
index: choice.index,
delta: {
role: choice.delta.role,
@ -261,14 +351,14 @@ export default class JanProviderWeb extends AIEngine {
if (abortController?.signal?.aborted) {
throw new Error('Request was aborted')
}
while (yieldedIndex < chunks.length) {
yield chunks[yieldedIndex]
yieldedIndex++
}
// Wait a bit before checking again
await new Promise(resolve => setTimeout(resolve, 10))
await new Promise((resolve) => setTimeout(resolve, 10))
}
// Yield any remaining chunks
@ -291,24 +381,38 @@ export default class JanProviderWeb extends AIEngine {
}
async delete(modelId: string): Promise<void> {
throw new Error(`Delete operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Delete operation not supported for remote Jan API model: ${modelId}`
)
}
async update(modelId: string, model: Partial<modelInfo>): Promise<void> {
throw new Error(
`Update operation not supported for remote Jan API model: ${modelId}`
)
}
async import(modelId: string, _opts: ImportOptions): Promise<void> {
throw new Error(`Import operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Import operation not supported for remote Jan API model: ${modelId}`
)
}
async abortImport(modelId: string): Promise<void> {
throw new Error(`Abort import operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Abort import operation not supported for remote Jan API model: ${modelId}`
)
}
async getLoadedModels(): Promise<string[]> {
return Array.from(this.activeSessions.values()).map(session => session.model_id)
return Array.from(this.activeSessions.values()).map(
(session) => session.model_id
)
}
async isToolSupported(modelId: string): Promise<boolean> {
// Jan models support tool calls via MCP
console.log(`Checking tool support for Jan model ${modelId}: supported`);
return true;
console.log(`Checking tool support for Jan model ${modelId}: supported`)
return true
}
}
}

View File

@ -48,6 +48,18 @@ export class JanAuthService {
* Called on app load to check existing session
*/
async initialize(): Promise<void> {
// Ensure refreshtoken is valid (in case of expired session or secret change)
try {
await refreshToken()
} catch (error) {
console.log('Failed to refresh token on init:', error)
// If refresh fails, logout to clear any invalid state
console.log('Logging out and clearing auth state to clear invalid session...')
await logoutUser()
this.clearAuthState()
this.authBroadcast.broadcastLogout()
}
// Authentication state check
try {
if (!this.isAuthenticated()) {
// Not authenticated - ensure guest access

View File

@ -12,11 +12,11 @@
"build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^6.0.1",
"cpx": "1.5.0",
"rimraf": "6.0.1",
"rolldown": "1.0.0-beta.1",
"run-script-os": "^1.1.6",
"typescript": "^5.3.3"
"run-script-os": "1.1.6",
"typescript": "5.9.2"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",

View File

@ -15,11 +15,11 @@
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^6.0.1",
"cpx": "1.5.0",
"rimraf": "6.0.1",
"rolldown": "1.0.0-beta.1",
"ts-loader": "^9.5.0",
"typescript": "^5.7.2"
"typescript": "5.9.2"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz"

View File

@ -12,12 +12,12 @@
"build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^6.0.1",
"cpx": "1.5.0",
"rimraf": "6.0.1",
"rolldown": "1.0.0-beta.1",
"run-script-os": "^1.1.6",
"run-script-os": "1.1.6",
"typescript": "5.8.3",
"vitest": "^3.0.6"
"vitest": "3.2.4"
},
"files": [
"dist/*",
@ -26,7 +26,7 @@
],
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"@tauri-apps/api": "^2.5.0"
"@tauri-apps/api": "2.8.0"
},
"bundleDependencies": [],
"installConfig": {

View File

@ -17,24 +17,24 @@
"test:coverage": "vitest run --coverage"
},
"devDependencies": {
"@vitest/ui": "^3.2.4",
"cpx": "^1.5.0",
"jsdom": "^26.1.0",
"rimraf": "^3.0.2",
"@vitest/ui": "2.1.9",
"cpx": "1.5.0",
"jsdom": "26.1.0",
"rimraf": "3.0.2",
"rolldown": "1.0.0-beta.1",
"ts-loader": "^9.5.0",
"typescript": "^5.7.2",
"vitest": "^3.2.4"
"typescript": "5.9.2",
"vitest": "3.2.4"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"@janhq/tauri-plugin-hardware-api": "link:../../src-tauri/plugins/tauri-plugin-hardware",
"@janhq/tauri-plugin-llamacpp-api": "link:../../src-tauri/plugins/tauri-plugin-llamacpp",
"@tauri-apps/api": "^2.5.0",
"@tauri-apps/plugin-http": "^2.5.1",
"@tauri-apps/api": "2.8.0",
"@tauri-apps/plugin-http": "2.5.0",
"@tauri-apps/plugin-log": "^2.6.0",
"fetch-retry": "^5.0.6",
"ulidx": "^2.3.0"
"ulidx": "2.4.1"
},
"engines": {
"node": ">=18.0.0"

View File

@ -96,18 +96,6 @@
"textAlign": "right"
}
},
{
"key": "batch_size",
"title": "Batch Size",
"description": "Logical maximum batch size for processing prompts.",
"controllerType": "input",
"controllerProps": {
"value": 2048,
"placeholder": "2048",
"type": "number",
"textAlign": "right"
}
},
{
"key": "ubatch_size",
"title": "uBatch Size",

View File

@ -46,7 +46,6 @@ export async function getLocalInstalledBackends(): Promise<
}
}
}
console.debug(local)
return local
}
@ -319,7 +318,10 @@ export async function downloadBackend(
events.emit('onFileDownloadSuccess', { modelId: taskId, downloadType })
} catch (error) {
// Fallback: if GitHub fails, retry once with CDN
if (source === 'github') {
if (
source === 'github' &&
error?.toString() !== 'Error: Download cancelled'
) {
console.warn(`GitHub download failed, falling back to CDN:`, error)
return await downloadBackend(backend, version, 'cdn')
}

View File

@ -37,7 +37,13 @@ import {
import { invoke } from '@tauri-apps/api/core'
import { getProxyConfig } from './util'
import { basename } from '@tauri-apps/api/path'
import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
import {
readGgufMetadata,
estimateKVCacheSize,
getModelSize,
isModelSupported,
planModelLoadInternal,
} from '@janhq/tauri-plugin-llamacpp-api'
import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
// Error message constant - matches web-app/src/utils/error.ts
@ -82,6 +88,7 @@ type ModelPlan = {
maxContextLength: number
noOffloadKVCache: boolean
offloadMmproj?: boolean
batchSize: number
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}
@ -922,6 +929,30 @@ export default class llamacpp_extension extends AIEngine {
return hash
}
override async get(modelId: string): Promise<modelInfo | undefined> {
const modelPath = await joinPath([
await this.getProviderPath(),
'models',
modelId,
])
const path = await joinPath([modelPath, 'model.yml'])
if (!(await fs.existsSync(path))) return undefined
const modelConfig = await invoke<ModelConfig>('read_yaml', {
path,
})
return {
id: modelId,
name: modelConfig.name ?? modelId,
quant_type: undefined, // TODO: parse quantization type from model.yml or model.gguf
providerId: this.provider,
port: 0, // port is not known until the model is loaded
sizeBytes: modelConfig.size_bytes ?? 0,
} as modelInfo
}
// Implement the required LocalProvider interface methods
override async list(): Promise<modelInfo[]> {
const modelsDir = await joinPath([await this.getProviderPath(), 'models'])
@ -1085,7 +1116,10 @@ export default class llamacpp_extension extends AIEngine {
const archiveName = await basename(path)
logger.info(`Installing backend from path: ${path}`)
if (!(await fs.existsSync(path)) || (!path.endsWith('tar.gz') && !path.endsWith('zip'))) {
if (
!(await fs.existsSync(path)) ||
(!path.endsWith('tar.gz') && !path.endsWith('zip'))
) {
logger.error(`Invalid path or file ${path}`)
throw new Error(`Invalid path or file ${path}`)
}
@ -1121,6 +1155,49 @@ export default class llamacpp_extension extends AIEngine {
}
}
/**
* Update a model with new information.
* @param modelId
* @param model
*/
async update(modelId: string, model: Partial<modelInfo>): Promise<void> {
const modelFolderPath = await joinPath([
await this.getProviderPath(),
'models',
modelId,
])
const modelConfig = await invoke<ModelConfig>('read_yaml', {
path: await joinPath([modelFolderPath, 'model.yml']),
})
const newFolderPath = await joinPath([
await this.getProviderPath(),
'models',
model.id,
])
// Check if newFolderPath exists
if (await fs.existsSync(newFolderPath)) {
throw new Error(`Model with ID ${model.id} already exists`)
}
const newModelConfigPath = await joinPath([newFolderPath, 'model.yml'])
await fs.mv(modelFolderPath, newFolderPath).then(() =>
// now replace what values have previous model name with format
invoke('write_yaml', {
data: {
...modelConfig,
model_path: modelConfig?.model_path?.replace(
`${this.providerId}/models/${modelId}`,
`${this.providerId}/models/${model.id}`
),
mmproj_path: modelConfig?.mmproj_path?.replace(
`${this.providerId}/models/${modelId}`,
`${this.providerId}/models/${model.id}`
),
},
savePath: newModelConfigPath,
})
)
}
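
A hedged usage sketch of the new get()/update() methods above: renaming a model moves its folder and rewrites the paths recorded in model.yml. The extension instance and model IDs below are placeholders.

```typescript
// `ext` stands in for the loaded llamacpp extension; the IDs are made up.
async function renameLocalModel(ext: {
  get(id: string): Promise<{ id: string; name: string } | undefined>
  update(id: string, model: { id: string }): Promise<void>
}): Promise<void> {
  const existing = await ext.get('qwen2.5-0.5b-instruct')
  if (!existing) throw new Error('model not found')
  // Moves llamacpp/models/qwen2.5-0.5b-instruct -> llamacpp/models/qwen2.5-0.5b
  // and updates model_path / mmproj_path inside model.yml to match.
  await ext.update('qwen2.5-0.5b-instruct', { id: 'qwen2.5-0.5b' })
}
```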
override async import(modelId: string, opts: ImportOptions): Promise<void> {
const isValidModelId = (id: string) => {
// only allow alphanumeric, underscore, hyphen, and dot characters in modelId
@ -1979,11 +2056,6 @@ export default class llamacpp_extension extends AIEngine {
return responseData as EmbeddingResponse
}
// Optional method for direct client access
override getChatClient(sessionId: string): any {
throw new Error('method not implemented yet')
}
/**
* Check if a tool is supported by the model
* Currently read from GGUF chat_template
@ -2046,7 +2118,7 @@ export default class llamacpp_extension extends AIEngine {
path: string,
meta: Record<string, string>
): Promise<{ layerSize: number; totalLayers: number }> {
const modelSize = await this.getModelSize(path)
const modelSize = await getModelSize(path)
const arch = meta['general.architecture']
const totalLayers = Number(meta[`${arch}.block_count`]) + 2 // 1 for lm_head layer and 1 for embedding layer
if (!totalLayers) throw new Error('Invalid metadata: block_count not found')
@ -2062,335 +2134,27 @@ export default class llamacpp_extension extends AIEngine {
/^\/\/[^/]+/.test(norm) // UNC path //server/share
)
}
/*
* if (!this.isAbsolutePath(path))
path = await joinPath([await getJanDataFolderPath(), path])
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
mmprojPath = await joinPath([await getJanDataFolderPath(), path])
*/
async planModelLoad(
path: string,
mmprojPath?: string,
requestedCtx?: number
): Promise<ModelPlan> {
if (!this.isAbsolutePath(path))
if (!this.isAbsolutePath(path)) {
path = await joinPath([await getJanDataFolderPath(), path])
}
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
mmprojPath = await joinPath([await getJanDataFolderPath(), mmprojPath])
const modelSize = await this.getModelSize(path)
const memoryInfo = await this.getTotalSystemMemory()
const gguf = await readGgufMetadata(path)
// Get mmproj size if provided
let mmprojSize = 0
if (mmprojPath) {
mmprojSize = await this.getModelSize(mmprojPath)
}
const { layerSize, totalLayers } = await this.getLayerSize(
path,
gguf.metadata
)
const kvCachePerToken = (await this.estimateKVCache(gguf.metadata))
.perTokenSize
logger.info(
`Model size: ${modelSize}, Layer size: ${layerSize}, Total layers: ${totalLayers}, KV cache per token: ${kvCachePerToken}`
)
// Validate critical values
if (!modelSize || modelSize <= 0) {
throw new Error(`Invalid model size: ${modelSize}`)
}
if (!kvCachePerToken || kvCachePerToken <= 0) {
throw new Error(`Invalid KV cache per token: ${kvCachePerToken}`)
}
if (!layerSize || layerSize <= 0) {
throw new Error(`Invalid layer size: ${layerSize}`)
}
// Reserve memory for OS, other applications, and fixed engine overhead.
const VRAM_RESERVE_GB = 0.5
const VRAM_RESERVE_BYTES = VRAM_RESERVE_GB * 1024 * 1024 * 1024
const ENGINE_FIXED_OVERHEAD_BYTES = 0.2 * 1024 * 1024 * 1024 // For scratch buffers etc.
// Get model's maximum context length
const arch = gguf.metadata['general.architecture']
const modelMaxContextLength =
Number(gguf.metadata[`${arch}.context_length`]) || 8192
const MIN_CONTEXT_LENGTH = 1024
// Memory percentages applied to both VRAM and RAM
const memoryPercentages = { high: 0.7, medium: 0.5, low: 0.4 }
logger.info(
`Memory info - Total (VRAM + RAM): ${memoryInfo.totalMemory}, Total VRAM: ${memoryInfo.totalVRAM}, Mode: ${this.memoryMode}`
)
if (!memoryInfo.totalMemory || isNaN(memoryInfo.totalMemory)) {
throw new Error(`Invalid total memory: ${memoryInfo.totalMemory}`)
}
if (!memoryInfo.totalVRAM || isNaN(memoryInfo.totalVRAM)) {
throw new Error(`Invalid total VRAM: ${memoryInfo.totalVRAM}`)
}
if (!this.memoryMode || !(this.memoryMode in memoryPercentages)) {
throw new Error(
`Invalid memory mode: ${this.memoryMode}. Must be 'high', 'medium', or 'low'`
)
}
// Apply memory mode to both VRAM and RAM separately
const memoryModeMultiplier = memoryPercentages[this.memoryMode]
const usableVRAM = Math.max(
0,
memoryInfo.totalVRAM * memoryModeMultiplier -
VRAM_RESERVE_BYTES -
ENGINE_FIXED_OVERHEAD_BYTES
)
const actualSystemRAM = Math.max(0, memoryInfo.totalRAM)
const usableSystemMemory = actualSystemRAM * memoryModeMultiplier
logger.info(
`Actual System RAM: ${actualSystemRAM}, Usable VRAM for plan: ${usableVRAM}, Usable System Memory: ${usableSystemMemory}`
)
let gpuLayers = 0
let maxContextLength = 0
let noOffloadKVCache = false
let mode: ModelPlan['mode'] = 'Unsupported'
let offloadMmproj = false
let remainingVRAM = usableVRAM
if (mmprojSize > 0 && mmprojSize <= remainingVRAM) {
offloadMmproj = true
remainingVRAM -= mmprojSize
}
const vramForMinContext = (
await this.estimateKVCache(gguf.metadata, MIN_CONTEXT_LENGTH)
).size
const ramForModel = modelSize + (offloadMmproj ? 0 : mmprojSize)
if (ramForModel + vramForMinContext > usableSystemMemory + usableVRAM) {
logger.error(
`Model unsupported. Not enough resources for model and min context.`
)
return {
gpuLayers: 0,
maxContextLength: 0,
noOffloadKVCache: true,
mode: 'Unsupported',
offloadMmproj: false,
}
}
const targetContext = Math.min(
requestedCtx || modelMaxContextLength,
modelMaxContextLength
)
let targetContextSize = (
await this.estimateKVCache(gguf.metadata, targetContext)
).size
// Use `kvCachePerToken` for all VRAM calculations
if (modelSize + targetContextSize <= remainingVRAM) {
mode = 'GPU'
gpuLayers = totalLayers
maxContextLength = targetContext
noOffloadKVCache = false
logger.info(
'Planning: Ideal case fits. All layers and target context in VRAM.'
)
} else if (modelSize <= remainingVRAM) {
mode = 'GPU'
gpuLayers = totalLayers
noOffloadKVCache = false
const vramLeftForContext = remainingVRAM - modelSize
maxContextLength = Math.floor(vramLeftForContext / kvCachePerToken)
// Add safety check to prevent OOM
const safetyBuffer = 0.9 // Use 90% of calculated context to be safe
maxContextLength = Math.floor(maxContextLength * safetyBuffer)
logger.info(
`Planning: All layers fit in VRAM, but context must be reduced. VRAM left: ${vramLeftForContext}, kvCachePerToken: ${kvCachePerToken}, calculated context: ${maxContextLength}`
)
} else {
const vramAvailableForLayers = remainingVRAM - vramForMinContext
if (vramAvailableForLayers >= layerSize) {
mode = 'Hybrid'
gpuLayers = Math.min(
Math.floor(vramAvailableForLayers / layerSize),
totalLayers
)
noOffloadKVCache = false
const vramUsedByLayers = gpuLayers * layerSize
const vramLeftForContext = remainingVRAM - vramUsedByLayers
maxContextLength = Math.floor(vramLeftForContext / kvCachePerToken)
logger.info(
'Planning: Hybrid mode. Offloading layers to fit context in VRAM.'
)
}
}
// Fallback logic: try different configurations if no VRAM-based plan worked
if (mode === 'Unsupported') {
logger.info('Planning: Trying fallback configurations...')
// Try putting some layers on GPU with KV cache in RAM
const possibleGpuLayers = Math.floor(remainingVRAM / layerSize)
if (possibleGpuLayers > 0) {
gpuLayers = Math.min(possibleGpuLayers, totalLayers)
const ramUsedByCpuLayers = (totalLayers - gpuLayers) * layerSize
const ramUsedByMmproj = !offloadMmproj ? mmprojSize : 0
const availableRamForKv =
usableSystemMemory - (ramUsedByCpuLayers + ramUsedByMmproj)
// Note: Use `kvCachePerToken` for RAM calculation, as the overhead is GPU-specific
const contextInRam = Math.floor(availableRamForKv / kvCachePerToken)
if (contextInRam >= MIN_CONTEXT_LENGTH) {
mode = 'Hybrid'
maxContextLength = contextInRam
noOffloadKVCache = true
logger.info(
`Planning: Fallback hybrid - GPU layers: ${gpuLayers}, Context in RAM: ${maxContextLength}`
)
}
}
// If still unsupported, try pure CPU mode
if (mode === 'Unsupported') {
gpuLayers = 0
noOffloadKVCache = true
offloadMmproj = false
const ramUsedByModel = modelSize + mmprojSize
const availableRamForKv = usableSystemMemory - ramUsedByModel
maxContextLength = Math.floor(availableRamForKv / kvCachePerToken)
if (maxContextLength >= MIN_CONTEXT_LENGTH) {
mode = 'CPU'
logger.info(`Planning: CPU mode - Context: ${maxContextLength}`)
}
}
}
if (mode === 'CPU' || noOffloadKVCache) {
offloadMmproj = false
}
if (requestedCtx && requestedCtx > 0) {
maxContextLength = Math.min(maxContextLength, requestedCtx)
}
maxContextLength = Math.min(maxContextLength, modelMaxContextLength)
if (maxContextLength < MIN_CONTEXT_LENGTH) {
mode = 'Unsupported'
}
if (mode === 'Unsupported') {
gpuLayers = 0
maxContextLength = 0
}
maxContextLength = isNaN(maxContextLength)
? 0
: Math.floor(maxContextLength)
const mmprojInfo = mmprojPath
? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(
2
)}MB, offloadMmproj=${offloadMmproj}`
: ''
logger.info(
`Final plan for ${path}: gpuLayers=${gpuLayers}/${totalLayers}, ` +
`maxContextLength=${maxContextLength}, noOffloadKVCache=${noOffloadKVCache}, ` +
`mode=${mode}${mmprojInfo}`
)
return {
gpuLayers,
maxContextLength,
noOffloadKVCache,
mode,
offloadMmproj,
}
}
/**
* estimate KVCache size from a given metadata
*/
private async estimateKVCache(
meta: Record<string, string>,
ctx_size?: number
): Promise<{ size: number; perTokenSize: number }> {
const arch = meta['general.architecture']
if (!arch) throw new Error('Invalid metadata: architecture not found')
const nLayer = Number(meta[`${arch}.block_count`])
if (!nLayer) throw new Error('Invalid metadata: block_count not found')
const nHead = Number(meta[`${arch}.attention.head_count`])
if (!nHead) throw new Error('Invalid metadata: head_count not found')
// Try to get key/value lengths first (more accurate)
const keyLen = Number(meta[`${arch}.attention.key_length`])
const valLen = Number(meta[`${arch}.attention.value_length`])
let headDim: number
if (keyLen && valLen) {
// Use explicit key/value lengths if available
logger.info(
`Using explicit key_length: ${keyLen}, value_length: ${valLen}`
)
headDim = keyLen + valLen
} else {
// Fall back to embedding_length estimation
const embeddingLen = Number(meta[`${arch}.embedding_length`])
if (!embeddingLen)
throw new Error('Invalid metadata: embedding_length not found')
// Standard transformer: head_dim = embedding_dim / num_heads
// For KV cache: we need both K and V, so 2 * head_dim per head
headDim = (embeddingLen / nHead) * 2
logger.info(
`Using embedding_length estimation: ${embeddingLen}, calculated head_dim: ${headDim}`
)
}
const maxCtx = Number(meta[`${arch}.context_length`])
if (!maxCtx) throw new Error('Invalid metadata: context_length not found')
// b) If the user supplied a value, clamp it to the model's max
let ctxLen = ctx_size ? Math.min(ctx_size, maxCtx) : maxCtx
logger.info(`Final context length used for KV size: ${ctxLen}`)
logger.info(`nLayer: ${nLayer}, nHead: ${nHead}, headDim (K+V): ${headDim}`)
logger.info(`ctxLen: ${ctxLen}`)
logger.info(`nLayer: ${nLayer}`)
logger.info(`nHead: ${nHead}`)
logger.info(`headDim: ${headDim}`)
// Consider f16 by default
// Can be extended by checking cache-type-v and cache-type-k
// but we are checking overall compatibility with the default settings
// fp16 = 8 bits * 2 = 16
const bytesPerElement = 2
// Total KV cache size per token = nHead * headDim * bytesPerElement * nLayer
const kvPerToken = nHead * headDim * bytesPerElement * nLayer
return { size: ctxLen * kvPerToken, perTokenSize: kvPerToken }
}
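
To make the per-token estimate above concrete, a small worked example with illustrative Llama-7B-style metadata (the numbers are assumptions, not read from a real GGUF):

```typescript
// kvPerToken = nHead * headDim(K+V) * bytesPerElement * nLayer
const nLayer = 32
const nHead = 32
const embeddingLen = 4096
const headDim = (embeddingLen / nHead) * 2 // K plus V: 128 + 128 = 256
const bytesPerElement = 2 // fp16 cache

const kvPerToken = nHead * headDim * bytesPerElement * nLayer // 524,288 B ≈ 0.5 MiB
const kvCacheAt8k = 8192 * kvPerToken // 4,294,967,296 B = 4 GiB

console.log({ kvPerToken, kvCacheAt8k })
```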
private async getModelSize(path: string): Promise<number> {
if (path.startsWith('https://')) {
const res = await fetch(path, { method: 'HEAD' })
const len = res.headers.get('content-length')
return len ? parseInt(len, 10) : 0
} else {
return (await fs.fileStat(path)).size
try {
const result = await planModelLoadInternal(path, this.memoryMode, mmprojPath, requestedCtx)
return result
} catch (e) {
throw new Error(String(e))
}
}
@ -2404,50 +2168,11 @@ export default class llamacpp_extension extends AIEngine {
*/
async isModelSupported(
path: string,
ctx_size?: number
ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
try {
const modelSize = await this.getModelSize(path)
const memoryInfo = await this.getTotalSystemMemory()
logger.info(`modelSize: ${modelSize}`)
const gguf = await readGgufMetadata(path)
let kvCacheSize: number
if (ctx_size) {
kvCacheSize = (await this.estimateKVCache(gguf.metadata, ctx_size)).size
} else {
kvCacheSize = (await this.estimateKVCache(gguf.metadata)).size
}
// Total memory consumption = model weights + kvcache
const totalRequired = modelSize + kvCacheSize
logger.info(
`isModelSupported: Total memory requirement: ${totalRequired} for ${path}`
)
// Use 90% of total memory as the usable limit
const USABLE_MEMORY_PERCENTAGE = 0.9
const usableTotalMemory =
memoryInfo.totalRAM * USABLE_MEMORY_PERCENTAGE +
memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
const usableVRAM = memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
// Check if model fits in total memory at all (this is the hard limit)
if (totalRequired > usableTotalMemory) {
return 'RED' // Truly impossible to run
}
// Check if everything fits in VRAM (ideal case)
if (totalRequired <= usableVRAM) {
return 'GREEN'
}
// If we get here, it means:
// - Total requirement fits in combined memory
// - But doesn't fit entirely in VRAM
// This is the CPU-GPU hybrid scenario
return 'YELLOW'
const result = await isModelSupported(path, Number(ctxSize))
return result
} catch (e) {
throw new Error(String(e))
}
@ -2601,7 +2326,8 @@ export default class llamacpp_extension extends AIEngine {
metadata: Record<string, string>
): Promise<number> {
// Extract vision parameters from metadata
const projectionDim = Math.floor(Number(metadata['clip.vision.projection_dim']) / 10) || 256
const projectionDim =
Math.floor(Number(metadata['clip.vision.projection_dim']) / 10) || 256
// Count images in messages
let imageCount = 0
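
Earlier in this file, planModelLoad now simply delegates to the plugin's planModelLoadInternal, whose result also carries the new batchSize field. A hedged sketch of turning that plan into llama-server arguments; the flag mapping is an assumption for illustration, not the extension's exact load path:

```typescript
import { planModelLoadInternal } from '@janhq/tauri-plugin-llamacpp-api'

export async function buildLoadArgs(modelPath: string, mmprojPath?: string) {
  const plan = await planModelLoadInternal(modelPath, 'high', mmprojPath)
  if (plan.mode === 'Unsupported') {
    throw new Error('Model does not fit into available VRAM + RAM')
  }
  const args = [
    '-ngl', String(plan.gpuLayers),
    '-c', String(plan.maxContextLength),
    '-b', String(plan.batchSize),
  ]
  if (plan.noOffloadKVCache) args.push('--no-kv-offload')
  return args
}
```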

View File

@ -1,6 +1,6 @@
{
"compilerOptions": {
"target": "es2016",
"target": "es2018",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",

File diff suppressed because it is too large

286 mise.toml
View File

@ -1,286 +0,0 @@
[tools]
node = "20"
rust = "1.85.1"
sccache = "latest"
[env]
_.path = ['./node_modules/.bin']
RUSTC_WRAPPER="sccache"
# ============================================================================
# CORE SETUP AND CONFIGURATION TASKS
# ============================================================================
[tasks.config-yarn]
description = "Configure yarn version and settings"
run = [
"corepack enable",
"corepack prepare yarn@4.5.3 --activate",
"yarn --version",
"yarn config set -H enableImmutableInstalls false"
]
[tasks.install]
description = "Install dependencies"
depends = ["config-yarn"]
run = "yarn install"
sources = ['package.json', 'yarn.lock']
outputs = ['node_modules']
[tasks.build-tauri-plugin-api]
description = "Build Tauri plugin API"
depends = ["install"]
run = "yarn build:tauri:plugin:api"
sources = ['src-tauri/plugins/**/*']
outputs = [
'src-tauri/plugins/tauri-plugin-hardware/dist-js',
'src-tauri/plugins/tauri-plugin-llamacpp/dist-js',
]
[tasks.build-core]
description = "Build core package"
depends = ["build-tauri-plugin-api"]
run = "yarn build:core"
sources = ['core/**/*']
outputs = ['core/dist']
[tasks.build-extensions]
description = "Build extensions"
depends = ["build-core"]
run = "yarn build:extensions && yarn build:extensions-web"
sources = ['extensions/**/*']
outputs = ['pre-install/*.tgz']
[tasks.install-and-build]
description = "Install dependencies and build core and extensions (matches Makefile)"
depends = ["build-extensions"]
# ============================================================================
# DEVELOPMENT TASKS
# ============================================================================
[tasks.dev]
description = "Start development server (matches Makefile)"
depends = ["install-and-build"]
run = [
"yarn download:bin",
"yarn dev"
]
[tasks.dev-tauri]
description = "Start development server with Tauri (DEPRECATED - matches Makefile)"
depends = ["install-and-build"]
run = [
"yarn download:bin",
"yarn dev:tauri"
]
# ============================================================================
# WEB APPLICATION DEVELOPMENT TASKS
# ============================================================================
[tasks.dev-web-app]
description = "Start web application development server (matches Makefile)"
depends = ["build-core"]
run = "yarn dev:web-app"
[tasks.build-web-app]
description = "Build web application (matches Makefile)"
depends = ["build-core"]
run = "yarn build:web-app"
[tasks.serve-web-app]
description = "Serve built web application"
run = "yarn serve:web-app"
[tasks.build-serve-web-app]
description = "Build and serve web application (matches Makefile)"
depends = ["build-web-app"]
run = "yarn serve:web-app"
# ============================================================================
# BUILD TASKS
# ============================================================================
[tasks.install-rust-targets]
description = "Install required Rust targets for MacOS universal builds"
run = '''
#!/usr/bin/env bash
# Check if we're on macOS
if [[ "$OSTYPE" == "darwin"* ]]; then
echo "Detected macOS, installing universal build targets..."
rustup target add x86_64-apple-darwin
rustup target add aarch64-apple-darwin
echo "Rust targets installed successfully!"
fi
'''
[tasks.build]
description = "Build complete application (matches Makefile)"
depends = ["install-rust-targets", "install-and-build"]
run = [
"yarn download:bin",
"yarn build"
]
# ============================================================================
# QUALITY ASSURANCE TASKS
# ============================================================================
[tasks.lint]
description = "Run linting (matches Makefile)"
depends = ["build-extensions"]
run = "yarn lint"
# ============================================================================
# RUST TEST COMPONENTS
# ============================================================================
[tasks.test-rust-main]
description = "Test main src-tauri package"
run = "cargo test --manifest-path src-tauri/Cargo.toml --no-default-features --features test-tauri -- --test-threads=1"
[tasks.test-rust-hardware]
description = "Test hardware plugin"
run = "cargo test --manifest-path src-tauri/plugins/tauri-plugin-hardware/Cargo.toml"
[tasks.test-rust-llamacpp]
description = "Test llamacpp plugin"
run = "cargo test --manifest-path src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml"
[tasks.test-rust-utils]
description = "Test utils package"
run = "cargo test --manifest-path src-tauri/utils/Cargo.toml"
[tasks.test-rust]
description = "Run all Rust tests"
depends = ["test-rust-main", "test-rust-hardware", "test-rust-llamacpp", "test-rust-utils"]
# ============================================================================
# JS TEST COMPONENTS
# ============================================================================
[tasks.test-js-setup]
description = "Setup for JS tests"
run = [
"yarn download:bin",
"yarn download:lib",
"yarn copy:assets:tauri",
"yarn build:icon"
]
[tasks.test-js]
description = "Run JS tests"
depends = ["test-js-setup"]
run = "yarn test"
# ============================================================================
# COMBINED TEST TASKS
# ============================================================================
[tasks.test]
description = "Run complete test suite (matches Makefile)"
depends = ["lint", "test-js", "test-rust"]
# ============================================================================
# PARALLEL-FRIENDLY QUALITY ASSURANCE TASKS
# ============================================================================
[tasks.lint-only]
description = "Run linting only (parallel-friendly)"
depends = ["build-extensions"]
run = "yarn lint"
hide = true
[tasks.test-only]
description = "Run tests only (parallel-friendly)"
depends = ["build-extensions", "test-js", "test-rust"]
hide = true
[tasks.qa-parallel]
description = "Run linting and testing in parallel"
depends = ["lint-only", "test-only"]
# ============================================================================
# UTILITY TASKS
# ============================================================================
[tasks.clean]
description = "Clean all build artifacts and dependencies (cross-platform - matches Makefile)"
run = '''
#!/usr/bin/env bash
echo "Cleaning build artifacts and dependencies..."
# Platform detection and cleanup (matches Makefile exactly)
if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then
# Windows cleanup using PowerShell (matches Makefile)
powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist, build, out, .turbo, .yarn -Recurse -Directory | Remove-Item -Recurse -Force" 2>/dev/null || true
powershell -Command "Get-ChildItem -Path . -Include package-lock.json, tsconfig.tsbuildinfo -Recurse -File | Remove-Item -Recurse -Force" 2>/dev/null || true
powershell -Command "Remove-Item -Recurse -Force ./pre-install/*.tgz" 2>/dev/null || true
powershell -Command "Remove-Item -Recurse -Force ./extensions/*/*.tgz" 2>/dev/null || true
powershell -Command "Remove-Item -Recurse -Force ./electron/pre-install/*.tgz" 2>/dev/null || true
powershell -Command "Remove-Item -Recurse -Force ./src-tauri/resources" 2>/dev/null || true
powershell -Command "Remove-Item -Recurse -Force ./src-tauri/target" 2>/dev/null || true
powershell -Command "if (Test-Path \"\$(\$env:USERPROFILE)\\jan\\extensions\\\") { Remove-Item -Path \"\$(\$env:USERPROFILE)\\jan\\extensions\" -Recurse -Force }" 2>/dev/null || true
elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
# Linux cleanup (matches Makefile)
find . -name "node_modules" -type d -prune -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".next" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "dist" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "build" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "out" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".turbo" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".yarn" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "package-lock.json" -type f -exec rm -rf '{}' + 2>/dev/null || true
rm -rf ./pre-install/*.tgz 2>/dev/null || true
rm -rf ./extensions/*/*.tgz 2>/dev/null || true
rm -rf ./electron/pre-install/*.tgz 2>/dev/null || true
rm -rf ./src-tauri/resources 2>/dev/null || true
rm -rf ./src-tauri/target 2>/dev/null || true
rm -rf ~/jan/extensions 2>/dev/null || true
rm -rf "~/.cache/jan*" 2>/dev/null || true
rm -rf "./.cache" 2>/dev/null || true
else
# macOS cleanup (matches Makefile)
find . -name "node_modules" -type d -prune -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".next" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "dist" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "build" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "out" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".turbo" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".yarn" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "package-lock.json" -type f -exec rm -rf '{}' + 2>/dev/null || true
rm -rf ./pre-install/*.tgz 2>/dev/null || true
rm -rf ./extensions/*/*.tgz 2>/dev/null || true
rm -rf ./electron/pre-install/*.tgz 2>/dev/null || true
rm -rf ./src-tauri/resources 2>/dev/null || true
rm -rf ./src-tauri/target 2>/dev/null || true
rm -rf ~/jan/extensions 2>/dev/null || true
rm -rf ~/Library/Caches/jan* 2>/dev/null || true
fi
echo "Clean completed!"
'''
[tasks.all]
description = "Default target - shows available commands (matches Makefile)"
run = "echo 'Specify a target to run. Use: mise tasks'"
# ============================================================================
# DEVELOPMENT WORKFLOW SHORTCUTS
# ============================================================================
[tasks.setup]
description = "Complete development setup"
depends = ["install-and-build"]
alias = "init"
[tasks.ci]
description = "Run CI pipeline (lint + test sequentially)"
depends = ["test"]
[tasks.ci-parallel]
description = "Run CI pipeline (lint + test in parallel)"
depends = ["qa-parallel"]
alias = "ci-fast"

View File

@ -27,7 +27,8 @@
"copy:assets:tauri": "cpx \"pre-install/*.tgz\" \"src-tauri/resources/pre-install/\" && cpx \"LICENSE\" \"src-tauri/resources/\"",
"download:lib": "node ./scripts/download-lib.mjs",
"download:bin": "node ./scripts/download-bin.mjs",
"build:tauri:win32": "yarn download:bin && yarn download:lib && yarn tauri build",
"download:windows-installer": "node ./scripts/download-win-installer-deps.mjs",
"build:tauri:win32": "yarn download:bin && yarn download:lib && yarn download:windows-installer && yarn tauri build",
"build:tauri:linux": "yarn download:bin && yarn download:lib && NO_STRIP=1 ./src-tauri/build-utils/shim-linuxdeploy.sh yarn tauri build && ./src-tauri/build-utils/buildAppImage.sh",
"build:tauri:darwin": "yarn download:bin && yarn tauri build --target universal-apple-darwin",
"build:tauri": "yarn build:icon && yarn copy:assets:tauri && run-script-os",

View File

@ -0,0 +1,83 @@
console.log('Downloading Windows installer dependencies...')
// scripts/download-win-installer-deps.mjs
import https from 'https'
import fs, { mkdirSync } from 'fs'
import os from 'os'
import path from 'path'
import { copySync } from 'cpx'
function download(url, dest) {
return new Promise((resolve, reject) => {
console.log(`Downloading ${url} to ${dest}`)
const file = fs.createWriteStream(dest)
https
.get(url, (response) => {
console.log(`Response status code: ${response.statusCode}`)
if (
response.statusCode >= 300 &&
response.statusCode < 400 &&
response.headers.location
) {
// Handle redirect
const redirectURL = response.headers.location
console.log(`Redirecting to ${redirectURL}`)
download(redirectURL, dest).then(resolve, reject) // Recursive call
return
} else if (response.statusCode !== 200) {
reject(`Failed to get '${url}' (${response.statusCode})`)
return
}
response.pipe(file)
file.on('finish', () => {
file.close(resolve)
})
})
.on('error', (err) => {
fs.unlink(dest, () => reject(err.message))
})
})
}
async function main() {
console.log('Starting Windows installer dependencies download')
const platform = os.platform() // 'darwin', 'linux', 'win32'
const arch = os.arch() // 'x64', 'arm64', etc.
if (arch != 'x64') return
const libDir = 'src-tauri/resources/lib'
const tempDir = 'scripts/dist'
try {
mkdirSync('scripts/dist')
} catch (err) {
// Expect EEXIST error if the directory already exists
}
// Download VC++ Redistributable 17
if (platform == 'win32') {
const vcFilename = 'vc_redist.x64.exe'
const vcUrl = 'https://aka.ms/vs/17/release/vc_redist.x64.exe'
console.log(`Downloading VC++ Redistributable...`)
const vcSavePath = path.join(tempDir, vcFilename)
if (!fs.existsSync(vcSavePath)) {
await download(vcUrl, vcSavePath)
}
// copy to tauri resources
try {
copySync(vcSavePath, libDir)
} catch (err) {
// Expect EEXIST error
}
}
console.log('Windows installer dependencies downloads completed.')
}
main().catch((err) => {
console.error('Error:', err)
process.exit(1)
})

View File

@ -15,6 +15,8 @@ use tauri::Runtime;
static SYSTEM_INFO: OnceLock<SystemInfo> = OnceLock::new();
pub use commands::get_system_info;
/// Initialize the hardware plugin
pub fn init<R: Runtime>() -> tauri::plugin::TauriPlugin<R> {
tauri::plugin::Builder::new("hardware")

View File

@ -24,6 +24,7 @@ tauri = { version = "2.5.0", default-features = false, features = [] }
thiserror = "2.0.12"
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.11", features = ["json", "blocking", "stream"] }
tauri-plugin-hardware = { path = "../tauri-plugin-hardware" }
# Unix-specific dependencies
[target.'cfg(unix)'.dependencies]

View File

@ -14,6 +14,10 @@ const COMMANDS: &[&str] = &[
"get_session_by_model",
// GGUF commands
"read_gguf_metadata",
"estimate_kv_cache_size",
"get_model_size",
"is_model_supported",
"plan_model_load"
];
fn main() {

View File

@ -2,28 +2,28 @@ import { invoke } from '@tauri-apps/api/core'
// Types
export interface SessionInfo {
pid: number;
port: number;
model_id: string;
model_path: string;
api_key: string;
pid: number
port: number
model_id: string
model_path: string
api_key: string
}
export interface DeviceInfo {
id: string;
name: string;
memory: number;
id: string
name: string
memory: number
}
export interface GgufMetadata {
version: number;
tensor_count: number;
metadata: Record<string, string>;
version: number
tensor_count: number
metadata: Record<string, string>
}
// Cleanup commands
export async function cleanupLlamaProcesses(): Promise<void> {
return await invoke('plugin:llamacpp|cleanup_llama_processes');
return await invoke('plugin:llamacpp|cleanup_llama_processes')
}
// LlamaCpp server commands
@ -35,12 +35,12 @@ export async function loadLlamaModel(
return await invoke('plugin:llamacpp|load_llama_model', {
backendPath,
libraryPath,
args
});
args,
})
}
export async function unloadLlamaModel(pid: number): Promise<void> {
return await invoke('plugin:llamacpp|unload_llama_model', { pid });
return await invoke('plugin:llamacpp|unload_llama_model', { pid })
}
export async function getDevices(
@ -49,8 +49,8 @@ export async function getDevices(
): Promise<DeviceInfo[]> {
return await invoke('plugin:llamacpp|get_devices', {
backendPath,
libraryPath
});
libraryPath,
})
}
export async function generateApiKey(
@ -59,35 +59,84 @@ export async function generateApiKey(
): Promise<string> {
return await invoke('plugin:llamacpp|generate_api_key', {
modelId,
apiSecret
});
apiSecret,
})
}
export async function isProcessRunning(pid: number): Promise<boolean> {
return await invoke('plugin:llamacpp|is_process_running', { pid });
return await invoke('plugin:llamacpp|is_process_running', { pid })
}
export async function getRandomPort(): Promise<number> {
return await invoke('plugin:llamacpp|get_random_port');
return await invoke('plugin:llamacpp|get_random_port')
}
export async function findSessionByModel(modelId: string): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|find_session_by_model', { modelId });
export async function findSessionByModel(
modelId: string
): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|find_session_by_model', { modelId })
}
export async function getLoadedModels(): Promise<string[]> {
return await invoke('plugin:llamacpp|get_loaded_models');
return await invoke('plugin:llamacpp|get_loaded_models')
}
export async function getAllSessions(): Promise<SessionInfo[]> {
return await invoke('plugin:llamacpp|get_all_sessions');
return await invoke('plugin:llamacpp|get_all_sessions')
}
export async function getSessionByModel(modelId: string): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|get_session_by_model', { modelId });
export async function getSessionByModel(
modelId: string
): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|get_session_by_model', { modelId })
}
// GGUF commands
export async function readGgufMetadata(path: string): Promise<GgufMetadata> {
return await invoke('plugin:llamacpp|read_gguf_metadata', { path });
return await invoke('plugin:llamacpp|read_gguf_metadata', { path })
}
export async function estimateKVCacheSize(
meta: Record<string, string>,
ctxSize?: number
): Promise<{ size: number; per_token_size: number }> {
return await invoke('plugin:llamacpp|estimate_kv_cache_size', {
meta,
ctxSize,
})
}
export async function getModelSize(path: string): Promise<number> {
return await invoke('plugin:llamacpp|get_model_size', { path })
}
export async function isModelSupported(
path: string,
ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
return await invoke('plugin:llamacpp|is_model_supported', {
path,
ctxSize,
})
}
export async function planModelLoadInternal(
path: string,
memoryMode: string,
mmprojPath?: string,
requestedContext?: number
): Promise<{
gpuLayers: number
maxContextLength: number
noOffloadKVCache: boolean
offloadMmproj?: boolean
batchSize: number
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}> {
return await invoke('plugin:llamacpp|plan_model_load', {
path,
memoryMode,
mmprojPath,
requestedCtx: requestedContext, // invoke key must camelCase-match the Rust `requested_ctx` argument
})
}
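
A short example of chaining the GGUF helpers exported above to size a model before loading it; the path and the 8192-token context length are placeholders, and callers may pass a local file path or an https:// URL:

```typescript
import {
  readGgufMetadata,
  getModelSize,
  estimateKVCacheSize,
  isModelSupported,
} from '@janhq/tauri-plugin-llamacpp-api'

export async function describeModel(path: string) {
  const sizeBytes = await getModelSize(path)
  const gguf = await readGgufMetadata(path)
  const kv = await estimateKVCacheSize(gguf.metadata, 8192)
  const support = await isModelSupported(path, 8192) // 'RED' | 'YELLOW' | 'GREEN'
  return {
    sizeBytes,
    kvCacheBytesAt8k: kv.size,
    kvCacheBytesPerToken: kv.per_token_size,
    support,
  }
}
```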

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-estimate-kv-cache-size"
description = "Enables the estimate_kv_cache_size command without any pre-configured scope."
commands.allow = ["estimate_kv_cache_size"]
[[permission]]
identifier = "deny-estimate-kv-cache-size"
description = "Denies the estimate_kv_cache_size command without any pre-configured scope."
commands.deny = ["estimate_kv_cache_size"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-get-model-size"
description = "Enables the get_model_size command without any pre-configured scope."
commands.allow = ["get_model_size"]
[[permission]]
identifier = "deny-get-model-size"
description = "Denies the get_model_size command without any pre-configured scope."
commands.deny = ["get_model_size"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-is-model-supported"
description = "Enables the is_model_supported command without any pre-configured scope."
commands.allow = ["is_model_supported"]
[[permission]]
identifier = "deny-is-model-supported"
description = "Denies the is_model_supported command without any pre-configured scope."
commands.deny = ["is_model_supported"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-plan-model-load"
description = "Enables the plan_model_load command without any pre-configured scope."
commands.allow = ["plan_model_load"]
[[permission]]
identifier = "deny-plan-model-load"
description = "Denies the plan_model_load command without any pre-configured scope."
commands.deny = ["plan_model_load"]

View File

@ -16,6 +16,10 @@ Default permissions for the llamacpp plugin
- `allow-get-all-sessions`
- `allow-get-session-by-model`
- `allow-read-gguf-metadata`
- `allow-estimate-kv-cache-size`
- `allow-get-model-size`
- `allow-is-model-supported`
- `allow-plan-model-load`
## Permission Table
@ -55,6 +59,32 @@ Denies the cleanup_llama_processes command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-estimate-kv-cache-size`
</td>
<td>
Enables the estimate_kv_cache_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-estimate-kv-cache-size`
</td>
<td>
Denies the estimate_kv_cache_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-find-session-by-model`
</td>
@ -185,6 +215,32 @@ Denies the get_loaded_models command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-get-model-size`
</td>
<td>
Enables the get_model_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-get-model-size`
</td>
<td>
Denies the get_model_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-get-random-port`
</td>
@ -237,6 +293,32 @@ Denies the get_session_by_model command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-is-model-supported`
</td>
<td>
Enables the is_model_supported command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-is-model-supported`
</td>
<td>
Denies the is_model_supported command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-is-process-running`
</td>
@ -289,6 +371,32 @@ Denies the load_llama_model command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-plan-model-load`
</td>
<td>
Enables the plan_model_load command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-plan-model-load`
</td>
<td>
Denies the plan_model_load command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-read-gguf-metadata`
</td>

View File

@ -3,10 +3,10 @@ description = "Default permissions for the llamacpp plugin"
permissions = [
# Cleanup commands
"allow-cleanup-llama-processes",
# LlamaCpp server commands
"allow-load-llama-model",
"allow-unload-llama-model",
"allow-unload-llama-model",
"allow-get-devices",
"allow-generate-api-key",
"allow-is-process-running",
@ -15,7 +15,11 @@ permissions = [
"allow-get-loaded-models",
"allow-get-all-sessions",
"allow-get-session-by-model",
# GGUF commands
"allow-read-gguf-metadata"
"allow-read-gguf-metadata",
"allow-estimate-kv-cache-size",
"allow-get-model-size",
"allow-is-model-supported",
"allow-plan-model-load"
]

View File

@ -306,6 +306,18 @@
"const": "deny-cleanup-llama-processes",
"markdownDescription": "Denies the cleanup_llama_processes command without any pre-configured scope."
},
{
"description": "Enables the estimate_kv_cache_size command without any pre-configured scope.",
"type": "string",
"const": "allow-estimate-kv-cache-size",
"markdownDescription": "Enables the estimate_kv_cache_size command without any pre-configured scope."
},
{
"description": "Denies the estimate_kv_cache_size command without any pre-configured scope.",
"type": "string",
"const": "deny-estimate-kv-cache-size",
"markdownDescription": "Denies the estimate_kv_cache_size command without any pre-configured scope."
},
{
"description": "Enables the find_session_by_model command without any pre-configured scope.",
"type": "string",
@ -366,6 +378,18 @@
"const": "deny-get-loaded-models",
"markdownDescription": "Denies the get_loaded_models command without any pre-configured scope."
},
{
"description": "Enables the get_model_size command without any pre-configured scope.",
"type": "string",
"const": "allow-get-model-size",
"markdownDescription": "Enables the get_model_size command without any pre-configured scope."
},
{
"description": "Denies the get_model_size command without any pre-configured scope.",
"type": "string",
"const": "deny-get-model-size",
"markdownDescription": "Denies the get_model_size command without any pre-configured scope."
},
{
"description": "Enables the get_random_port command without any pre-configured scope.",
"type": "string",
@ -390,6 +414,18 @@
"const": "deny-get-session-by-model",
"markdownDescription": "Denies the get_session_by_model command without any pre-configured scope."
},
{
"description": "Enables the is_model_supported command without any pre-configured scope.",
"type": "string",
"const": "allow-is-model-supported",
"markdownDescription": "Enables the is_model_supported command without any pre-configured scope."
},
{
"description": "Denies the is_model_supported command without any pre-configured scope.",
"type": "string",
"const": "deny-is-model-supported",
"markdownDescription": "Denies the is_model_supported command without any pre-configured scope."
},
{
"description": "Enables the is_process_running command without any pre-configured scope.",
"type": "string",
@ -414,6 +450,18 @@
"const": "deny-load-llama-model",
"markdownDescription": "Denies the load_llama_model command without any pre-configured scope."
},
{
"description": "Enables the plan_model_load command without any pre-configured scope.",
"type": "string",
"const": "allow-plan-model-load",
"markdownDescription": "Enables the plan_model_load command without any pre-configured scope."
},
{
"description": "Denies the plan_model_load command without any pre-configured scope.",
"type": "string",
"const": "deny-plan-model-load",
"markdownDescription": "Denies the plan_model_load command without any pre-configured scope."
},
{
"description": "Enables the read_gguf_metadata command without any pre-configured scope.",
"type": "string",
@ -439,10 +487,10 @@
"markdownDescription": "Denies the unload_llama_model command without any pre-configured scope."
},
{
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`",
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`",
"type": "string",
"const": "default",
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`"
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`"
}
]
}

View File

@ -1,58 +1,141 @@
use super::helpers;
use super::types::GgufMetadata;
use reqwest;
use std::fs::File;
use std::io::BufReader;
use super::utils::{estimate_kv_cache_internal, read_gguf_metadata_internal};
use crate::gguf::types::{KVCacheError, KVCacheEstimate, ModelSupportStatus};
use std::collections::HashMap;
use std::fs;
use tauri::Runtime;
use tauri_plugin_hardware::get_system_info;
/// Read GGUF metadata from a model file
#[tauri::command]
pub async fn read_gguf_metadata(path: String) -> Result<GgufMetadata, String> {
if path.starts_with("http://") || path.starts_with("https://") {
// Remote: read in 2MB chunks until successful
return read_gguf_metadata_internal(path).await;
}
#[tauri::command]
pub async fn estimate_kv_cache_size(
meta: HashMap<String, String>,
ctx_size: Option<u64>,
) -> Result<KVCacheEstimate, KVCacheError> {
estimate_kv_cache_internal(meta, ctx_size).await
}
#[tauri::command]
pub async fn get_model_size(path: String) -> Result<u64, String> {
if path.starts_with("https://") {
// Handle remote URL
let client = reqwest::Client::new();
let chunk_size = 2 * 1024 * 1024; // Fixed 2MB chunks
let max_total_size = 120 * 1024 * 1024; // Don't exceed 120MB total
let mut total_downloaded = 0;
let mut accumulated_data = Vec::new();
let response = client
.head(&path)
.send()
.await
.map_err(|e| format!("Failed to fetch HEAD request: {}", e))?;
while total_downloaded < max_total_size {
let start = total_downloaded;
let end = std::cmp::min(start + chunk_size - 1, max_total_size - 1);
let resp = client
.get(&path)
.header("Range", format!("bytes={}-{}", start, end))
.send()
.await
.map_err(|e| format!("Failed to fetch chunk {}-{}: {}", start, end, e))?;
let chunk_data = resp
.bytes()
.await
.map_err(|e| format!("Failed to read chunk response: {}", e))?;
accumulated_data.extend_from_slice(&chunk_data);
total_downloaded += chunk_data.len();
// Try parsing after each chunk
let cursor = std::io::Cursor::new(&accumulated_data);
if let Ok(metadata) = helpers::read_gguf_metadata(cursor) {
return Ok(metadata);
}
// If we got less data than expected, we've reached EOF
if chunk_data.len() < chunk_size {
break;
}
if let Some(content_length) = response.headers().get("content-length") {
let content_length_str = content_length
.to_str()
.map_err(|e| format!("Invalid content-length header: {}", e))?;
content_length_str
.parse::<u64>()
.map_err(|e| format!("Failed to parse content-length: {}", e))
} else {
Ok(0)
}
Err("Could not parse GGUF metadata from downloaded data".to_string())
} else {
// Local: use streaming file reader
let file =
File::open(&path).map_err(|e| format!("Failed to open local file {}: {}", path, e))?;
let reader = BufReader::new(file);
helpers::read_gguf_metadata(reader)
.map_err(|e| format!("Failed to parse GGUF metadata: {}", e))
// Handle local file using standard fs
let metadata =
fs::metadata(&path).map_err(|e| format!("Failed to get file metadata: {}", e))?;
Ok(metadata.len())
}
}
#[tauri::command]
pub async fn is_model_supported<R: Runtime>(
path: String,
ctx_size: Option<u32>,
app_handle: tauri::AppHandle<R>,
) -> Result<ModelSupportStatus, String> {
// Get model size
let model_size = get_model_size(path.clone()).await?;
// Get system info
let system_info = get_system_info(app_handle.clone());
log::info!("modelSize: {}", model_size);
// Read GGUF metadata
let gguf = read_gguf_metadata(path.clone()).await?;
// Calculate KV cache size
let kv_cache_size = if let Some(ctx_size) = ctx_size {
log::info!("Using ctx_size: {}", ctx_size);
estimate_kv_cache_internal(gguf.metadata, Some(ctx_size as u64))
.await
.map_err(|e| e.to_string())?
.size
} else {
estimate_kv_cache_internal(gguf.metadata, None)
.await
.map_err(|e| e.to_string())?
.size
};
// Total memory consumption = model weights + kvcache
let total_required = model_size + kv_cache_size;
log::info!(
"isModelSupported: Total memory requirement: {} for {}; Got kvCacheSize: {} from BE",
total_required,
path,
kv_cache_size
);
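// RESERVE_BYTES ≈ 2.13 GiB held back for the OS and other applications before sizing the model.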
const RESERVE_BYTES: u64 = 2288490189;
let total_system_memory = system_info.total_memory * 1024 * 1024;
// Calculate total VRAM from all GPUs
let total_vram: u64 = if system_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_system_memory
} else {
system_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
};
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
let usable_vram = if total_vram > RESERVE_BYTES {
total_vram - RESERVE_BYTES
} else {
0
};
let usable_total_memory = if total_system_memory > RESERVE_BYTES {
(total_system_memory - RESERVE_BYTES) + usable_vram
} else {
0
};
log::info!("System RAM: {} bytes", &total_system_memory);
log::info!("Total VRAM: {} bytes", &total_vram);
log::info!("Usable total memory: {} bytes", &usable_total_memory);
log::info!("Usable VRAM: {} bytes", &usable_vram);
log::info!("Required: {} bytes", &total_required);
// Check if model fits in total memory at all (this is the hard limit)
if total_required > usable_total_memory {
return Ok(ModelSupportStatus::Red); // Truly impossible to run
}
// Check if everything fits in VRAM (ideal case)
if total_required <= usable_vram {
return Ok(ModelSupportStatus::Green);
}
// If we get here, it means:
// - Total requirement fits in combined memory
// - But doesn't fit entirely in VRAM
// This is the CPU-GPU hybrid scenario
Ok(ModelSupportStatus::Yellow)
}
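
Plugging illustrative numbers into the traffic-light check above (all figures are assumptions): a 7 GB model with a 1.5 GB KV cache on a 32 GB RAM / 8 GB VRAM machine, using the same ~2.13 GiB reserve:

```typescript
const RESERVE = 2_288_490_189 // same reserve as the Rust code above

const required = 7_000_000_000 + 1_500_000_000 // weights + KV cache = 8.5 GB
const totalRam = 32 * 1024 ** 3
const totalVram = 8 * 1024 ** 3

const usableVram = Math.max(0, totalVram - RESERVE) // ≈ 6.3 GB
const usableTotal = Math.max(0, totalRam - RESERVE) + usableVram // ≈ 38.4 GB

const verdict =
  required > usableTotal ? 'RED' : required <= usableVram ? 'GREEN' : 'YELLOW'
// 8.5 GB exceeds usable VRAM but fits in combined memory -> 'YELLOW'
console.log(verdict)
```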

View File

@ -1,3 +1,5 @@
pub mod commands;
pub mod helpers;
pub mod types;
pub mod utils;
pub mod model_planner;

View File

@ -0,0 +1,318 @@
use crate::gguf::commands::get_model_size;
use crate::gguf::utils::estimate_kv_cache_internal;
use crate::gguf::utils::read_gguf_metadata_internal;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tauri::Runtime;
use tauri_plugin_hardware::get_system_info;
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(rename_all = "camelCase")]
pub struct ModelPlan {
pub gpu_layers: u64,
pub max_context_length: u64,
pub no_offload_kv_cache: bool,
pub offload_mmproj: bool,
pub batch_size: u64,
pub mode: ModelMode,
}
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
#[serde(rename_all = "UPPERCASE")]
pub enum ModelMode {
GPU,
Hybrid,
CPU,
Unsupported,
}
#[tauri::command]
pub async fn plan_model_load<R: Runtime>(
path: String,
memory_mode: String,
mmproj_path: Option<String>,
requested_ctx: Option<u64>,
app: tauri::AppHandle<R>,
) -> Result<ModelPlan, String> {
let model_size = get_model_size(path.clone()).await?;
let sys_info = get_system_info(app.clone());
let gguf = read_gguf_metadata_internal(path.clone()).await?;
let mut mmproj_size: u64 = 0;
if let Some(ref mmproj) = mmproj_path {
mmproj_size = get_model_size(mmproj.clone()).await?;
}
let arch = gguf
.metadata
.get("general.architecture")
.ok_or("Missing architecture")?;
let repeating_layers: u64 = gguf
.metadata
.get(&format!("{arch}.block_count"))
.ok_or("Missing block_count")?
.parse()
.map_err(|_| "Invalid block_count")?;
let total_layers = repeating_layers + 1;
let layer_size = model_size / total_layers;
let kv_cache = estimate_kv_cache_internal(gguf.metadata.clone(), None)
.await
.map_err(|e| e.to_string())?;
let kv_cache_per_token = kv_cache.per_token_size;
if model_size == 0 || layer_size == 0 || kv_cache_per_token == 0 {
return Err("Invalid model/layer/cache sizes".into());
}
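// ≈ 2.13 GiB reserve, same value used by is_model_supported in commands.rs.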
const RESERVE_BYTES: u64 = 2288490189;
const MIN_CONTEXT_LENGTH: u64 = 2048;
let model_max_ctx: u64 = gguf
.metadata
.get(&format!("{arch}.context_length"))
.and_then(|s| s.parse().ok())
.unwrap_or(8192);
let memory_percentages = HashMap::from([("high", 0.7), ("medium", 0.5), ("low", 0.4)]);
let multiplier = *memory_percentages
.get(memory_mode.as_str())
.ok_or("Invalid memory mode")?;
log::info!("Got GPUs:\n{:?}", &sys_info.gpus);
let total_ram: u64 = sys_info.total_memory * 1024 * 1024;
log::info!(
"Total system memory reported from tauri_plugin_hardware(in bytes): {}",
&total_ram
);
let total_vram: u64 = if sys_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_ram
} else {
sys_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
};
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
let usable_vram: u64 = if total_vram > RESERVE_BYTES {
(((total_vram - RESERVE_BYTES) as f64) * multiplier) as u64
} else {
0
};
log::info!("Usable vram calculated: {}", &usable_vram);
let usable_ram: u64 = if total_ram > RESERVE_BYTES {
(((total_ram - RESERVE_BYTES) as f64) * multiplier).max(0.0) as u64
} else {
0
};
log::info!("Usable ram calculated (in bytes): {}", &usable_ram);
let mut gpu_layers = 0;
let mut max_ctx_len = 0;
let mut no_offload_kv_cache = false;
let mut mode = ModelMode::Unsupported;
let mut offload_mmproj = false;
let mut batch_size = 2048;
let total_available_mem = usable_vram.saturating_add(usable_ram);
if model_size + mmproj_size > total_available_mem {
log::info!("Model not supported in this system!");
return Ok(ModelPlan {
gpu_layers: 0,
max_context_length: 0,
no_offload_kv_cache: true,
batch_size: 64,
mode: ModelMode::Unsupported,
offload_mmproj: false,
});
}
if mmproj_size > 0 {
offload_mmproj = true;
}
let kv_min_size = estimate_kv_cache_internal(gguf.metadata.clone(), Some(MIN_CONTEXT_LENGTH))
.await
.map_err(|e| e.to_string())?
.size;
if model_size + kv_min_size + mmproj_size <= usable_vram {
log::info!("Planning mode: Full GPU offload is possible.");
mode = ModelMode::GPU;
gpu_layers = total_layers;
let vram_left_for_ctx = usable_vram.saturating_sub(model_size);
let max_ctx_by_vram = (vram_left_for_ctx / kv_cache_per_token) as u64;
let requested_target = requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
max_ctx_len = requested_target.min(max_ctx_by_vram);
no_offload_kv_cache = false;
offload_mmproj = true;
} else {
let mut found_plan = false;
log::info!("Attempting VRAM-Maximized Hybrid plan (KV cache in VRAM only).");
for candidate_gpu_layers in (0..=total_layers).rev() {
let vram_used_by_layers = candidate_gpu_layers.saturating_mul(layer_size);
if vram_used_by_layers > usable_vram {
continue;
}
let ram_used_by_cpu_layers =
(total_layers.saturating_sub(candidate_gpu_layers)).saturating_mul(layer_size);
let ram_used_by_mmproj = if offload_mmproj { 0 } else { mmproj_size };
let required_ram_for_model = ram_used_by_cpu_layers.saturating_add(ram_used_by_mmproj);
if required_ram_for_model > usable_ram {
continue;
}
let vram_left_for_kv = usable_vram.saturating_sub(vram_used_by_layers);
let ctx_in_vram_only = (vram_left_for_kv / kv_cache_per_token) as u64;
if ctx_in_vram_only >= MIN_CONTEXT_LENGTH {
log::info!(
"Found VRAM-Maximized Hybrid plan with {} GPU layers.",
candidate_gpu_layers
);
mode = ModelMode::Hybrid;
gpu_layers = candidate_gpu_layers;
let requested_target = requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
max_ctx_len = requested_target.min(ctx_in_vram_only);
no_offload_kv_cache = false;
found_plan = true;
break;
}
}
if !found_plan {
log::info!("VRAM-Maximized plan not feasible. Falling back to Standard Hybrid (KV cache in VRAM+RAM).");
for candidate_gpu_layers in (0..=total_layers).rev() {
let vram_used_by_layers = candidate_gpu_layers.saturating_mul(layer_size);
if vram_used_by_layers > usable_vram {
continue;
}
let vram_left_for_kv = usable_vram.saturating_sub(vram_used_by_layers);
let kv_in_vram = (vram_left_for_kv / kv_cache_per_token) as u64;
let ram_used_by_cpu_layers =
(total_layers.saturating_sub(candidate_gpu_layers)).saturating_mul(layer_size);
let ram_used_by_mmproj = if offload_mmproj { 0 } else { mmproj_size };
let required_ram_for_model =
ram_used_by_cpu_layers.saturating_add(ram_used_by_mmproj);
if required_ram_for_model > usable_ram {
continue;
}
let available_ram_for_kv = usable_ram.saturating_sub(required_ram_for_model);
let kv_in_ram = (available_ram_for_kv / kv_cache_per_token) as u64;
let total_kv_tokens = kv_in_vram.saturating_add(kv_in_ram);
if total_kv_tokens >= MIN_CONTEXT_LENGTH {
log::info!(
"Found Standard Hybrid plan with {} GPU layers.",
candidate_gpu_layers
);
mode = if candidate_gpu_layers > 0 {
ModelMode::Hybrid
} else {
ModelMode::CPU
};
gpu_layers = candidate_gpu_layers;
let requested_target =
requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
let max_possible_ctx = total_kv_tokens.min(model_max_ctx);
max_ctx_len = requested_target.min(max_possible_ctx);
no_offload_kv_cache = kv_in_ram > 0 && kv_in_vram == 0;
found_plan = true;
break;
}
}
}
if !found_plan {
log::info!("No hybrid plan found. Attempting CPU-only plan.");
if model_size + mmproj_size <= usable_ram {
let available_ram_for_kv = usable_ram.saturating_sub(model_size + mmproj_size);
let kv_tokens = (available_ram_for_kv / kv_cache_per_token) as u64;
if kv_tokens >= MIN_CONTEXT_LENGTH {
mode = ModelMode::CPU;
gpu_layers = 0;
max_ctx_len = kv_tokens
.min(requested_ctx.unwrap_or(model_max_ctx))
.min(model_max_ctx);
no_offload_kv_cache = true;
offload_mmproj = false;
}
}
}
}
if let Some(req) = requested_ctx {
if req > 0 {
max_ctx_len = max_ctx_len.min(req);
}
}
max_ctx_len = max_ctx_len.min(model_max_ctx);
if max_ctx_len > 0 {
log::info!("Max context before power-of-2 adjustment: {}", max_ctx_len);
max_ctx_len = 1u64 << (63 - max_ctx_len.leading_zeros());
log::info!("Adjusted max context to power of 2: {}", max_ctx_len);
}
if mode == ModelMode::Unsupported {
if max_ctx_len >= MIN_CONTEXT_LENGTH {
// Context length is still viable even though no plan assigned a mode; keep the values as-is.
} else {
gpu_layers = 0;
max_ctx_len = 0;
offload_mmproj = false;
}
} else if max_ctx_len < MIN_CONTEXT_LENGTH {
log::info!(
"Final context length {} is less than minimum required {}. Marking as unsupported.",
max_ctx_len,
MIN_CONTEXT_LENGTH
);
mode = ModelMode::Unsupported;
gpu_layers = 0;
max_ctx_len = 0;
offload_mmproj = false;
}
if mode == ModelMode::Hybrid {
batch_size = 256;
} else if mode == ModelMode::CPU || no_offload_kv_cache || mode == ModelMode::Unsupported {
batch_size = 64;
}
if max_ctx_len > 0 {
batch_size = batch_size.min(max_ctx_len);
} else {
batch_size = 64;
}
if mode == ModelMode::CPU || no_offload_kv_cache {
offload_mmproj = false;
}
log::info!("Planned model load params: GPU Layers: {}, max_ctx_len: {}, kv_cache offload: {}, offload mmproj: {}, batch_size: {}",
gpu_layers, max_ctx_len, !no_offload_kv_cache, offload_mmproj, batch_size);
Ok(ModelPlan {
gpu_layers,
max_context_length: max_ctx_len,
no_offload_kv_cache,
offload_mmproj,
batch_size,
mode,
})
}
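For intuition, a back-of-the-envelope run of the "full GPU offload" branch of plan_model_load above, as a minimal standalone sketch. All figures are hypothetical (8 GiB of usable VRAM, a 6 GiB text-only model, a 128 KiB/token fp16 KV cost, and an assumed stand-in value for MIN_CONTEXT_LENGTH); none of them come from this repository.

fn main() {
    // Hypothetical sizes in bytes, purely for illustration.
    let usable_vram: u64 = 8 * 1024 * 1024 * 1024; // 8 GiB budget on the GPU
    let model_size: u64 = 6 * 1024 * 1024 * 1024;  // 6 GiB of weights
    let mmproj_size: u64 = 0;                      // text-only model, no projector
    let kv_cache_per_token: u64 = 131_072;         // 128 KiB per token (fp16)
    let min_context_length: u64 = 2_048;           // assumed stand-in for MIN_CONTEXT_LENGTH
    let model_max_ctx: u64 = 32_768;

    // Approximates the estimator's full-attention KV cost at the minimum viable context.
    let kv_min_size = min_context_length * kv_cache_per_token; // 256 MiB

    // Mirrors the first branch above: weights + minimal KV + mmproj fit in VRAM,
    // so whatever VRAM is left after the weights bounds the usable context length.
    if model_size + kv_min_size + mmproj_size <= usable_vram {
        let vram_left_for_ctx = usable_vram - model_size;          // 2 GiB
        let max_ctx_by_vram = vram_left_for_ctx / kv_cache_per_token; // 16_384 tokens
        let max_ctx_len = max_ctx_by_vram.min(model_max_ctx);
        println!("Full GPU offload, context capped at {} tokens", max_ctx_len);
    }
}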

View File

@ -1,4 +1,4 @@
use serde::Serialize;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::convert::TryFrom;
use std::io;
@ -52,3 +52,42 @@ pub struct GgufMetadata {
pub tensor_count: u64,
pub metadata: HashMap<String, String>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct KVCacheEstimate {
pub size: u64,
pub per_token_size: u64,
}
#[derive(Debug, thiserror::Error)]
pub enum KVCacheError {
#[error("Invalid metadata: architecture not found")]
ArchitectureNotFound,
#[error("Invalid metadata: block_count not found or invalid")]
BlockCountInvalid,
#[error("Invalid metadata: head_count not found or invalid")]
HeadCountInvalid,
#[error("Invalid metadata: embedding_length not found or invalid")]
EmbeddingLengthInvalid,
#[error("Invalid metadata: context_length not found or invalid")]
ContextLengthInvalid,
}
impl serde::Serialize for KVCacheError {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_str(&self.to_string())
}
}
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize)]
pub enum ModelSupportStatus {
#[serde(rename = "RED")]
Red,
#[serde(rename = "YELLOW")]
Yellow,
#[serde(rename = "GREEN")]
Green,
}

View File

@ -0,0 +1,164 @@
use crate::gguf::helpers;
use crate::gguf::types::{GgufMetadata, KVCacheError, KVCacheEstimate};
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
// Read GGUF metadata from a local file or a remote URL
pub async fn read_gguf_metadata_internal(path: String) -> Result<GgufMetadata, String> {
if path.starts_with("http://") || path.starts_with("https://") {
// Remote: read in 2MB chunks until successful
let client = reqwest::Client::new();
let chunk_size = 2 * 1024 * 1024; // Fixed 2MB chunks
let max_total_size = 120 * 1024 * 1024; // Don't exceed 120MB total
let mut total_downloaded = 0;
let mut accumulated_data = Vec::new();
while total_downloaded < max_total_size {
let start = total_downloaded;
let end = std::cmp::min(start + chunk_size - 1, max_total_size - 1);
let resp = client
.get(&path)
.header("Range", format!("bytes={}-{}", start, end))
.send()
.await
.map_err(|e| format!("Failed to fetch chunk {}-{}: {}", start, end, e))?;
let chunk_data = resp
.bytes()
.await
.map_err(|e| format!("Failed to read chunk response: {}", e))?;
accumulated_data.extend_from_slice(&chunk_data);
total_downloaded += chunk_data.len();
// Try parsing after each chunk
let cursor = std::io::Cursor::new(&accumulated_data);
if let Ok(metadata) = helpers::read_gguf_metadata(cursor) {
return Ok(metadata);
}
// If we got less data than expected, we've reached EOF
if chunk_data.len() < chunk_size {
break;
}
}
Err("Could not parse GGUF metadata from downloaded data".to_string())
} else {
// Local: use streaming file reader
let file =
File::open(&path).map_err(|e| format!("Failed to open local file {}: {}", path, e))?;
let reader = BufReader::new(file);
helpers::read_gguf_metadata(reader)
.map_err(|e| format!("Failed to parse GGUF metadata: {}", e))
}
}
/// Estimate the KV cache size from the given GGUF metadata
pub async fn estimate_kv_cache_internal(
meta: HashMap<String, String>,
ctx_size: Option<u64>,
) -> Result<KVCacheEstimate, KVCacheError> {
log::info!("Received ctx_size parameter: {:?}", ctx_size);
let arch = meta
.get("general.architecture")
.ok_or(KVCacheError::ArchitectureNotFound)?;
// Number of layers
let n_layer_key = format!("{}.block_count", arch);
let n_layer = meta
.get(&n_layer_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.ok_or(KVCacheError::BlockCountInvalid)?;
// Attention heads (use kv heads if present, else full heads)
let n_head_key = format!("{}.attention.head_count", arch);
let n_head_kv_key = format!("{}.attention.head_count_kv", arch);
let n_head = meta
.get(&n_head_kv_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.unwrap_or_else(|| {
meta.get(&n_head_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0)
});
if n_head == 0 {
return Err(KVCacheError::HeadCountInvalid);
}
// Key/value dimensions
let key_len_key = format!("{}.attention.key_length", arch);
let val_len_key = format!("{}.attention.value_length", arch);
let key_len = meta
.get(&key_len_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0);
let val_len = meta
.get(&val_len_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0);
if key_len == 0 || val_len == 0 {
return Err(KVCacheError::EmbeddingLengthInvalid);
}
// Context length
let max_ctx_key = format!("{}.context_length", arch);
let max_ctx = meta
.get(&max_ctx_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.ok_or(KVCacheError::ContextLengthInvalid)?;
let ctx_len = ctx_size.map(|size| size.min(max_ctx)).unwrap_or(max_ctx);
// Sliding window if present
let sliding_key = format!("{}.attention.sliding_window", arch);
let sliding_window = meta
.get(&sliding_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0);
// Assume fp16
const BYTES_PER_ELEMENT: u64 = 2;
// Per-token KV size
let kv_per_token = n_layer * n_head * (key_len + val_len) * BYTES_PER_ELEMENT;
// Pure full-attention cost
let full_cost = ctx_len * kv_per_token;
// Pure sliding-window cost (tiny, only keeps last W tokens)
let sliding_cost = sliding_window.map(|w| w * kv_per_token);
// Middle estimate: average of sliding + full if sliding_window is present
let chosen_size = if let Some(slide) = sliding_cost {
let middle = (full_cost + slide) / 2;
log::info!(
"KV estimates -> sliding: {} bytes (~{:.2} MB), full: {} bytes (~{:.2} MB), middle: {} bytes (~{:.2} MB)",
slide,
slide as f64 / (1024.0 * 1024.0),
full_cost,
full_cost as f64 / (1024.0 * 1024.0),
middle,
middle as f64 / (1024.0 * 1024.0)
);
middle
} else {
log::info!(
"KV estimate (no SWA detected) -> full: {} bytes (~{:.2} MB)",
full_cost,
full_cost as f64 / (1024.0 * 1024.0)
);
full_cost
};
Ok(KVCacheEstimate {
size: chosen_size,
per_token_size: kv_per_token,
})
}
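To make the per-token formula above concrete, here is a tiny self-contained sketch with hypothetical Llama-style metadata (32 layers, 8 KV heads, 128-dim keys and values); the values are invented for the example and are not read from any real GGUF file.

fn main() {
    // Hypothetical metadata values, chosen only for illustration.
    let n_layer: u64 = 32;    // block_count
    let n_head_kv: u64 = 8;   // attention.head_count_kv (grouped-query attention)
    let key_len: u64 = 128;   // attention.key_length
    let val_len: u64 = 128;   // attention.value_length
    const BYTES_PER_ELEMENT: u64 = 2; // fp16, as assumed by estimate_kv_cache_internal

    // Same formula as above: n_layer * n_head * (key_len + val_len) * 2 bytes.
    let kv_per_token = n_layer * n_head_kv * (key_len + val_len) * BYTES_PER_ELEMENT;
    assert_eq!(kv_per_token, 131_072); // 128 KiB per cached token

    // Full-attention cost at an 8192-token context: ctx_len * kv_per_token.
    let full_cost = 8_192 * kv_per_token;
    assert_eq!(full_cost, 1_073_741_824); // exactly 1 GiB
    println!("{} bytes per token, {} bytes at an 8k context", kv_per_token, full_cost);
}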

View File

@ -33,6 +33,10 @@ pub fn init<R: Runtime>() -> TauriPlugin<R> {
commands::get_session_by_model,
// GGUF commands
gguf::commands::read_gguf_metadata,
gguf::commands::estimate_kv_cache_size,
gguf::commands::get_model_size,
gguf::commands::is_model_supported,
gguf::model_planner::plan_model_load
])
.setup(|app, _api| {
// Initialize and manage the plugin state

View File

@ -33,6 +33,22 @@ pub fn mkdir<R: Runtime>(app_handle: tauri::AppHandle<R>, args: Vec<String>) ->
fs::create_dir_all(&path).map_err(|e| e.to_string())
}
#[tauri::command]
pub fn mv<R: Runtime>(app_handle: tauri::AppHandle<R>, args: Vec<String>) -> Result<(), String> {
if args.len() < 2 || args[0].is_empty() || args[1].is_empty() {
return Err("mv error: Invalid argument - source and destination required".to_string());
}
let source = resolve_path(app_handle.clone(), &args[0]);
let destination = resolve_path(app_handle, &args[1]);
if !source.exists() {
return Err("mv error: Source path does not exist".to_string());
}
fs::rename(&source, &destination).map_err(|e| e.to_string())
}
#[tauri::command]
pub fn join_path<R: Runtime>(
app_handle: tauri::AppHandle<R>,
@ -193,7 +209,7 @@ pub fn decompress(app: tauri::AppHandle, path: &str, output_dir: &str) -> Result
fs::File::open(&path_buf).map_err(|e| e.to_string())?
}
};
#[cfg(not(windows))]
let file = fs::File::open(&path_buf).map_err(|e| e.to_string())?;
if path.ends_with(".tar.gz") {
@ -222,7 +238,10 @@ pub fn decompress(app: tauri::AppHandle, path: &str, output_dir: &str) -> Result
{
use std::os::unix::fs::PermissionsExt;
if let Some(mode) = entry.unix_mode() {
let _ = std::fs::set_permissions(&outpath, std::fs::Permissions::from_mode(mode));
let _ = std::fs::set_permissions(
&outpath,
std::fs::Permissions::from_mode(mode),
);
}
}
}

View File

@ -8,7 +8,6 @@ use core::{
};
use jan_utils::generate_app_token;
use std::{collections::HashMap, sync::Arc};
use tauri_plugin_deep_link::DeepLinkExt;
use tauri::{Emitter, Manager, RunEvent};
use tauri_plugin_llamacpp::cleanup_llama_processes;
use tokio::sync::Mutex;
@ -54,6 +53,7 @@ pub fn run() {
core::filesystem::commands::readdir_sync,
core::filesystem::commands::read_file_sync,
core::filesystem::commands::rm,
core::filesystem::commands::mv,
core::filesystem::commands::file_stat,
core::filesystem::commands::write_file_sync,
core::filesystem::commands::write_yaml,
@ -163,6 +163,8 @@ pub fn run() {
#[cfg(any(windows, target_os = "linux"))]
{
use tauri_plugin_deep_link::DeepLinkExt;
app.deep_link().register_all()?;
}
setup_mcp(app);

File diff suppressed because it is too large

View File

@ -1,10 +1,13 @@
{
"bundle": {
"targets": ["nsis"],
"resources": ["resources/pre-install/**/*"],
"resources": ["resources/pre-install/**/*", "resources/lib/vulkan-1.dll", "resources/lib/vc_redist.x64.exe", "resources/LICENSE"],
"externalBin": ["resources/bin/bun", "resources/bin/uv"],
"windows": {
"signCommand": "powershell -ExecutionPolicy Bypass -File ./sign.ps1 %1",
"nsis": {
"installerHooks": "./windows/hooks.nsh",
"installerIcon": "icons/icon.ico"
},
"webviewInstallMode": {
"silent": true,
"type": "downloadBootstrapper"

View File

@ -0,0 +1,65 @@
!macro NSIS_HOOK_POSTINSTALL
; Check if Visual C++ Redistributable is already installed
ReadRegStr $0 HKLM "SOFTWARE\Microsoft\VisualStudio\14.0\VC\Runtimes\x64" "Version"
${If} $0 == ""
; Try alternative registry location
ReadRegStr $0 HKLM "SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\x64" "Version"
${EndIf}
${If} $0 == ""
; VC++ Redistributable not found, need to install
DetailPrint "Visual C++ Redistributable not found, installing from bundled file..."
; Install from bundled EXE if not installed
${If} ${FileExists} "$INSTDIR\resources\lib\vc_redist.x64.exe"
DetailPrint "Installing Visual C++ Redistributable..."
; Copy to TEMP folder and then execute installer
CopyFiles "$INSTDIR\resources\lib\vc_redist.x64.exe" "$TEMP\vc_redist.x64.exe"
ExecWait '"$TEMP\vc_redist.x64.exe" /quiet /norestart' $1
; Check whether installation process exited successfully (code 0) or not
${If} $1 == 0
DetailPrint "Visual C++ Redistributable installed successfully"
${ElseIf} $1 == 1638
DetailPrint "Visual C++ Redistributable already installed (newer version)"
${ElseIf} $1 == 3010
DetailPrint "Visual C++ Redistributable installed successfully (restart required)"
${Else}
DetailPrint "Visual C++ installation failed with exit code: $1"
${EndIf}
; Clean up the installer files from TEMP and from the install directory
Delete "$TEMP\vc_redist.x64.exe"
Delete "$INSTDIR\resources\lib\vc_redist.x64.exe"
${Else}
DetailPrint "Visual C++ Redistributable not found at expected location: $INSTDIR\resources\lib\vc_redist.x64.exe"
${EndIf}
${Else}
DetailPrint "Visual C++ Redistributable already installed (version: $0)"
${EndIf}
; ---- Copy LICENSE to install root ----
${If} ${FileExists} "$INSTDIR\resources\LICENSE"
CopyFiles /SILENT "$INSTDIR\resources\LICENSE" "$INSTDIR\LICENSE"
DetailPrint "Copied LICENSE to install root"
; Optional cleanup - remove from resources folder
Delete "$INSTDIR\resources\LICENSE"
${Else}
DetailPrint "LICENSE not found at expected location: $INSTDIR\resources\LICENSE"
${EndIf}
; ---- Copy vulkan-1.dll to install root ----
${If} ${FileExists} "$INSTDIR\resources\lib\vulkan-1.dll"
CopyFiles /SILENT "$INSTDIR\resources\lib\vulkan-1.dll" "$INSTDIR\vulkan-1.dll"
DetailPrint "Copied vulkan-1.dll to install root"
; Optional cleanup - remove from resources folder
Delete "$INSTDIR\resources\lib\vulkan-1.dll"
; Only remove the lib directory if it's empty after removing both files
RMDir "$INSTDIR\resources\lib"
${Else}
DetailPrint "vulkan-1.dll not found at expected location: $INSTDIR\resources\lib\vulkan-1.dll"
${EndIf}
!macroend

View File

@ -18,112 +18,112 @@
"build:serve:web": "yarn build:web && yarn serve:web"
},
"dependencies": {
"@dnd-kit/core": "^6.3.1",
"@dnd-kit/modifiers": "^9.0.0",
"@dnd-kit/sortable": "^10.0.0",
"@dnd-kit/core": "6.3.1",
"@dnd-kit/modifiers": "9.0.0",
"@dnd-kit/sortable": "10.0.0",
"@jan/extensions-web": "link:../extensions-web",
"@janhq/core": "link:../core",
"@radix-ui/react-accordion": "^1.2.10",
"@radix-ui/react-avatar": "^1.1.10",
"@radix-ui/react-dialog": "^1.1.14",
"@radix-ui/react-dropdown-menu": "^2.1.15",
"@radix-ui/react-hover-card": "^1.1.14",
"@radix-ui/react-popover": "^1.1.13",
"@radix-ui/react-progress": "^1.1.4",
"@radix-ui/react-radio-group": "^1.3.7",
"@radix-ui/react-slider": "^1.3.2",
"@radix-ui/react-slot": "^1.2.0",
"@radix-ui/react-switch": "^1.2.2",
"@radix-ui/react-tooltip": "^1.2.4",
"@tabler/icons-react": "^3.33.0",
"@tailwindcss/vite": "^4.1.4",
"@tanstack/react-router": "^1.116.0",
"@tanstack/react-router-devtools": "^1.121.34",
"@tanstack/react-virtual": "^3.13.12",
"@tauri-apps/api": "^2.8.0",
"@radix-ui/react-accordion": "1.2.11",
"@radix-ui/react-avatar": "1.1.10",
"@radix-ui/react-dialog": "1.1.15",
"@radix-ui/react-dropdown-menu": "2.1.16",
"@radix-ui/react-hover-card": "1.1.14",
"@radix-ui/react-popover": "1.1.14",
"@radix-ui/react-progress": "1.1.4",
"@radix-ui/react-radio-group": "1.3.8",
"@radix-ui/react-slider": "1.3.2",
"@radix-ui/react-slot": "1.2.0",
"@radix-ui/react-switch": "1.2.2",
"@radix-ui/react-tooltip": "1.2.4",
"@tabler/icons-react": "3.34.0",
"@tailwindcss/vite": "4.1.4",
"@tanstack/react-router": "1.117.0",
"@tanstack/react-router-devtools": "1.121.34",
"@tanstack/react-virtual": "3.13.12",
"@tauri-apps/api": "2.8.0",
"@tauri-apps/plugin-deep-link": "2.4.3",
"@tauri-apps/plugin-dialog": "^2.2.1",
"@tauri-apps/plugin-http": "^2.2.1",
"@tauri-apps/plugin-opener": "^2.2.7",
"@tauri-apps/plugin-os": "^2.2.1",
"@tauri-apps/plugin-updater": "^2.7.1",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/uuid": "^10.0.0",
"@uiw/react-textarea-code-editor": "^3.1.1",
"class-variance-authority": "^0.7.1",
"culori": "^4.0.1",
"emoji-picker-react": "^4.12.2",
"framer-motion": "^12.23.12",
"fuse.js": "^7.1.0",
"fzf": "^0.5.2",
"i18next": "^25.0.1",
"katex": "^0.16.22",
"lodash.clonedeep": "^4.5.0",
"lodash.debounce": "^4.0.8",
"lucide-react": "^0.536.0",
"motion": "^12.10.5",
"next-themes": "^0.4.6",
"posthog-js": "^1.246.0",
"react": "^19.0.0",
"react-colorful": "^5.6.1",
"react-dom": "^19.0.0",
"react-i18next": "^15.5.1",
"react-joyride": "^2.9.3",
"react-markdown": "^10.1.0",
"react-resizable-panels": "^3.0.3",
"react-syntax-highlighter": "^15.6.1",
"react-syntax-highlighter-virtualized-renderer": "^1.1.0",
"react-textarea-autosize": "^8.5.9",
"rehype-katex": "^7.0.1",
"rehype-raw": "^7.0.0",
"remark-breaks": "^4.0.0",
"remark-emoji": "^5.0.1",
"remark-gfm": "^4.0.1",
"remark-math": "^6.0.0",
"sonner": "^2.0.3",
"tailwindcss": "^4.1.4",
"@tauri-apps/plugin-dialog": "2.2.2",
"@tauri-apps/plugin-http": "2.5.0",
"@tauri-apps/plugin-opener": "2.3.0",
"@tauri-apps/plugin-os": "2.2.1",
"@tauri-apps/plugin-updater": "2.8.1",
"@types/react-syntax-highlighter": "15.5.13",
"@types/uuid": "10.0.0",
"@uiw/react-textarea-code-editor": "3.1.1",
"class-variance-authority": "0.7.1",
"culori": "4.0.1",
"emoji-picker-react": "4.12.2",
"framer-motion": "12.23.12",
"fuse.js": "7.1.0",
"fzf": "0.5.2",
"i18next": "25.0.2",
"katex": "0.16.22",
"lodash.clonedeep": "4.5.0",
"lodash.debounce": "4.0.8",
"lucide-react": "0.536.0",
"motion": "12.18.1",
"next-themes": "0.4.6",
"posthog-js": "1.255.1",
"react": "19.0.0",
"react-colorful": "5.6.1",
"react-dom": "19.0.0",
"react-i18next": "15.5.1",
"react-joyride": "2.9.3",
"react-markdown": "10.1.0",
"react-resizable-panels": "3.0.5",
"react-syntax-highlighter": "15.6.1",
"react-syntax-highlighter-virtualized-renderer": "1.1.0",
"react-textarea-autosize": "8.5.9",
"rehype-katex": "7.0.1",
"rehype-raw": "7.0.0",
"remark-breaks": "4.0.0",
"remark-emoji": "5.0.1",
"remark-gfm": "4.0.1",
"remark-math": "6.0.0",
"sonner": "2.0.5",
"tailwindcss": "4.1.4",
"token.js": "npm:token.js-fork@0.7.27",
"tw-animate-css": "^1.2.7",
"ulidx": "^2.4.1",
"unified": "^11.0.5",
"uuid": "^11.1.0",
"vaul": "^1.1.2",
"zustand": "^5.0.3"
"tw-animate-css": "1.2.8",
"ulidx": "2.4.1",
"unified": "11.0.5",
"uuid": "11.1.0",
"vaul": "1.1.2",
"zustand": "5.0.3"
},
"devDependencies": {
"@eslint/js": "^9.22.0",
"@tanstack/router-plugin": "^1.116.1",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^14.6.1",
"@types/culori": "^2.1.1",
"@types/istanbul-lib-report": "^3",
"@types/istanbul-reports": "^3",
"@types/lodash.clonedeep": "^4",
"@types/lodash.debounce": "^4",
"@types/node": "^22.14.1",
"@types/react": "^19.0.10",
"@types/react-dom": "^19.0.4",
"@vitejs/plugin-react": "^4.3.4",
"@eslint/js": "8.57.0",
"@tanstack/router-plugin": "1.117.0",
"@testing-library/dom": "10.4.1",
"@testing-library/jest-dom": "6.8.0",
"@testing-library/react": "16.3.0",
"@testing-library/user-event": "14.6.1",
"@types/culori": "2.1.1",
"@types/istanbul-lib-report": "3.0.3",
"@types/istanbul-reports": "3.0.4",
"@types/lodash.clonedeep": "4.5.9",
"@types/lodash.debounce": "4.0.9",
"@types/node": "22.14.1",
"@types/react": "19.1.2",
"@types/react-dom": "19.1.2",
"@vitejs/plugin-react": "4.4.1",
"@vitest/coverage-v8": "3.2.4",
"clsx": "^2.1.1",
"eslint": "^9.22.0",
"eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-refresh": "^0.4.19",
"globals": "^16.0.0",
"istanbul-api": "^3.0.0",
"istanbul-lib-coverage": "^3.2.2",
"istanbul-lib-report": "^3.0.1",
"istanbul-reports": "^3.1.7",
"jsdom": "^26.1.0",
"serve": "^14.2.4",
"tailwind-merge": "^3.3.1",
"typescript": "~5.8.3",
"typescript-eslint": "^8.26.1",
"vite": "^6.3.0",
"vite-plugin-node-polyfills": "^0.23.0",
"vite-plugin-pwa": "^1.0.3",
"vitest": "^3.1.3"
"clsx": "2.1.1",
"eslint": "9.25.1",
"eslint-plugin-react-hooks": "5.2.0",
"eslint-plugin-react-refresh": "0.4.20",
"globals": "16.0.0",
"istanbul-api": "3.0.0",
"istanbul-lib-coverage": "2.0.5",
"istanbul-lib-report": "2.0.8",
"istanbul-reports": "3.1.7",
"jsdom": "26.1.0",
"serve": "14.2.5",
"tailwind-merge": "3.3.1",
"typescript": "5.9.2",
"typescript-eslint": "8.31.0",
"vite": "6.3.2",
"vite-plugin-node-polyfills": "0.23.0",
"vite-plugin-pwa": "1.0.3",
"vitest": "3.2.4"
}
}

View File

@ -2,6 +2,7 @@ import { render, screen, fireEvent } from '@testing-library/react'
import { describe, it, expect, vi } from 'vitest'
import userEvent from '@testing-library/user-event'
import React from 'react'
import '@testing-library/jest-dom'
import {
Dialog,
DialogTrigger,
@ -117,7 +118,7 @@ describe('Dialog Components', () => {
it('applies proper classes to dialog content', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -128,27 +129,38 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogContent = screen.getByRole('dialog')
expect(dialogContent).toHaveClass(
'bg-main-view',
'max-h-[calc(100%-80px)]',
'overflow-auto',
'border-main-view-fg/10',
'text-main-view-fg',
'fixed',
'top-[50%]',
'left-[50%]',
'z-50',
'z-[90]',
'grid',
'w-full',
'max-w-[calc(100%-2rem)]',
'translate-x-[-50%]',
'translate-y-[-50%]',
'border',
'gap-4',
'rounded-lg',
'shadow-lg'
'border',
'p-6',
'shadow-lg',
'duration-200',
'sm:max-w-lg'
)
})
it('applies proper classes to dialog header', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -159,11 +171,11 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogHeader = screen.getByText('Dialog Title').closest('div')
expect(dialogHeader).toHaveClass('flex', 'flex-col', 'gap-2', 'text-center')
expect(dialogHeader).toHaveClass('flex', 'flex-col', 'gap-2', 'text-center', 'sm:text-left')
})
it('applies proper classes to dialog title', async () => {
@ -299,7 +311,7 @@ describe('Dialog Components', () => {
it('supports onOpenChange callback', async () => {
const onOpenChange = vi.fn()
const user = userEvent.setup()
render(
<Dialog onOpenChange={onOpenChange}>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -310,9 +322,98 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(onOpenChange).toHaveBeenCalledWith(true)
})
it('can hide close button when showCloseButton is false', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent showCloseButton={false}>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(screen.queryByRole('button', { name: /close/i })).not.toBeInTheDocument()
})
it('shows close button by default', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(screen.getByRole('button', { name: /close/i })).toBeInTheDocument()
})
it('accepts aria-describedby prop', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent aria-describedby="custom-description">
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
<p id="custom-description">Custom description text</p>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogContent = screen.getByRole('dialog')
expect(dialogContent).toHaveAttribute('aria-describedby', 'custom-description')
})
it('applies data-slot attributes to components', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
<DialogDescription>Dialog description</DialogDescription>
</DialogHeader>
<div>Dialog body content</div>
<DialogFooter>
<button>Footer button</button>
</DialogFooter>
</DialogContent>
</Dialog>
)
expect(screen.getByText('Open Dialog')).toHaveAttribute('data-slot', 'dialog-trigger')
await user.click(screen.getByText('Open Dialog'))
expect(screen.getByRole('dialog')).toHaveAttribute('data-slot', 'dialog-content')
expect(screen.getByText('Dialog Title').closest('div')).toHaveAttribute('data-slot', 'dialog-header')
expect(screen.getByText('Dialog Title')).toHaveAttribute('data-slot', 'dialog-title')
expect(screen.getByText('Dialog description')).toHaveAttribute('data-slot', 'dialog-description')
expect(screen.getByText('Footer button').closest('div')).toHaveAttribute('data-slot', 'dialog-footer')
})
})

View File

@ -37,7 +37,7 @@ function DialogOverlay({
<DialogPrimitive.Overlay
data-slot="dialog-overlay"
className={cn(
'data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-main-view/80 backdrop-blur-sm',
'data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-[80] bg-main-view/80 backdrop-blur-sm',
className
)}
{...props}
@ -67,7 +67,7 @@ function DialogContent({
data-slot="dialog-content"
aria-describedby={ariaDescribedBy}
className={cn(
'bg-main-view max-h-[calc(100%-80px)] overflow-auto border-main-view-fg/10 text-main-view-fg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg',
'bg-main-view max-h-[calc(100%-80px)] overflow-auto border-main-view-fg/10 text-main-view-fg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-[90] grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg',
className
)}
{...props}

View File

@ -21,4 +21,5 @@ export const localStorageKey = {
lastUsedAssistant: 'last-used-assistant',
favoriteModels: 'favorite-models',
setupCompleted: 'setup-completed',
threadManagement: 'thread-management',
}

View File

@ -3,6 +3,8 @@ export const route = {
home: '/',
appLogs: '/logs',
assistant: '/assistant',
project: '/project',
projectDetail: '/project/$projectId',
settings: {
index: '/settings',
model_providers: '/settings/providers',

View File

@ -4,6 +4,7 @@ import TextareaAutosize from 'react-textarea-autosize'
import { cn } from '@/lib/utils'
import { usePrompt } from '@/hooks/usePrompt'
import { useThreads } from '@/hooks/useThreads'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useCallback, useEffect, useRef, useState } from 'react'
import { Button } from '@/components/ui/button'
import {
@ -43,9 +44,15 @@ type ChatInputProps = {
showSpeedToken?: boolean
model?: ThreadModel
initialMessage?: boolean
projectId?: string
}
const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
const ChatInput = ({
model,
className,
initialMessage,
projectId,
}: ChatInputProps) => {
const textareaRef = useRef<HTMLTextAreaElement>(null)
const [isFocused, setIsFocused] = useState(false)
const [rows, setRows] = useState(1)
@ -58,6 +65,8 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
const prompt = usePrompt((state) => state.prompt)
const setPrompt = usePrompt((state) => state.setPrompt)
const currentThreadId = useThreads((state) => state.currentThreadId)
const updateThread = useThreads((state) => state.updateThread)
const { getFolderById } = useThreadManagement()
const { t } = useTranslation()
const spellCheckChatInput = useGeneralSetting(
(state) => state.spellCheckChatInput
@ -177,6 +186,28 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
uploadedFiles.length > 0 ? uploadedFiles : undefined
)
setUploadedFiles([])
// Handle project assignment for new threads
if (projectId && !currentThreadId) {
const project = getFolderById(projectId)
if (project) {
// Use setTimeout to ensure the thread is created first
setTimeout(() => {
const newCurrentThreadId = useThreads.getState().currentThreadId
if (newCurrentThreadId) {
updateThread(newCurrentThreadId, {
metadata: {
project: {
id: project.id,
name: project.name,
updated_at: project.updated_at,
},
},
})
}
}, 100)
}
}
}
useEffect(() => {

View File

@ -0,0 +1,142 @@
import { Button } from '@/components/ui/button'
import { Progress } from '@/components/ui/progress'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useServiceHub } from '@/hooks/useServiceHub'
import { useTranslation } from '@/i18n'
import { extractModelName } from '@/lib/models'
import { cn, sanitizeModelId } from '@/lib/utils'
import { CatalogModel } from '@/services/models/types'
import { useCallback, useMemo } from 'react'
import { useShallow } from 'zustand/shallow'
type ModelProps = {
model: CatalogModel
handleUseModel: (modelId: string) => void
}
const defaultModelQuantizations = ['iq4_xs', 'q4_k_m']
export function DownloadButtonPlaceholder({
model,
handleUseModel,
}: ModelProps) {
const { downloads, localDownloadingModels, addLocalDownloadingModel } =
useDownloadStore(
useShallow((state) => ({
downloads: state.downloads,
localDownloadingModels: state.localDownloadingModels,
addLocalDownloadingModel: state.addLocalDownloadingModel,
}))
)
const { t } = useTranslation()
const getProviderByName = useModelProvider((state) => state.getProviderByName)
const llamaProvider = getProviderByName('llamacpp')
const serviceHub = useServiceHub()
const huggingfaceToken = useGeneralSetting((state) => state.huggingfaceToken)
const quant =
model.quants.find((e) =>
defaultModelQuantizations.some((m) =>
e.model_id.toLowerCase().includes(m)
)
) ?? model.quants[0]
const modelId = quant?.model_id || model.model_name
const downloadProcesses = useMemo(
() =>
Object.values(downloads).map((download) => ({
id: download.name,
name: download.name,
progress: download.progress,
current: download.current,
total: download.total,
})),
[downloads]
)
const isRecommendedModel = useCallback((modelId: string) => {
return (extractModelName(modelId)?.toLowerCase() ===
'jan-nano-gguf') as boolean
}, [])
if (model.quants.length === 0) {
return (
<div className="flex items-center gap-2">
<Button
size="sm"
onClick={() => {
window.open(`https://huggingface.co/${model.model_name}`, '_blank')
}}
>
View on HuggingFace
</Button>
</div>
)
}
const modelUrl = quant?.path || modelId
const isDownloading =
localDownloadingModels.has(modelId) ||
downloadProcesses.some((e) => e.id === modelId)
const downloadProgress =
downloadProcesses.find((e) => e.id === modelId)?.progress || 0
const isDownloaded = llamaProvider?.models.some(
(m: { id: string }) =>
m.id === modelId ||
m.id === `${model.developer}/${sanitizeModelId(modelId)}`
)
const isRecommended = isRecommendedModel(model.model_name)
const handleDownload = () => {
// Immediately set local downloading state
addLocalDownloadingModel(modelId)
const mmprojPath = (
model.mmproj_models?.find(
(e) => e.model_id.toLowerCase() === 'mmproj-f16'
) || model.mmproj_models?.[0]
)?.path
serviceHub
.models()
.pullModelWithMetadata(modelId, modelUrl, mmprojPath, huggingfaceToken)
}
return (
<div
className={cn(
'flex items-center',
isRecommended && 'hub-download-button-step'
)}
>
{isDownloading && !isDownloaded && (
<div className={cn('flex items-center gap-2 w-20')}>
<Progress value={downloadProgress * 100} />
<span className="text-xs text-center text-main-view-fg/70">
{Math.round(downloadProgress * 100)}%
</span>
</div>
)}
{isDownloaded ? (
<Button
size="sm"
onClick={() => handleUseModel(modelId)}
data-test-id={`hub-model-${modelId}`}
>
{t('hub:use')}
</Button>
) : (
<Button
data-test-id={`hub-model-${modelId}`}
size="sm"
onClick={handleDownload}
className={cn(isDownloading && 'hidden')}
>
{t('hub:download')}
</Button>
)}
</div>
)
}

View File

@ -400,20 +400,33 @@ export function DownloadManagement() {
className="text-main-view-fg/70 cursor-pointer"
title="Cancel download"
onClick={() => {
serviceHub.models().abortDownload(download.name).then(() => {
toast.info(
t('common:toast.downloadCancelled.title'),
{
id: 'cancel-download',
description: t(
'common:toast.downloadCancelled.description'
),
}
)
if (downloadProcesses.length === 0) {
setIsPopoverOpen(false)
}
})
// TODO: Consolidate cancellation logic
if (download.id.startsWith('llamacpp')) {
const downloadManager =
window.core.extensionManager.getByName(
'@janhq/download-extension'
)
downloadManager.cancelDownload(download.id)
} else {
serviceHub
.models()
.abortDownload(download.name)
.then(() => {
toast.info(
t('common:toast.downloadCancelled.title'),
{
id: 'cancel-download',
description: t(
'common:toast.downloadCancelled.description'
),
}
)
if (downloadProcesses.length === 0) {
setIsPopoverOpen(false)
}
})
}
}}
/>
</div>

View File

@ -4,14 +4,18 @@ import { cn } from '@/lib/utils'
import {
IconLayoutSidebar,
IconDots,
IconCirclePlusFilled,
IconSettingsFilled,
IconCirclePlus,
IconSettings,
IconStar,
IconMessageFilled,
IconAppsFilled,
IconFolderPlus,
IconMessage,
IconApps,
IconX,
IconSearch,
IconClipboardSmileFilled,
IconClipboardSmile,
IconFolder,
IconPencil,
IconTrash,
} from '@tabler/icons-react'
import { route } from '@/constants/routes'
import ThreadList from './ThreadList'
@ -28,6 +32,7 @@ import { UserProfileMenu } from '@/containers/auth/UserProfileMenu'
import { useAuth } from '@/hooks/useAuth'
import { useThreads } from '@/hooks/useThreads'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { useMemo, useState, useEffect, useRef } from 'react'
@ -35,40 +40,42 @@ import { toast } from 'sonner'
import { DownloadManagement } from '@/containers/DownloadManegement'
import { useSmallScreen } from '@/hooks/useMediaQuery'
import { useClickOutside } from '@/hooks/useClickOutside'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { DeleteAllThreadsDialog } from '@/containers/dialogs'
import AddProjectDialog from '@/containers/dialogs/AddProjectDialog'
import { DeleteProjectDialog } from '@/containers/dialogs/DeleteProjectDialog'
const mainMenus = [
{
title: 'common:newChat',
icon: IconCirclePlusFilled,
icon: IconCirclePlus,
route: route.home,
isEnabled: true,
},
{
title: 'common:projects.title',
icon: IconFolderPlus,
route: route.project,
isEnabled: true,
},
{
title: 'common:assistants',
icon: IconClipboardSmileFilled,
icon: IconClipboardSmile,
route: route.assistant,
isEnabled: PlatformFeatures[PlatformFeature.ASSISTANTS],
},
{
title: 'common:hub',
icon: IconAppsFilled,
icon: IconApps,
route: route.hub.index,
isEnabled: PlatformFeatures[PlatformFeature.MODEL_HUB],
},
{
title: 'common:settings',
icon: IconSettingsFilled,
icon: IconSettings,
route: route.settings.general,
isEnabled: true,
},
{
title: 'common:authentication',
icon: null,
route: null,
isEnabled: PlatformFeatures[PlatformFeature.AUTHENTICATION],
},
]
const LeftPanel = () => {
@ -122,7 +129,7 @@ const LeftPanel = () => {
) {
if (currentIsSmallScreen && open) {
setLeftPanel(false)
} else if(!open) {
} else if (!open) {
setLeftPanel(true)
}
prevScreenSizeRef.current = currentIsSmallScreen
@ -152,20 +159,65 @@ const LeftPanel = () => {
const getFilteredThreads = useThreads((state) => state.getFilteredThreads)
const threads = useThreads((state) => state.threads)
const { folders, addFolder, updateFolder, deleteFolder, getFolderById } =
useThreadManagement()
// Project dialog states
const [projectDialogOpen, setProjectDialogOpen] = useState(false)
const [editingProjectKey, setEditingProjectKey] = useState<string | null>(
null
)
const [deleteProjectConfirmOpen, setDeleteProjectConfirmOpen] =
useState(false)
const [deletingProjectId, setDeletingProjectId] = useState<string | null>(
null
)
const filteredThreads = useMemo(() => {
return getFilteredThreads(searchTerm)
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [getFilteredThreads, searchTerm, threads])
const filteredProjects = useMemo(() => {
if (!searchTerm) return folders
return folders.filter((folder) =>
folder.name.toLowerCase().includes(searchTerm.toLowerCase())
)
}, [folders, searchTerm])
// Memoize categorized threads based on filteredThreads
const favoritedThreads = useMemo(() => {
return filteredThreads.filter((t) => t.isFavorite)
}, [filteredThreads])
const unFavoritedThreads = useMemo(() => {
return filteredThreads.filter((t) => !t.isFavorite)
return filteredThreads.filter((t) => !t.isFavorite && !t.metadata?.project)
}, [filteredThreads])
// Project handlers
const handleProjectDelete = (id: string) => {
setDeletingProjectId(id)
setDeleteProjectConfirmOpen(true)
}
const confirmProjectDelete = () => {
if (deletingProjectId) {
deleteFolder(deletingProjectId)
setDeleteProjectConfirmOpen(false)
setDeletingProjectId(null)
}
}
const handleProjectSave = (name: string) => {
if (editingProjectKey) {
updateFolder(editingProjectKey, name)
} else {
addFolder(name)
}
setProjectDialogOpen(false)
setEditingProjectKey(null)
}
// Disable body scroll when panel is open on small screens
useEffect(() => {
if (isSmallScreen && open) {
@ -179,8 +231,6 @@ const LeftPanel = () => {
}
}, [isSmallScreen, open])
const { downloads, localDownloadingModels } = useDownloadStore()
return (
<>
{/* Backdrop overlay for small screens */}
@ -262,22 +312,12 @@ const LeftPanel = () => {
)}
</div>
<div className="flex flex-col justify-between overflow-hidden mt-0 !h-[calc(100%-42px)]">
<div
className={cn(
'flex flex-col',
Object.keys(downloads).length > 0 || localDownloadingModels.size > 0
? 'h-[calc(100%-200px)]'
: 'h-[calc(100%-140px)]'
)}
>
<div className="flex flex-col gap-y-1 overflow-hidden mt-0 !h-[calc(100%-42px)]">
<div className="space-y-1 py-1">
{IS_MACOS && (
<div
ref={searchContainerMacRef}
className={cn(
'relative mb-4 mt-1',
isResizableContext ? 'mx-2' : 'mx-1'
)}
className={cn('relative mb-2 mt-1 mx-1')}
data-ignore-outside-clicks
>
<IconSearch className="absolute size-4 top-1/2 left-2 -translate-y-1/2 text-left-panel-fg/50" />
@ -303,7 +343,151 @@ const LeftPanel = () => {
)}
</div>
)}
<div className="flex flex-col w-full overflow-y-auto overflow-x-hidden">
{mainMenus.map((menu) => {
if (!menu.isEnabled) {
return null
}
// Handle authentication menu specially
if (menu.title === 'common:authentication') {
return (
<div key={menu.title}>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? (
<UserProfileMenu />
) : (
<AuthLoginButton />
)}
</div>
)
}
// Regular menu items must have route and icon
if (!menu.route || !menu.icon) return null
const isActive = (() => {
// Settings routes
if (menu.route.includes(route.settings.index)) {
return currentPath.includes(route.settings.index)
}
// Default exact match for other routes
return currentPath === menu.route
})()
return (
<Link
key={menu.title}
to={menu.route}
onClick={() => isSmallScreen && setLeftPanel(false)}
data-test-id={`menu-${menu.title}`}
activeOptions={{ exact: true }}
className={cn(
'flex items-center gap-1.5 cursor-pointer hover:bg-left-panel-fg/10 py-1 px-1 rounded',
isActive && 'bg-left-panel-fg/10'
)}
>
<menu.icon size={18} className="text-left-panel-fg/70" />
<span className="font-medium text-left-panel-fg/90">
{t(menu.title)}
</span>
</Link>
)
})}
</div>
{filteredProjects.length > 0 && (
<div className="space-y-1 py-1">
<div className="flex items-center justify-between mb-2">
<span className="block text-xs text-left-panel-fg/50 px-1 font-semibold">
{t('common:projects.title')}
</span>
</div>
<div className="flex flex-col max-h-[140px] overflow-y-scroll">
{filteredProjects
.slice()
.sort((a, b) => b.updated_at - a.updated_at)
.map((folder) => {
const ProjectItem = () => {
const [openDropdown, setOpenDropdown] = useState(false)
const isProjectActive =
currentPath === `/project/${folder.id}`
return (
<div key={folder.id} className="mb-1">
<div
className={cn(
'rounded hover:bg-left-panel-fg/10 flex items-center justify-between gap-2 px-1.5 group/project-list transition-all cursor-pointer',
isProjectActive && 'bg-left-panel-fg/10'
)}
>
<Link
to="/project/$projectId"
params={{ projectId: folder.id }}
onClick={() =>
isSmallScreen && setLeftPanel(false)
}
className="py-1 pr-2 truncate flex items-center gap-2 flex-1"
>
<IconFolder
size={16}
className="text-left-panel-fg/70 shrink-0"
/>
<span className="text-sm text-left-panel-fg/90 truncate">
{folder.name}
</span>
</Link>
<div className="flex items-center">
<DropdownMenu
open={openDropdown}
onOpenChange={(open) => setOpenDropdown(open)}
>
<DropdownMenuTrigger asChild>
<IconDots
size={14}
className="text-left-panel-fg/60 shrink-0 cursor-pointer px-0.5 -mr-1 data-[state=open]:bg-left-panel-fg/10 rounded group-hover/project-list:data-[state=closed]:size-5 size-5 data-[state=closed]:size-0"
onClick={(e) => {
e.preventDefault()
e.stopPropagation()
}}
/>
</DropdownMenuTrigger>
<DropdownMenuContent side="bottom" align="end">
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
setEditingProjectKey(folder.id)
setProjectDialogOpen(true)
}}
>
<IconPencil size={16} />
<span>Edit</span>
</DropdownMenuItem>
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
handleProjectDelete(folder.id)
}}
>
<IconTrash size={16} />
<span>Delete</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</div>
</div>
</div>
)
}
return <ProjectItem key={folder.id} />
})}
</div>
</div>
)}
<div className="flex flex-col h-full overflow-y-scroll w-[calc(100%+6px)]">
<div className="flex flex-col w-full h-full overflow-y-auto overflow-x-hidden">
<div className="h-full w-full overflow-y-auto">
{favoritedThreads.length > 0 && (
<>
@ -379,7 +563,9 @@ const LeftPanel = () => {
</button>
</DropdownMenuTrigger>
<DropdownMenuContent side="bottom" align="end">
<DeleteAllThreadsDialog onDeleteAll={deleteAllThreads} />
<DeleteAllThreadsDialog
onDeleteAll={deleteAllThreads}
/>
</DropdownMenuContent>
</DropdownMenu>
</div>
@ -404,7 +590,7 @@ const LeftPanel = () => {
<>
<div className="px-1 mt-2">
<div className="flex items-center gap-1 text-left-panel-fg/80">
<IconMessageFilled size={18} />
<IconMessage size={18} />
<h6 className="font-medium text-base">
{t('common:noThreadsYet')}
</h6>
@ -421,58 +607,38 @@ const LeftPanel = () => {
</div>
</div>
</div>
</div>
{PlatformFeatures[PlatformFeature.AUTHENTICATION] && (
<div className="space-y-1 shrink-0 py-1">
<div>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? <UserProfileMenu /> : <AuthLoginButton />}
</div>
</div>
)}
<div className="space-y-1 shrink-0 py-1 mt-2">
{mainMenus.map((menu) => {
if (!menu.isEnabled) {
return null
}
// Handle authentication menu specially
if (menu.title === 'common:authentication') {
return (
<div key={menu.title}>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? (
<UserProfileMenu />
) : (
<AuthLoginButton />
)}
</div>
)
}
// Regular menu items must have route and icon
if (!menu.route || !menu.icon) return null
const isActive =
currentPath.includes(route.settings.index) &&
menu.route.includes(route.settings.index)
return (
<Link
key={menu.title}
to={menu.route}
onClick={() => isSmallScreen && setLeftPanel(false)}
data-test-id={`menu-${menu.title}`}
className={cn(
'flex items-center gap-1.5 cursor-pointer hover:bg-left-panel-fg/10 py-1 px-1 rounded',
isActive
? 'bg-left-panel-fg/10'
: '[&.active]:bg-left-panel-fg/10'
)}
>
<menu.icon size={18} className="text-left-panel-fg/70" />
<span className="font-medium text-left-panel-fg/90">
{t(menu.title)}
</span>
</Link>
)
})}
<DownloadManagement />
</div>
</div>
</aside>
{/* Project Dialogs */}
<AddProjectDialog
open={projectDialogOpen}
onOpenChange={setProjectDialogOpen}
editingKey={editingProjectKey}
initialData={
editingProjectKey ? getFolderById(editingProjectKey) : undefined
}
onSave={handleProjectSave}
/>
<DeleteProjectDialog
open={deleteProjectConfirmOpen}
onOpenChange={setDeleteProjectConfirmOpen}
onConfirm={confirmProjectDelete}
projectName={
deletingProjectId ? getFolderById(deletingProjectId)?.name : undefined
}
/>
</>
)
}

Some files were not shown because too many files have changed in this diff