Merge remote-tracking branch 'origin/dev' into mobile/dev

# Conflicts:
#	web-app/src/routeTree.gen.ts
#	web-app/src/routes/index.tsx
Vanalite 2025-09-26 11:09:50 +07:00
commit a0aa0074f4
86 changed files with 4312 additions and 1359 deletions

View File

@ -79,8 +79,33 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json

View File

@ -100,13 +100,36 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Change app name for beta and nightly builds
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json

View File

@ -53,7 +53,7 @@ on:
value: ${{ jobs.build-linux-x64.outputs.APPIMAGE_FILE_NAME }}
jobs:
build-linux-x64:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
outputs:
DEB_SIG: ${{ steps.packageinfo.outputs.DEB_SIG }}
APPIMAGE_SIG: ${{ steps.packageinfo.outputs.APPIMAGE_SIG }}
@ -117,11 +117,34 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Change app name for beta and nightly builds

View File

@ -42,31 +42,6 @@ jobs:
run: |
cargo install ctoml
- name: Create bun and uv universal
run: |
mkdir -p ./src-tauri/resources/bin/
cd ./src-tauri/resources/bin/
curl -L -o bun-darwin-x64.zip https://github.com/oven-sh/bun/releases/download/bun-v1.2.10/bun-darwin-x64.zip
curl -L -o bun-darwin-aarch64.zip https://github.com/oven-sh/bun/releases/download/bun-v1.2.10/bun-darwin-aarch64.zip
unzip bun-darwin-x64.zip
unzip bun-darwin-aarch64.zip
lipo -create -output bun-universal-apple-darwin bun-darwin-x64/bun bun-darwin-aarch64/bun
cp -f bun-darwin-aarch64/bun bun-aarch64-apple-darwin
cp -f bun-darwin-x64/bun bun-x86_64-apple-darwin
cp -f bun-universal-apple-darwin bun
curl -L -o uv-x86_64.tar.gz https://github.com/astral-sh/uv/releases/download/0.6.17/uv-x86_64-apple-darwin.tar.gz
curl -L -o uv-arm64.tar.gz https://github.com/astral-sh/uv/releases/download/0.6.17/uv-aarch64-apple-darwin.tar.gz
tar -xzf uv-x86_64.tar.gz
tar -xzf uv-arm64.tar.gz
mv uv-x86_64-apple-darwin uv-x86_64
mv uv-aarch64-apple-darwin uv-aarch64
lipo -create -output uv-universal-apple-darwin uv-x86_64/uv uv-aarch64/uv
cp -f uv-x86_64/uv uv-x86_64-apple-darwin
cp -f uv-aarch64/uv uv-aarch64-apple-darwin
cp -f uv-universal-apple-darwin uv
ls -la
- name: Update app version
run: |
echo "Version: ${{ inputs.new_version }}"
@ -74,8 +49,35 @@ jobs:
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json

View File

@ -101,7 +101,30 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build

View File

@ -54,9 +54,32 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------Cargo.toml---------"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json

View File

@ -97,9 +97,31 @@ jobs:
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------Cargo.toml---------"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Add sign commands to tauri.windows.conf.json

View File

@ -72,6 +72,9 @@ lint: install-and-build
test: lint
yarn download:bin
yarn download:lib
ifeq ($(OS),Windows_NT)
yarn download:windows-installer
endif
yarn test
yarn copy:assets:tauri
yarn build:icon

View File

@ -240,6 +240,12 @@ export abstract class AIEngine extends BaseExtension {
EngineManager.instance().register(this)
}
/**
* Gets model info
* @param modelId
*/
abstract get(modelId: string): Promise<modelInfo | undefined>
/**
* Lists available models
*/
@ -283,11 +289,6 @@ export abstract class AIEngine extends BaseExtension {
*/
abstract getLoadedModels(): Promise<string[]>
/**
* Optional method to get the underlying chat client
*/
getChatClient?(sessionId: string): any
/**
* Check if a tool is supported by the model
* @param modelId

View File

@ -0,0 +1,79 @@
/about /handbook 302
/about/ /handbook 302
/about/community /handbook 302
/about/handbook /handbook 302
/about/handbook/analytics /handbook 302
/about/handbook/project-management /handbook 302
/about/handbook/strategy /handbook 302
/about/handbook/website-docs /handbook 302
/about/how-we-work/product-design /handbook 302
/about/how-we-work/strategy /handbook 302
/about/investors /handbook 302
/about/roadmap /handbook 302
/about/team /handbook 302
/about/vision /handbook 302
/about/wall-of-love /handbook 302
/handbook/contributing-to-jan/ /handbook 302
/handbook/core-contributors/how-we-hire/ /handbook 302
/handbook/engineering-excellence/ /handbook 302
/handbook/engineering/ /handbook 302
/handbook/product-and-community/ /handbook 302
/handbook/products-and-innovations/ /handbook 302
/handbook/what-we-do/our-approach-to-design/ /handbook 302
/how-we-work/product-design /handbook 302
/handbook/product-and-community/approaches-to-beta-testing-and-user-engagement/ /handbook 302
/cortex/assistants /docs/ 302
/cortex/build-extension /docs/ 302
/cortex/built-in/tensorrt-llm /docs/ 302
/cortex/cli/kill /docs/ 302
/cortex/command-line /docs/ 302
/cortex/cortex-openvino /docs/ 302
/cortex/cortex-python /docs/ 302
/cortex/cortex-tensorrt-llm /docs/ 302
/cortex/desktop-installation/linux /docs/ 302
/cortex/embeddings /docs/ 302
/cortex/ext-architecture /docs/ 302
/cortex/fine-tuning /docs/ 302
/cortex/fine-tuning/overview /docs/ 302
/cortex/function-calling /docs/ 302
/cortex/installation/linux /docs/ 302
/cortex/installation/mac /docs/ 302
/cortex/model-operations /docs/ 302
/cortex/model-operations/overview /docs/ 302
/cortex/rag/overview /docs/ 302
/cortex/server /docs/ 302
/docs/tools/retrieval /docs/ 302
/developer/framework/engineering/chats /docs/ 302
/developer/framework/engineering/threads/ /docs/ 302
/developer/framework/product/chat /docs/ 302
/docs/extensions /docs/ 302
/docs/shortcuts /docs/ 302
/docs/models /docs/ 302
/integrations/function-calling/interpreter /docs/ 302
/docs/built-in/llama-cpp /docs/desktop/llama-cpp 302
/docs/install-engines /docs/desktop/llama-cpp 302
/docs/local-api /docs/desktop/api-server 302
/docs/local-engines/llama-cpp /docs/desktop/llama-cpp 302
/docs/models/manage-models /docs/desktop/manage-models 302
/cortex/vision /handbook/open-superintelligence 302
/docs/models/model-parameters /docs/desktop/model-parameters 302
/docs/remote-models/generic-openai /docs/desktop/remote-models/openai 302
/docs/threads /changelog/2024-01-16-settings-options-right-panel 302
/guides/integrations/continue/ /docs/desktop/server-examples/continue-dev 302
/continue-dev /docs/desktop/server-examples/continue-dev 302
/integrations /docs/desktop/server-examples/continue-dev 302
/integrations/coding/continue-dev /docs/desktop/server-examples/continue-dev 302
/integrations/continue/ /docs/desktop/server-examples/continue-dev 302
/integrations/coding/tabby /docs/desktop/server-examples/tabby 302
/integrations/messaging/llmcord /docs/desktop/server-examples/llmcord 302
/integrations/workflow-automation/n8n /docs/desktop/server-examples/n8n 302
/local-server/continue-dev /docs/desktop/server-examples/continue-dev 302
/local-server/data-folder /docs/desktop/desktop/data-folder 302
/local-server/llama-cpp /docs/desktop/desktop/llama-cpp 302
/local-server/n8n /docs/desktop/server-examples/n8n 302
/local-server/settings /docs/desktop/server-settings 302
/local-server/tabby /docs/desktop/server-examples/tabby 302
/local-server/troubleshooting /docs/desktop/troubleshooting 302
/mcp /docs/desktop/mcp 302
/quickstart /docs/desktop/quickstart 302
/server-examples/continue-dev /docs/desktop/server-examples/continue-dev 302

View File

@ -77,9 +77,9 @@ export default function Footer() {
return (
<footer className="py-4 w-full">
<div className="mx-auto">
<div className="grid grid-cols-1 md:grid-cols-6 gap-8">
<div className="grid grid-cols-1 lg:grid-cols-6 gap-8">
{/* Jan Logo and Newsletter */}
<div className="md:col-span-2">
<div className="md:col-span-1 lg:col-span-2">
<h2 className="text-[52px] font-bold mb-6">Jan</h2>
<div>
<div className="flex items-center gap-2 mb-3">
@ -138,7 +138,7 @@ export default function Footer() {
{/* Menu Columns */}
{FOOTER_MENUS.map((menu) => (
<div key={menu.title} className="">
<h3 className="text-lg mb-4 font-bold">{menu.title}</h3>
<h3 className="text-base mb-4 font-bold">{menu.title}</h3>
<ul className="space-y-2">
{menu.links.map((link) => (
<li key={link.name}>

View File

@ -124,7 +124,7 @@ const Home = () => {
</h1>
</div>
<p className="px-4 lg:px-0 mt-2 text-lg lg:text-2xl font-medium leading-relaxed text-white animate-fade-in-up delay-500 -tracking-[0.6px]">
Jan is the open-source ChatGPT replacement.
The best of open-source AI in an easy-to-use product.
</p>
</div>
<div className="flex px-4 flex-col lg:flex-row items-center gap-4 w-full justify-center text-center animate-fade-in-up delay-600 mt-8 lg:mt-10">

Binary file not shown. (Added; 288 KiB)
Binary file not shown. (Added; 634 KiB)
Binary file not shown. (Added; 30 KiB)
Binary file not shown. (Added; 30 KiB)
Binary file not shown. (Added; 154 KiB)
Binary file not shown. (Added; 134 KiB)
Binary file not shown. (Added; 116 KiB)
File diff suppressed because one or more lines are too long. (Added; 30 KiB)
Binary file not shown. (Added; 38 KiB)

View File

@ -0,0 +1,268 @@
---
title: "How we (try to) benchmark GPU kernels accurately"
description: "We present the process behind how we decided to benchmark GPU kernels and iteratively improved our benchmarking pipeline"
tags: ""
categories: research
ogImage: "./_assets/cover-kernel-benchmarking.png"
date: 2025-09-17
---
# How we (try to) benchmark GPU kernels accurately
If you've read another kernel benchmarking article before, a lot of the information in this blog post may already be second nature to you. In fact, we would like to start by acknowledging the blood, sweat, and tears poured into the many kernel benchmarking guides written before ours, which helped us write better benchmarking code as well as this blog post.
Over here at Menlo, we recently acquired some [RTX PRO 6000 Blackwell Workstation Edition](https://www.nvidia.com/en-sg/products/workstations/professional-desktop-gpus/rtx-pro-6000/) GPUs, and we are trying to make LLM inference engines like [vLLM](https://github.com/vllm-project/vllm) run faster on them. We've been writing our own kernels specifically for the RTX PRO 6000 to see if we can improve inference times on our hardware.
This post details how our ML Efficiency team identified problems in our benchmarking code and how we iterated on it by following the various excellent benchmarking guides out there. Without further ado, let's work our way up from benchmarking simple programs to GPU kernels.
## Introduction to kernels and benchmarking
For those new to GPU programming, a [kernel](https://modal.com/gpu-glossary/device-software/kernel) is a piece of CUDA code that programmers write to execute a desired sequence of operations on the GPU. A kernel is launched once and executed by many threads running concurrently; these threads are organized into a [thread block grid](https://modal.com/gpu-glossary/device-software/thread-block-grid), which executes the kernel across multiple [Streaming Multiprocessors (SMs)](https://modal.com/gpu-glossary/device-hardware/streaming-multiprocessor) spanning the whole GPU.
Benchmarking is a fundamental aspect of high-performance computing. It lets us quantitatively compare kernel performance across different problem sizes and understand how various hyperparameters impact execution speed. For GPU kernel development, benchmarking helps us iteratively optimize our kernels so that they utilize the GPU better.
That said, what really matters is **accurate** kernel benchmarking: timing code that runs on the GPU gets complex quickly, and there are many traps to fall into if benchmarking scripts are written carelessly. A great alternative is to use the tools NVIDIA ships with the [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit), such as the [Nsight CUDA Profiling Tools Interface](https://developer.nvidia.com/cupti) (CUPTI) or the [Nsight Compute CLI](https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html) (`ncu`), which provide accurate measurements of various kernel characteristics. We wanted to use Python, since it makes it convenient to sweep across different problem shapes and kernels quickly, but that meant we also had to learn how to benchmark kernels properly from scratch.
Below, we showcase some examples of how to benchmark kernels on the GPU. We chose Python for most of our benchmarking code because most of our codebase is already in Python, which makes integration simple.
## Benchmarking CUDA programs
PyTorch provides a basic API for timing `torch` programs, described in this [tutorial](https://docs.pytorch.org/tutorials/recipes/recipes/benchmark.html).
A basic implementation can be as simple as:
```python
import torch
import torch.utils.benchmark as benchmark

def batched_dot_mul_sum(a, b):
    '''Computes batched dot by multiplying and summing'''
    return a.mul(b).sum(-1)

x = torch.randn(10000, 64)  # example input, as in the tutorial

num_threads = torch.get_num_threads()
print(f'Benchmarking on {num_threads} threads')

t0 = benchmark.Timer(
    stmt='batched_dot_mul_sum(x, x)',
    setup='from __main__ import batched_dot_mul_sum',
    globals={'x': x},
    num_threads=num_threads,
    label='Multithreaded batch dot',
    sub_label='Implemented using mul and sum')
```
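To actually collect a measurement, you then run the timer. A minimal usage sketch (`Timer.timeit` is the standard `torch.utils.benchmark` method; it runs the statement the given number of times and reports timing statistics):
```python
# Run batched_dot_mul_sum(x, x) 100 times and print the aggregated measurement
print(t0.timeit(100))
```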
When benchmarking kernels, there are a few practices we should follow to make sure our measurements are accurate.
### 1. Benchmark with the **settings the user will see**, not the settings on your machine
Measuring how fast your kernels run on a 3090 is meaningless if you serve your models on an H100 DGX node. Always benchmark your kernels on the hardware you plan to serve on.
### 2. **Warm up your kernels**
Take a look at this snippet from the tutorial.
```
mul_sum(x, x): 27.6 μs
mul_sum(x, x): 25.3 μs
bmm(x, x): 2775.5 μs
bmm(x, x): 22.4 μs
```
The first `bmm` call takes far longer to run, because most of that time is spent loading [cuBLAS](https://developer.nvidia.com/cublas) kernels on first use.
Warming up your kernels can be as simple as running the kernel before timing it. This pays one-time loading costs up front, so that we measure only how long the kernel itself takes to run.
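As a minimal sketch (assuming `run_kernel()` wraps the operation under test), the warmup is just an untimed loop before the timed region:
```python
import torch

def warmup(run_kernel, n_warmup: int = 10) -> None:
    # Untimed runs: pay one-time costs (e.g. cuBLAS module loading) up front
    for _ in range(n_warmup):
        run_kernel()
    # Wait for the warmup work to finish before any timing starts
    torch.cuda.synchronize()
```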
### 3. `torch.cuda.synchronize` and CUDA Events
Now we'll introduce a new API that is the standard way to benchmark kernels. [CUDA events](https://docs.pytorch.org/docs/stable/generated/torch.cuda.Event.html) are great for a variety of reasons. The simplest is that they measure time from the perspective of the GPU, whereas `time.time()` and `time.perf_counter()` measure it from the perspective of the CPU.
Moreover, their simple API lets you write benchmarking code like this:
```python
steps = 10
start_events = [torch.cuda.Event(enable_timing=True) for _ in range(steps)]
end_events = [torch.cuda.Event(enable_timing=True) for _ in range(steps)]

for i in range(steps):
    start_events[i].record()
    run_kernel()
    end_events[i].record()

torch.cuda.synchronize()
times = [s.elapsed_time(e) for s, e in zip(start_events, end_events)]
```
The `torch.cuda.synchronize()` call tells the CPU to wait for the queued GPU work to finish, so that the elapsed time is only computed after synchronization, as visualised here:
![image](./_assets/speechmatics-events.svg)
_Figure 1: Illustration taken from https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch_
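For contrast, here is a sketch of the pitfall with CPU-side timers (again assuming a `run_kernel()` wrapper): kernel launches are asynchronous, so without a synchronize, `time.perf_counter()` measures only how long the CPU took to enqueue the kernel, not how long the GPU took to run it:
```python
import time
import torch

t0 = time.perf_counter()
run_kernel()  # returns as soon as the launch is queued
launch_only = time.perf_counter() - t0  # misleading: launch overhead only

t0 = time.perf_counter()
run_kernel()
torch.cuda.synchronize()  # block until the GPU has actually finished
wall_time = time.perf_counter() - t0  # includes the kernel's runtime
```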
### 4. Flush your L2 Cache
#### What is the L2 Cache
When data is read from or written to [HBM or GDDR](https://www.exxactcorp.com/blog/hpc/gddr6-vs-hbm-gpu-memory), it goes through the [L2 cache](https://docs.nvidia.com/cuda/cuda-c-programming-guide/#architecture) first. The L2 cache caches accesses to local and global memory, allowing data to be reused instead of reloaded from device memory (which can be slow!).
Unlike the L1 cache, which is private to each SM, the L2 cache is shared by all [streaming multiprocessors (SMs)](https://modal.com/gpu-glossary/device-hardware/streaming-multiprocessor)!
#### Why we need to flush the L2 Cache
As this [guide](https://guillesanbri.com/CUDA-Benchmarks/#caches) explains, if you have warmed up or previously run the kernel, some of its data may still be resident in the L2 cache, making the kernel look deceptively fast.
In a real-world setting, however, you want a realistic measurement of the kernel's runtime, and when running large models you will be running far more than one kernel. The cache will therefore thrash often and rarely hold any one kernel's data for reuse. To simulate this behaviour, we flush the L2 cache beforehand, eliminating any "help" from it.
This also makes it much easier to reason about data reuse within the kernel, since any L2 cache usage is now independent of other kernels or runs.
#### Example of not flushing L2 cache
When we initially benchmarked our kernels, we made the small mistake of not flushing the L2 cache.
![image](./_assets/exceed-sol.png)
_Figure 2: Our SOL % (a percentage of our observed maximum speed) is over 100% for the row with shape [2, 19456, 2560]._
#### How to flush the L2 Cache
To flush it, we should add the following lines:
```python
l2_size = torch.cuda.get_device_properties().L2_cache_size
cache = torch.empty(l2_size, dtype=torch.uint8, device="cuda")

cache.zero_()  # flush L2 cache
# <your benchmarking code here>
# If you repeat the same process multiple times, flush the L2 cache
# again before each timed run.
```
This allocates a buffer the size of the L2 cache; zeroing it in place issues a write operation that passes through the L2 cache and flushes it.
After flushing the L2 cache, we get a more sensible result here:
![image](./_assets/fixed-l2.png)
_Figure 3: All SOL% values are now under 100% after flushing the L2 cache._
### 5. Timing short-lived kernels
Initially, we used [Triton's](https://triton-lang.org/main/getting-started/installation.html) [`do_bench`](https://triton-lang.org/main/python-api/generated/triton.testing.do_bench.html) for benchmarking, since it does everything mentioned above: warmup, CUDA events, and L2 cache flushing. However, we hit an issue when benchmarking our kernels on smaller shapes: the kernel can be so fast that it finishes before the CPU has even issued the CUDA end event from Python.
![image](./_assets/speechmatics-too-fast.png)
_Figure 4: Taken from [Speechmatics](https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch). The kernel finishes before the CUDA end event is launched, so the true kernel timing is not recorded._
This results in kernels that look very slow:
![image](./_assets/small-timed-bug.png)
_Figure 5: Side-by-side comparison of Python benchmark latencies vs `ncu`'s timing (right) for shape [2, 19456, 2560]. `ncu` records a much faster duration of 71.36 μs compared to Python's 103.9 μs._
To fix this, we wrote a custom `do_bench_cuda()` that inserts a dummy, untimed FP32 matmul before each timed run, so that the CPU has enough time to enqueue the CUDA end event.
This led to more accurate latencies for our small-M kernels.
![image](./_assets/fixed-l2.png)
_Figure 6: There is a significant improvement in SOL% after inserting the dummy matmul._
We also repeat the benchmarked function for each shape over 5 copies of the input/output data, which lengthens the interval measured by the CUDA events.
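A sketch of what cycling over multiple copies might look like; the helper names (`make_inputs`, `run_kernel`) and the copy count here are illustrative, not our exact code:
```python
n_copies = 5
# Independent copies of the kernel's inputs, allocated up front
inputs = [make_inputs() for _ in range(n_copies)]

def f():
    # One timed "run" now spans n_copies launches on distinct buffers,
    # lengthening the interval between the CUDA start and end events.
    for args in inputs:
        run_kernel(*args)
```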
In the end, this is the `do_bench_cuda` function we used to benchmark our kernels:
```python
import statistics

import torch

def do_bench_cuda(f, n_warmup: int = 10, n_repeats: int = 20):
    l2_size = torch.cuda.get_device_properties().L2_cache_size
    cache = torch.empty(l2_size, dtype=torch.uint8, device="cuda")

    # Matmul in case of short-lived CUDA events
    A = torch.randn(4096, 4096, dtype=torch.float32, device="cuda")
    B = torch.randn(4096, 4096, dtype=torch.float32, device="cuda")
    A @ B

    # L2 cache flush + warmup
    for _ in range(n_warmup):
        cache.zero_()
        f()

    start_list = [torch.cuda.Event(enable_timing=True) for _ in range(n_repeats)]
    end_list = [torch.cuda.Event(enable_timing=True) for _ in range(n_repeats)]
    torch.cuda.synchronize()
    for start, end in zip(start_list, end_list):
        cache.zero_()  # flush L2 cache
        A @ B  # add a heavy task to fill the GPU pipeline
        start.record()
        f()
        end.record()
    torch.cuda.synchronize()

    timings = [start.elapsed_time(end) for start, end in zip(start_list, end_list)]
    return statistics.median(timings)
```
### 6. Clock Speed
This was a silent problem, and it was very hard to trace the issues it caused in our kernels. We initially found discrepancies between `ncu`'s latency (676.64 μs) and `do_bench_cuda`'s latency (535 μs) when profiling the shape [2048, 19456, 2560]: `do_bench` was reporting timings ~140 μs faster than `ncu`'s.
Although most of our kernel benchmarking code is in Python, developer errors do occur, and it is always good to have a point of reference for accurate kernel timings. The Nsight Compute CLI (`ncu` for short) measures kernel latency accurately, and the values it reports are a good figure against which to sanity-check our own benchmarking code.
#### 6.1 Locking clock speeds
First, we suspected that clock speed played a part in the discrepancy between `ncu`'s timings and our own benchmarking code. Clock speed affects benchmark times because it is the rate at which the GPU's processing units operate; a higher clock speed translates to more operations per second, which can either speed up or slow down a kernel depending on how it was implemented.
![image](./_assets/clock-speed-effect.png)
_Figure 7: Taken from [GPU Mode Lecture 56](https://www.youtube.com/watch?v=CtrqBmYtSEk). Clock speed affects kernel performance: the problem shape of 1024 got faster as clock speed increased, while the problem shape of 384 became slower._
From this [forum post](https://forums.developer.nvidia.com/t/nsight-compute-clock-speed-during-profiling/208646/3), we realised that part of the discrepancy arose because `ncu` locks the clock speed to the GPU's base clock by default. We investigated by locking the clock to the base clock speed, and also tried locking it to the max clock speed using `nvidia-smi -ac <memClk>,<smClk>`. According to the GPU Mode lecture, neither is a proper solution.
This is due to the following reasons:
- Locking to the max clock speed doesn't help, as it only sets a ceiling on GPU performance; the GPU can still fall back to its base clock speed of ~2287 MHz instead of the boosted clock speed of 2617 MHz.
- Locking to the base clock speed is also not meaningful, as it does not reflect the performance and experience users will actually get from our kernels, which will at best run at boosted clock speeds.
However, we did find that we should set `ncu`'s `--clock-control` option to `none`, so that it does not pin itself to base clock speeds. This brought the latency reported by `ncu` from 676.64 μs down to 575 μs when profiling the same problem shape of [2048, 19456, 2560].
#### 6.2 Discrepancies after `clock-control`
At the time of writing, we have observed that `ncu` sometimes gives different latency results for the same benchmarking code and problem shapes. This is because with `--clock-control none` the GPU clock speed is left uncontrolled and varies between runs, which in turn affects the measured kernel latency. A more holistic approach would be to also benchmark kernels across several fixed clock speeds.
![image](./_assets/ncu-compare.png)
_Figure 8: On the same benchmarking code and problem shapes, we see large deviations in duration, caused by differences in SM frequency. This is consistent with the graphs shown in Figure 7._
As a result, there can be some discrepancy between `ncu`'s timings and our own. To check whether a discrepancy is caused by SM frequency, use the fact that throughput is directly proportional to the SM clock, so kernel durations are inversely proportional to it.
In our case, scaling the 544 μs run (measured at an SM frequency of 2.28 GHz) to the 2.14 GHz clock of the slower run predicts `544 × 2.28 / 2.14 ≈ 579 μs`, close to the observed 575 μs, so most of the discrepancy came from the difference in SM frequency.
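This sanity check is easy to script; a small sketch using the numbers from our runs above:
```python
def scale_duration(duration_us: float, freq_measured_ghz: float,
                   freq_target_ghz: float) -> float:
    # Duration is inversely proportional to SM clock (for compute-bound
    # kernels), so predicted duration = measured * f_measured / f_target.
    return duration_us * freq_measured_ghz / freq_target_ghz

# Scale the 544 us run (at 2.28 GHz) to the 2.14 GHz clock of the slower run:
print(scale_duration(544, 2.28, 2.14))  # ~579 us, close to the observed 575 us
```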
The final command we used was:
`ncu -s 5 -k $kernel_name --clock-control none python3 benchmarks/bench_mm.py --profile 2048 19456 2560`
Explanation of arguments:
- `-s`: number of kernel launches to skip before profiling
- `-k`: name of the kernel to profile
- `--clock-control`: whether `ncu` controls the clock speed (here `none`, i.e. uncontrolled)
Below is a side-by-side comparison of `ncu`'s benchmarked latency and our script's, after all the adjustments were made.
![image](./_assets/ncu-bench.png)
_Figure 9: Side-by-side comparison of the above `ncu` command (left, measuring shape [2048, 19456, 2560]) with our own Python benchmarking script (right). There is at most a 10 μs difference between `Duration` in `ncu` and our script's `Latency (us)` measurement._
## Conclusion and TL;DR
TL;DR, when benchmarking:
1. Use the hardware you intend to deploy on
2. Warm up your kernels before benchmarking
3. Use CUDA events
4. Flush your L2 cache
5. Use a dummy matmul to make timings more accurate for short-lived kernels
6. Ensure your clock speed doesn't cause inconsistent readings
We hope this helps anyone interested in benchmarking their own kernels, or in how GPU kernels are benchmarked. Happy benchmarking!
### Acknowledgements and Related Resources
We would like to thank and credit the many resources and guides we used on our journey to work out how best to benchmark kernels on our GPUs; much of our work would not have been possible without these amazing guides.
- GPU Mode Lecture 56, presented by Georgii Evtushenko: https://www.youtube.com/watch?v=CtrqBmYtSEk
- https://www.spatters.ca/mma-matmul (Benchmarking using ncu for matrix multiplications)
- https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch (CUDA Events)
- https://guillesanbri.com/CUDA-Benchmarks/ (Good resource for introduction to benchmarking)
- https://modal.com/gpu-glossary/device-hardware/cuda-device-architecture (Glossary of Architecture in general)
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/#global-memory-5-x (L2 cache explanation)
- https://cvw.cac.cornell.edu/gpu-architecture/gpu-memory/memory_types (L1 vs L2 cache)

View File

@ -22,7 +22,7 @@ export default class JanProviderWeb extends AIEngine {
override async onLoad() {
console.log('Loading Jan Provider Extension...')
try {
// Initialize authentication and fetch models
await janApiClient.initialize()
@ -37,20 +37,43 @@ export default class JanProviderWeb extends AIEngine {
override async onUnload() {
console.log('Unloading Jan Provider Extension...')
// Clear all sessions
for (const sessionId of this.activeSessions.keys()) {
await this.unload(sessionId)
}
janProviderStore.reset()
console.log('Jan Provider Extension unloaded')
}
async get(modelId: string): Promise<modelInfo | undefined> {
return janApiClient
.getModels()
.then((list) => list.find((e) => e.id === modelId))
.then((model) =>
model
? {
id: model.id,
name: model.id, // Use ID as name for now
quant_type: undefined,
providerId: this.provider,
port: 443, // HTTPS port for API
sizeBytes: 0, // Size not provided by Jan API
tags: [],
path: undefined, // Remote model, no local path
owned_by: model.owned_by,
object: model.object,
capabilities: ['tools'], // Jan models support both tools via MCP
}
: undefined
)
}
async list(): Promise<modelInfo[]> {
try {
const janModels = await janApiClient.getModels()
return janModels.map((model) => ({
id: model.id,
name: model.id, // Use ID as name for now
@ -75,7 +98,7 @@ export default class JanProviderWeb extends AIEngine {
// For Jan API, we don't actually "load" models in the traditional sense
// We just create a session reference for tracking
const sessionId = `jan-${modelId}-${Date.now()}`
const sessionInfo: SessionInfo = {
pid: Date.now(), // Use timestamp as pseudo-PID
port: 443, // HTTPS port
@ -85,8 +108,10 @@ export default class JanProviderWeb extends AIEngine {
}
this.activeSessions.set(sessionId, sessionInfo)
console.log(`Jan model session created: ${sessionId} for model ${modelId}`)
console.log(
`Jan model session created: ${sessionId} for model ${modelId}`
)
return sessionInfo
} catch (error) {
console.error(`Failed to load Jan model ${modelId}:`, error)
@ -97,23 +122,23 @@ export default class JanProviderWeb extends AIEngine {
async unload(sessionId: string): Promise<UnloadResult> {
try {
const session = this.activeSessions.get(sessionId)
if (!session) {
return {
success: false,
error: `Session ${sessionId} not found`
error: `Session ${sessionId} not found`,
}
}
this.activeSessions.delete(sessionId)
console.log(`Jan model session unloaded: ${sessionId}`)
return { success: true }
} catch (error) {
console.error(`Failed to unload Jan session ${sessionId}:`, error)
return {
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
error: error instanceof Error ? error.message : 'Unknown error',
}
}
}
@ -136,9 +161,12 @@ export default class JanProviderWeb extends AIEngine {
}
// Convert core chat completion request to Jan API format
const janMessages: JanChatMessage[] = opts.messages.map(msg => ({
const janMessages: JanChatMessage[] = opts.messages.map((msg) => ({
role: msg.role as 'system' | 'user' | 'assistant',
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
content:
typeof msg.content === 'string'
? msg.content
: JSON.stringify(msg.content),
}))
const janRequest = {
@ -162,18 +190,18 @@ export default class JanProviderWeb extends AIEngine {
} else {
// Return single response
const response = await janApiClient.createChatCompletion(janRequest)
// Check if aborted after completion
if (abortController?.signal?.aborted) {
throw new Error('Request was aborted')
}
return {
id: response.id,
object: 'chat.completion' as const,
created: response.created,
model: response.model,
choices: response.choices.map(choice => ({
choices: response.choices.map((choice) => ({
index: choice.index,
message: {
role: choice.message.role,
@ -182,7 +210,12 @@ export default class JanProviderWeb extends AIEngine {
reasoning_content: choice.message.reasoning_content,
tool_calls: choice.message.tool_calls,
},
finish_reason: (choice.finish_reason || 'stop') as 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call',
finish_reason: (choice.finish_reason || 'stop') as
| 'stop'
| 'length'
| 'tool_calls'
| 'content_filter'
| 'function_call',
})),
usage: response.usage,
}
@ -193,7 +226,10 @@ export default class JanProviderWeb extends AIEngine {
}
}
private async *createStreamingGenerator(janRequest: any, abortController?: AbortController) {
private async *createStreamingGenerator(
janRequest: any,
abortController?: AbortController
) {
let resolve: () => void
let reject: (error: Error) => void
const chunks: any[] = []
@ -231,7 +267,7 @@ export default class JanProviderWeb extends AIEngine {
object: chunk.object,
created: chunk.created,
model: chunk.model,
choices: chunk.choices.map(choice => ({
choices: chunk.choices.map((choice) => ({
index: choice.index,
delta: {
role: choice.delta.role,
@ -261,14 +297,14 @@ export default class JanProviderWeb extends AIEngine {
if (abortController?.signal?.aborted) {
throw new Error('Request was aborted')
}
while (yieldedIndex < chunks.length) {
yield chunks[yieldedIndex]
yieldedIndex++
}
// Wait a bit before checking again
await new Promise(resolve => setTimeout(resolve, 10))
await new Promise((resolve) => setTimeout(resolve, 10))
}
// Yield any remaining chunks
@ -291,24 +327,32 @@ export default class JanProviderWeb extends AIEngine {
}
async delete(modelId: string): Promise<void> {
throw new Error(`Delete operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Delete operation not supported for remote Jan API model: ${modelId}`
)
}
async import(modelId: string, _opts: ImportOptions): Promise<void> {
throw new Error(`Import operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Import operation not supported for remote Jan API model: ${modelId}`
)
}
async abortImport(modelId: string): Promise<void> {
throw new Error(`Abort import operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Abort import operation not supported for remote Jan API model: ${modelId}`
)
}
async getLoadedModels(): Promise<string[]> {
return Array.from(this.activeSessions.values()).map(session => session.model_id)
return Array.from(this.activeSessions.values()).map(
(session) => session.model_id
)
}
async isToolSupported(modelId: string): Promise<boolean> {
// Jan models support tool calls via MCP
console.log(`Checking tool support for Jan model ${modelId}: supported`);
return true;
console.log(`Checking tool support for Jan model ${modelId}: supported`)
return true
}
}
}

View File

@ -96,18 +96,6 @@
"textAlign": "right"
}
},
{
"key": "batch_size",
"title": "Batch Size",
"description": "Logical maximum batch size for processing prompts.",
"controllerType": "input",
"controllerProps": {
"value": 2048,
"placeholder": "2048",
"type": "number",
"textAlign": "right"
}
},
{
"key": "ubatch_size",
"title": "uBatch Size",

View File

@ -46,7 +46,6 @@ export async function getLocalInstalledBackends(): Promise<
}
}
}
console.debug(local)
return local
}
@ -319,7 +318,10 @@ export async function downloadBackend(
events.emit('onFileDownloadSuccess', { modelId: taskId, downloadType })
} catch (error) {
// Fallback: if GitHub fails, retry once with CDN
if (source === 'github') {
if (
source === 'github' &&
error?.toString() !== 'Error: Download cancelled'
) {
console.warn(`GitHub download failed, falling back to CDN:`, error)
return await downloadBackend(backend, version, 'cdn')
}

View File

@ -37,7 +37,13 @@ import {
import { invoke } from '@tauri-apps/api/core'
import { getProxyConfig } from './util'
import { basename } from '@tauri-apps/api/path'
import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
import {
readGgufMetadata,
estimateKVCacheSize,
getModelSize,
isModelSupported,
planModelLoadInternal,
} from '@janhq/tauri-plugin-llamacpp-api'
import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
// Error message constant - matches web-app/src/utils/error.ts
@ -82,6 +88,7 @@ type ModelPlan = {
maxContextLength: number
noOffloadKVCache: boolean
offloadMmproj?: boolean
batchSize: number
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}
@ -922,6 +929,30 @@ export default class llamacpp_extension extends AIEngine {
return hash
}
override async get(modelId: string): Promise<modelInfo | undefined> {
const modelPath = await joinPath([
await this.getProviderPath(),
'models',
modelId,
])
const path = await joinPath([modelPath, 'model.yml'])
if (!(await fs.existsSync(path))) return undefined
const modelConfig = await invoke<ModelConfig>('read_yaml', {
path,
})
return {
id: modelId,
name: modelConfig.name ?? modelId,
quant_type: undefined, // TODO: parse quantization type from model.yml or model.gguf
providerId: this.provider,
port: 0, // port is not known until the model is loaded
sizeBytes: modelConfig.size_bytes ?? 0,
} as modelInfo
}
// Implement the required LocalProvider interface methods
override async list(): Promise<modelInfo[]> {
const modelsDir = await joinPath([await this.getProviderPath(), 'models'])
@ -1085,7 +1116,10 @@ export default class llamacpp_extension extends AIEngine {
const archiveName = await basename(path)
logger.info(`Installing backend from path: ${path}`)
if (!(await fs.existsSync(path)) || (!path.endsWith('tar.gz') && !path.endsWith('zip'))) {
if (
!(await fs.existsSync(path)) ||
(!path.endsWith('tar.gz') && !path.endsWith('zip'))
) {
logger.error(`Invalid path or file ${path}`)
throw new Error(`Invalid path or file ${path}`)
}
@ -1979,11 +2013,6 @@ export default class llamacpp_extension extends AIEngine {
return responseData as EmbeddingResponse
}
// Optional method for direct client access
override getChatClient(sessionId: string): any {
throw new Error('method not implemented yet')
}
/**
* Check if a tool is supported by the model
* Currently read from GGUF chat_template
@ -2046,7 +2075,7 @@ export default class llamacpp_extension extends AIEngine {
path: string,
meta: Record<string, string>
): Promise<{ layerSize: number; totalLayers: number }> {
const modelSize = await this.getModelSize(path)
const modelSize = await getModelSize(path)
const arch = meta['general.architecture']
const totalLayers = Number(meta[`${arch}.block_count`]) + 2 // 1 for lm_head layer and 1 for embedding layer
if (!totalLayers) throw new Error('Invalid metadata: block_count not found')
@ -2062,335 +2091,27 @@ export default class llamacpp_extension extends AIEngine {
/^\/\/[^/]+/.test(norm) // UNC path //server/share
)
}
/*
* if (!this.isAbsolutePath(path))
path = await joinPath([await getJanDataFolderPath(), path])
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
mmprojPath = await joinPath([await getJanDataFolderPath(), path])
*/
async planModelLoad(
path: string,
mmprojPath?: string,
requestedCtx?: number
): Promise<ModelPlan> {
if (!this.isAbsolutePath(path))
if (!this.isAbsolutePath(path)) {
path = await joinPath([await getJanDataFolderPath(), path])
}
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
mmprojPath = await joinPath([await getJanDataFolderPath(), path])
const modelSize = await this.getModelSize(path)
const memoryInfo = await this.getTotalSystemMemory()
const gguf = await readGgufMetadata(path)
// Get mmproj size if provided
let mmprojSize = 0
if (mmprojPath) {
mmprojSize = await this.getModelSize(mmprojPath)
}
const { layerSize, totalLayers } = await this.getLayerSize(
path,
gguf.metadata
)
const kvCachePerToken = (await this.estimateKVCache(gguf.metadata))
.perTokenSize
logger.info(
`Model size: ${modelSize}, Layer size: ${layerSize}, Total layers: ${totalLayers}, KV cache per token: ${kvCachePerToken}`
)
// Validate critical values
if (!modelSize || modelSize <= 0) {
throw new Error(`Invalid model size: ${modelSize}`)
}
if (!kvCachePerToken || kvCachePerToken <= 0) {
throw new Error(`Invalid KV cache per token: ${kvCachePerToken}`)
}
if (!layerSize || layerSize <= 0) {
throw new Error(`Invalid layer size: ${layerSize}`)
}
// Reserve memory for OS, other applications, and fixed engine overhead.
const VRAM_RESERVE_GB = 0.5
const VRAM_RESERVE_BYTES = VRAM_RESERVE_GB * 1024 * 1024 * 1024
const ENGINE_FIXED_OVERHEAD_BYTES = 0.2 * 1024 * 1024 * 1024 // For scratch buffers etc.
// Get model's maximum context length
const arch = gguf.metadata['general.architecture']
const modelMaxContextLength =
Number(gguf.metadata[`${arch}.context_length`]) || 8192
const MIN_CONTEXT_LENGTH = 1024
// Memory percentages applied to both VRAM and RAM
const memoryPercentages = { high: 0.7, medium: 0.5, low: 0.4 }
logger.info(
`Memory info - Total (VRAM + RAM): ${memoryInfo.totalMemory}, Total VRAM: ${memoryInfo.totalVRAM}, Mode: ${this.memoryMode}`
)
if (!memoryInfo.totalMemory || isNaN(memoryInfo.totalMemory)) {
throw new Error(`Invalid total memory: ${memoryInfo.totalMemory}`)
}
if (!memoryInfo.totalVRAM || isNaN(memoryInfo.totalVRAM)) {
throw new Error(`Invalid total VRAM: ${memoryInfo.totalVRAM}`)
}
if (!this.memoryMode || !(this.memoryMode in memoryPercentages)) {
throw new Error(
`Invalid memory mode: ${this.memoryMode}. Must be 'high', 'medium', or 'low'`
)
}
// Apply memory mode to both VRAM and RAM separately
const memoryModeMultiplier = memoryPercentages[this.memoryMode]
const usableVRAM = Math.max(
0,
memoryInfo.totalVRAM * memoryModeMultiplier -
VRAM_RESERVE_BYTES -
ENGINE_FIXED_OVERHEAD_BYTES
)
const actualSystemRAM = Math.max(0, memoryInfo.totalRAM)
const usableSystemMemory = actualSystemRAM * memoryModeMultiplier
logger.info(
`Actual System RAM: ${actualSystemRAM}, Usable VRAM for plan: ${usableVRAM}, Usable System Memory: ${usableSystemMemory}`
)
let gpuLayers = 0
let maxContextLength = 0
let noOffloadKVCache = false
let mode: ModelPlan['mode'] = 'Unsupported'
let offloadMmproj = false
let remainingVRAM = usableVRAM
if (mmprojSize > 0 && mmprojSize <= remainingVRAM) {
offloadMmproj = true
remainingVRAM -= mmprojSize
}
const vramForMinContext = (
await this.estimateKVCache(gguf.metadata, MIN_CONTEXT_LENGTH)
).size
const ramForModel = modelSize + (offloadMmproj ? 0 : mmprojSize)
if (ramForModel + vramForMinContext > usableSystemMemory + usableVRAM) {
logger.error(
`Model unsupported. Not enough resources for model and min context.`
)
return {
gpuLayers: 0,
maxContextLength: 0,
noOffloadKVCache: true,
mode: 'Unsupported',
offloadMmproj: false,
}
}
const targetContext = Math.min(
requestedCtx || modelMaxContextLength,
modelMaxContextLength
)
let targetContextSize = (
await this.estimateKVCache(gguf.metadata, targetContext)
).size
// Use `kvCachePerToken` for all VRAM calculations
if (modelSize + targetContextSize <= remainingVRAM) {
mode = 'GPU'
gpuLayers = totalLayers
maxContextLength = targetContext
noOffloadKVCache = false
logger.info(
'Planning: Ideal case fits. All layers and target context in VRAM.'
)
} else if (modelSize <= remainingVRAM) {
mode = 'GPU'
gpuLayers = totalLayers
noOffloadKVCache = false
const vramLeftForContext = remainingVRAM - modelSize
maxContextLength = Math.floor(vramLeftForContext / kvCachePerToken)
// Add safety check to prevent OOM
const safetyBuffer = 0.9 // Use 90% of calculated context to be safe
maxContextLength = Math.floor(maxContextLength * safetyBuffer)
logger.info(
`Planning: All layers fit in VRAM, but context must be reduced. VRAM left: ${vramLeftForContext}, kvCachePerToken: ${kvCachePerToken}, calculated context: ${maxContextLength}`
)
} else {
const vramAvailableForLayers = remainingVRAM - vramForMinContext
if (vramAvailableForLayers >= layerSize) {
mode = 'Hybrid'
gpuLayers = Math.min(
Math.floor(vramAvailableForLayers / layerSize),
totalLayers
)
noOffloadKVCache = false
const vramUsedByLayers = gpuLayers * layerSize
const vramLeftForContext = remainingVRAM - vramUsedByLayers
maxContextLength = Math.floor(vramLeftForContext / kvCachePerToken)
logger.info(
'Planning: Hybrid mode. Offloading layers to fit context in VRAM.'
)
}
}
// Fallback logic: try different configurations if no VRAM-based plan worked
if (mode === 'Unsupported') {
logger.info('Planning: Trying fallback configurations...')
// Try putting some layers on GPU with KV cache in RAM
const possibleGpuLayers = Math.floor(remainingVRAM / layerSize)
if (possibleGpuLayers > 0) {
gpuLayers = Math.min(possibleGpuLayers, totalLayers)
const ramUsedByCpuLayers = (totalLayers - gpuLayers) * layerSize
const ramUsedByMmproj = !offloadMmproj ? mmprojSize : 0
const availableRamForKv =
usableSystemMemory - (ramUsedByCpuLayers + ramUsedByMmproj)
// Note: Use `kvCachePerToken` for RAM calculation, as the overhead is GPU-specific
const contextInRam = Math.floor(availableRamForKv / kvCachePerToken)
if (contextInRam >= MIN_CONTEXT_LENGTH) {
mode = 'Hybrid'
maxContextLength = contextInRam
noOffloadKVCache = true
logger.info(
`Planning: Fallback hybrid - GPU layers: ${gpuLayers}, Context in RAM: ${maxContextLength}`
)
}
}
// If still unsupported, try pure CPU mode
if (mode === 'Unsupported') {
gpuLayers = 0
noOffloadKVCache = true
offloadMmproj = false
const ramUsedByModel = modelSize + mmprojSize
const availableRamForKv = usableSystemMemory - ramUsedByModel
maxContextLength = Math.floor(availableRamForKv / kvCachePerToken)
if (maxContextLength >= MIN_CONTEXT_LENGTH) {
mode = 'CPU'
logger.info(`Planning: CPU mode - Context: ${maxContextLength}`)
}
}
}
if (mode === 'CPU' || noOffloadKVCache) {
offloadMmproj = false
}
if (requestedCtx && requestedCtx > 0) {
maxContextLength = Math.min(maxContextLength, requestedCtx)
}
maxContextLength = Math.min(maxContextLength, modelMaxContextLength)
if (maxContextLength < MIN_CONTEXT_LENGTH) {
mode = 'Unsupported'
}
if (mode === 'Unsupported') {
gpuLayers = 0
maxContextLength = 0
}
maxContextLength = isNaN(maxContextLength)
? 0
: Math.floor(maxContextLength)
const mmprojInfo = mmprojPath
? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(
2
)}MB, offloadMmproj=${offloadMmproj}`
: ''
logger.info(
`Final plan for ${path}: gpuLayers=${gpuLayers}/${totalLayers}, ` +
`maxContextLength=${maxContextLength}, noOffloadKVCache=${noOffloadKVCache}, ` +
`mode=${mode}${mmprojInfo}`
)
return {
gpuLayers,
maxContextLength,
noOffloadKVCache,
mode,
offloadMmproj,
}
}
/**
* Estimate KV cache size from the given metadata
*/
private async estimateKVCache(
meta: Record<string, string>,
ctx_size?: number
): Promise<{ size: number; perTokenSize: number }> {
const arch = meta['general.architecture']
if (!arch) throw new Error('Invalid metadata: architecture not found')
const nLayer = Number(meta[`${arch}.block_count`])
if (!nLayer) throw new Error('Invalid metadata: block_count not found')
const nHead = Number(meta[`${arch}.attention.head_count`])
if (!nHead) throw new Error('Invalid metadata: head_count not found')
// Try to get key/value lengths first (more accurate)
const keyLen = Number(meta[`${arch}.attention.key_length`])
const valLen = Number(meta[`${arch}.attention.value_length`])
let headDim: number
if (keyLen && valLen) {
// Use explicit key/value lengths if available
logger.info(
`Using explicit key_length: ${keyLen}, value_length: ${valLen}`
)
headDim = keyLen + valLen
} else {
// Fall back to embedding_length estimation
const embeddingLen = Number(meta[`${arch}.embedding_length`])
if (!embeddingLen)
throw new Error('Invalid metadata: embedding_length not found')
// Standard transformer: head_dim = embedding_dim / num_heads
// For KV cache: we need both K and V, so 2 * head_dim per head
headDim = (embeddingLen / nHead) * 2
logger.info(
`Using embedding_length estimation: ${embeddingLen}, calculated head_dim: ${headDim}`
)
}
const maxCtx = Number(meta[`${arch}.context_length`])
if (!maxCtx) throw new Error('Invalid metadata: context_length not found')
// If the user supplied a context size, clamp it to the model's max
let ctxLen = ctx_size ? Math.min(ctx_size, maxCtx) : maxCtx
logger.info(`Final context length used for KV size: ${ctxLen}`)
logger.info(`nLayer: ${nLayer}, nHead: ${nHead}, headDim (K+V): ${headDim}`)
logger.info(`ctxLen: ${ctxLen}`)
// Consider f16 by default
// Can be extended by checking cache-type-v and cache-type-k
// but we are checking overall compatibility with the default settings
// fp16 = 16 bits = 2 bytes per element
const bytesPerElement = 2
// Total KV cache size per token = nHead * headDim * bytesPerElement * nLayer
const kvPerToken = nHead * headDim * bytesPerElement * nLayer
return { size: ctxLen * kvPerToken, perTokenSize: kvPerToken }
}
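// Worked example (hypothetical numbers): a 32-layer model with 32 KV heads and
// key_length = value_length = 128 using an fp16 cache gives
//   kvPerToken = 32 * (128 + 128) * 2 * 32 = 524,288 bytes (~0.5 MB per token),
// so an 8192-token context needs 8192 * 524,288 ≈ 4.3 GB of KV cache.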
private async getModelSize(path: string): Promise<number> {
if (path.startsWith('https://')) {
const res = await fetch(path, { method: 'HEAD' })
const len = res.headers.get('content-length')
return len ? parseInt(len, 10) : 0
} else {
return (await fs.fileStat(path)).size
try {
const result = await planModelLoadInternal(path, this.memoryMode, mmprojPath, requestedCtx)
return result
} catch (e) {
throw new Error(String(e))
}
}
@ -2404,50 +2125,11 @@ export default class llamacpp_extension extends AIEngine {
*/
async isModelSupported(
path: string,
ctx_size?: number
ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
try {
const modelSize = await this.getModelSize(path)
const memoryInfo = await this.getTotalSystemMemory()
logger.info(`modelSize: ${modelSize}`)
const gguf = await readGgufMetadata(path)
let kvCacheSize: number
if (ctx_size) {
kvCacheSize = (await this.estimateKVCache(gguf.metadata, ctx_size)).size
} else {
kvCacheSize = (await this.estimateKVCache(gguf.metadata)).size
}
// Total memory consumption = model weights + kvcache
const totalRequired = modelSize + kvCacheSize
logger.info(
`isModelSupported: Total memory requirement: ${totalRequired} for ${path}`
)
// Use 90% of total memory as the usable limit
const USABLE_MEMORY_PERCENTAGE = 0.9
const usableTotalMemory =
memoryInfo.totalRAM * USABLE_MEMORY_PERCENTAGE +
memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
const usableVRAM = memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
// Check if model fits in total memory at all (this is the hard limit)
if (totalRequired > usableTotalMemory) {
return 'RED' // Truly impossible to run
}
// Check if everything fits in VRAM (ideal case)
if (totalRequired <= usableVRAM) {
return 'GREEN'
}
// If we get here, it means:
// - Total requirement fits in combined memory
// - But doesn't fit entirely in VRAM
// This is the CPU-GPU hybrid scenario
return 'YELLOW'
const result = await isModelSupported(path, Number(ctxSize))
return result
} catch (e) {
throw new Error(String(e))
}
@ -2601,7 +2283,8 @@ export default class llamacpp_extension extends AIEngine {
metadata: Record<string, string>
): Promise<number> {
// Extract vision parameters from metadata
const projectionDim = Math.floor(Number(metadata['clip.vision.projection_dim']) / 10) || 256
const projectionDim =
Math.floor(Number(metadata['clip.vision.projection_dim']) / 10) || 256
// Count images in messages
let imageCount = 0

View File

@ -35,7 +35,8 @@
"copy:assets:mobile": "cpx \"pre-install/*.tgz\" \"src-tauri/resources/pre-install/\" && cpx \"LICENSE\" \"src-tauri/resources/\"",
"download:lib": "node ./scripts/download-lib.mjs",
"download:bin": "node ./scripts/download-bin.mjs",
"build:tauri:win32": "yarn download:bin && yarn download:lib && yarn tauri build",
"download:windows-installer": "node ./scripts/download-win-installer-deps.mjs",
"build:tauri:win32": "yarn download:bin && yarn download:lib && yarn download:windows-installer && yarn tauri build",
"build:tauri:linux": "yarn download:bin && yarn download:lib && NO_STRIP=1 ./src-tauri/build-utils/shim-linuxdeploy.sh yarn tauri build && ./src-tauri/build-utils/buildAppImage.sh",
"build:tauri:darwin": "yarn download:bin && yarn tauri build --target universal-apple-darwin",
"build:tauri": "yarn build:icon && yarn copy:assets:tauri && run-script-os",

View File

@ -77,25 +77,6 @@ async function main() {
// Expect EEXIST error
}
// Download VC++ Redistributable 17
if (platform == 'win32') {
const vcFilename = 'vc_redist.x64.exe'
const vcUrl = 'https://aka.ms/vs/17/release/vc_redist.x64.exe'
console.log(`Downloading VC++ Redistributable...`)
const vcSavePath = path.join(tempDir, vcFilename)
if (!fs.existsSync(vcSavePath)) {
await download(vcUrl, vcSavePath)
}
// copy to tauri resources
try {
copySync(vcSavePath, libDir)
} catch (err) {
// Expect EEXIST error
}
}
console.log('Downloads completed.')
}

View File

@ -0,0 +1,83 @@
// scripts/download-win-installer-deps.mjs
import https from 'https'
import fs, { mkdirSync } from 'fs'
import os from 'os'
import path from 'path'
import { copySync } from 'cpx'
function download(url, dest) {
return new Promise((resolve, reject) => {
console.log(`Downloading ${url} to ${dest}`)
const file = fs.createWriteStream(dest)
https
.get(url, (response) => {
console.log(`Response status code: ${response.statusCode}`)
if (
response.statusCode >= 300 &&
response.statusCode < 400 &&
response.headers.location
) {
// Handle redirect
const redirectURL = response.headers.location
console.log(`Redirecting to ${redirectURL}`)
download(redirectURL, dest).then(resolve, reject) // Recursive call
return
} else if (response.statusCode !== 200) {
reject(`Failed to get '${url}' (${response.statusCode})`)
return
}
response.pipe(file)
file.on('finish', () => {
file.close(resolve)
})
})
.on('error', (err) => {
fs.unlink(dest, () => reject(err.message))
})
})
}
async function main() {
console.log('Starting Windows installer dependencies download')
const platform = os.platform() // 'darwin', 'linux', 'win32'
const arch = os.arch() // 'x64', 'arm64', etc.
if (arch !== 'x64') return
const libDir = 'src-tauri/resources/lib'
const tempDir = 'scripts/dist'
try {
mkdirSync('scripts/dist')
} catch (err) {
// Expect EEXIST error if the directory already exists
}
// Download VC++ Redistributable 17
if (platform === 'win32') {
const vcFilename = 'vc_redist.x64.exe'
const vcUrl = 'https://aka.ms/vs/17/release/vc_redist.x64.exe'
console.log(`Downloading VC++ Redistributable...`)
const vcSavePath = path.join(tempDir, vcFilename)
if (!fs.existsSync(vcSavePath)) {
await download(vcUrl, vcSavePath)
}
// copy to tauri resources
try {
copySync(vcSavePath, libDir)
} catch (err) {
// Expect EEXIST error
}
}
console.log('Windows installer dependencies downloads completed.')
}
main().catch((err) => {
console.error('Error:', err)
process.exit(1)
})

View File

@ -15,6 +15,8 @@ use tauri::Runtime;
static SYSTEM_INFO: OnceLock<SystemInfo> = OnceLock::new();
pub use commands::get_system_info;
/// Initialize the hardware plugin
pub fn init<R: Runtime>() -> tauri::plugin::TauriPlugin<R> {
tauri::plugin::Builder::new("hardware")

View File

@ -24,6 +24,7 @@ tauri = { version = "2.5.0", default-features = false, features = [] }
thiserror = "2.0.12"
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.11", features = ["json", "blocking", "stream"] }
tauri-plugin-hardware = { path = "../tauri-plugin-hardware" }
# Unix-specific dependencies
[target.'cfg(unix)'.dependencies]

View File

@ -14,6 +14,10 @@ const COMMANDS: &[&str] = &[
"get_session_by_model",
// GGUF commands
"read_gguf_metadata",
"estimate_kv_cache_size",
"get_model_size",
"is_model_supported",
"plan_model_load"
];
fn main() {

View File

@ -2,28 +2,28 @@ import { invoke } from '@tauri-apps/api/core'
// Types
export interface SessionInfo {
pid: number;
port: number;
model_id: string;
model_path: string;
api_key: string;
pid: number
port: number
model_id: string
model_path: string
api_key: string
}
export interface DeviceInfo {
id: string;
name: string;
memory: number;
id: string
name: string
memory: number
}
export interface GgufMetadata {
version: number;
tensor_count: number;
metadata: Record<string, string>;
version: number
tensor_count: number
metadata: Record<string, string>
}
// Cleanup commands
export async function cleanupLlamaProcesses(): Promise<void> {
return await invoke('plugin:llamacpp|cleanup_llama_processes');
return await invoke('plugin:llamacpp|cleanup_llama_processes')
}
// LlamaCpp server commands
@ -35,12 +35,12 @@ export async function loadLlamaModel(
return await invoke('plugin:llamacpp|load_llama_model', {
backendPath,
libraryPath,
args
});
args,
})
}
export async function unloadLlamaModel(pid: number): Promise<void> {
return await invoke('plugin:llamacpp|unload_llama_model', { pid });
return await invoke('plugin:llamacpp|unload_llama_model', { pid })
}
export async function getDevices(
@ -49,8 +49,8 @@ export async function getDevices(
): Promise<DeviceInfo[]> {
return await invoke('plugin:llamacpp|get_devices', {
backendPath,
libraryPath
});
libraryPath,
})
}
export async function generateApiKey(
@ -59,35 +59,84 @@ export async function generateApiKey(
): Promise<string> {
return await invoke('plugin:llamacpp|generate_api_key', {
modelId,
apiSecret
});
apiSecret,
})
}
export async function isProcessRunning(pid: number): Promise<boolean> {
return await invoke('plugin:llamacpp|is_process_running', { pid });
return await invoke('plugin:llamacpp|is_process_running', { pid })
}
export async function getRandomPort(): Promise<number> {
return await invoke('plugin:llamacpp|get_random_port');
return await invoke('plugin:llamacpp|get_random_port')
}
export async function findSessionByModel(modelId: string): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|find_session_by_model', { modelId });
export async function findSessionByModel(
modelId: string
): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|find_session_by_model', { modelId })
}
export async function getLoadedModels(): Promise<string[]> {
return await invoke('plugin:llamacpp|get_loaded_models');
return await invoke('plugin:llamacpp|get_loaded_models')
}
export async function getAllSessions(): Promise<SessionInfo[]> {
return await invoke('plugin:llamacpp|get_all_sessions');
return await invoke('plugin:llamacpp|get_all_sessions')
}
export async function getSessionByModel(modelId: string): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|get_session_by_model', { modelId });
export async function getSessionByModel(
modelId: string
): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|get_session_by_model', { modelId })
}
// GGUF commands
export async function readGgufMetadata(path: string): Promise<GgufMetadata> {
return await invoke('plugin:llamacpp|read_gguf_metadata', { path });
return await invoke('plugin:llamacpp|read_gguf_metadata', { path })
}
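// Note: the Rust KVCacheEstimate struct is serialized without a camelCase rename,
// so `per_token_size` stays snake_case here (unlike planModelLoadInternal's result).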
export async function estimateKVCacheSize(
meta: Record<string, string>,
ctxSize?: number
): Promise<{ size: number; per_token_size: number }> {
return await invoke('plugin:llamacpp|estimate_kv_cache_size', {
meta,
ctxSize,
})
}
export async function getModelSize(path: string): Promise<number> {
return await invoke('plugin:llamacpp|get_model_size', { path })
}
export async function isModelSupported(
path: string,
ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
return await invoke('plugin:llamacpp|is_model_supported', {
path,
ctxSize,
})
}
export async function planModelLoadInternal(
path: string,
memoryMode: string,
mmprojPath?: string,
requestedContext?: number
): Promise<{
gpuLayers: number
maxContextLength: number
noOffloadKVCache: boolean
offloadMmproj?: boolean
batchSize: number
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}> {
return await invoke('plugin:llamacpp|plan_model_load', {
path,
memoryMode,
mmprojPath,
requestedContext,
})
}
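// Usage sketch (illustrative only; `loadWithPlan` is not part of this plugin's API,
// and the memoryMode values assume the Rust side's 'high' | 'medium' | 'low'):
async function loadWithPlan(path: string) {
  const status = await isModelSupported(path)
  if (status === 'RED') {
    throw new Error('Model does not fit in available memory')
  }
  const plan = await planModelLoadInternal(path, 'high')
  console.log(`gpuLayers=${plan.gpuLayers}, ctx=${plan.maxContextLength}, mode=${plan.mode}`)
  return plan
}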

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-estimate-kv-cache-size"
description = "Enables the estimate_kv_cache_size command without any pre-configured scope."
commands.allow = ["estimate_kv_cache_size"]
[[permission]]
identifier = "deny-estimate-kv-cache-size"
description = "Denies the estimate_kv_cache_size command without any pre-configured scope."
commands.deny = ["estimate_kv_cache_size"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-get-model-size"
description = "Enables the get_model_size command without any pre-configured scope."
commands.allow = ["get_model_size"]
[[permission]]
identifier = "deny-get-model-size"
description = "Denies the get_model_size command without any pre-configured scope."
commands.deny = ["get_model_size"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-is-model-supported"
description = "Enables the is_model_supported command without any pre-configured scope."
commands.allow = ["is_model_supported"]
[[permission]]
identifier = "deny-is-model-supported"
description = "Denies the is_model_supported command without any pre-configured scope."
commands.deny = ["is_model_supported"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-plan-model-load"
description = "Enables the plan_model_load command without any pre-configured scope."
commands.allow = ["plan_model_load"]
[[permission]]
identifier = "deny-plan-model-load"
description = "Denies the plan_model_load command without any pre-configured scope."
commands.deny = ["plan_model_load"]

View File

@ -16,6 +16,10 @@ Default permissions for the llamacpp plugin
- `allow-get-all-sessions`
- `allow-get-session-by-model`
- `allow-read-gguf-metadata`
- `allow-estimate-kv-cache-size`
- `allow-get-model-size`
- `allow-is-model-supported`
- `allow-plan-model-load`
## Permission Table
@ -55,6 +59,32 @@ Denies the cleanup_llama_processes command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-estimate-kv-cache-size`
</td>
<td>
Enables the estimate_kv_cache_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-estimate-kv-cache-size`
</td>
<td>
Denies the estimate_kv_cache_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-find-session-by-model`
</td>
@ -185,6 +215,32 @@ Denies the get_loaded_models command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-get-model-size`
</td>
<td>
Enables the get_model_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-get-model-size`
</td>
<td>
Denies the get_model_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-get-random-port`
</td>
@ -237,6 +293,32 @@ Denies the get_session_by_model command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-is-model-supported`
</td>
<td>
Enables the is_model_supported command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-is-model-supported`
</td>
<td>
Denies the is_model_supported command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-is-process-running`
</td>
@ -289,6 +371,32 @@ Denies the load_llama_model command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-plan-model-load`
</td>
<td>
Enables the plan_model_load command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-plan-model-load`
</td>
<td>
Denies the plan_model_load command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-read-gguf-metadata`
</td>

View File

@ -3,10 +3,10 @@ description = "Default permissions for the llamacpp plugin"
permissions = [
# Cleanup commands
"allow-cleanup-llama-processes",
# LlamaCpp server commands
"allow-load-llama-model",
"allow-unload-llama-model",
"allow-unload-llama-model",
"allow-get-devices",
"allow-generate-api-key",
"allow-is-process-running",
@ -15,7 +15,11 @@ permissions = [
"allow-get-loaded-models",
"allow-get-all-sessions",
"allow-get-session-by-model",
# GGUF commands
"allow-read-gguf-metadata"
"allow-read-gguf-metadata",
"allow-estimate-kv-cache-size",
"allow-get-model-size",
"allow-is-model-supported",
"allow-plan-model-load"
]

View File

@ -306,6 +306,18 @@
"const": "deny-cleanup-llama-processes",
"markdownDescription": "Denies the cleanup_llama_processes command without any pre-configured scope."
},
{
"description": "Enables the estimate_kv_cache_size command without any pre-configured scope.",
"type": "string",
"const": "allow-estimate-kv-cache-size",
"markdownDescription": "Enables the estimate_kv_cache_size command without any pre-configured scope."
},
{
"description": "Denies the estimate_kv_cache_size command without any pre-configured scope.",
"type": "string",
"const": "deny-estimate-kv-cache-size",
"markdownDescription": "Denies the estimate_kv_cache_size command without any pre-configured scope."
},
{
"description": "Enables the find_session_by_model command without any pre-configured scope.",
"type": "string",
@ -366,6 +378,18 @@
"const": "deny-get-loaded-models",
"markdownDescription": "Denies the get_loaded_models command without any pre-configured scope."
},
{
"description": "Enables the get_model_size command without any pre-configured scope.",
"type": "string",
"const": "allow-get-model-size",
"markdownDescription": "Enables the get_model_size command without any pre-configured scope."
},
{
"description": "Denies the get_model_size command without any pre-configured scope.",
"type": "string",
"const": "deny-get-model-size",
"markdownDescription": "Denies the get_model_size command without any pre-configured scope."
},
{
"description": "Enables the get_random_port command without any pre-configured scope.",
"type": "string",
@ -390,6 +414,18 @@
"const": "deny-get-session-by-model",
"markdownDescription": "Denies the get_session_by_model command without any pre-configured scope."
},
{
"description": "Enables the is_model_supported command without any pre-configured scope.",
"type": "string",
"const": "allow-is-model-supported",
"markdownDescription": "Enables the is_model_supported command without any pre-configured scope."
},
{
"description": "Denies the is_model_supported command without any pre-configured scope.",
"type": "string",
"const": "deny-is-model-supported",
"markdownDescription": "Denies the is_model_supported command without any pre-configured scope."
},
{
"description": "Enables the is_process_running command without any pre-configured scope.",
"type": "string",
@ -414,6 +450,18 @@
"const": "deny-load-llama-model",
"markdownDescription": "Denies the load_llama_model command without any pre-configured scope."
},
{
"description": "Enables the plan_model_load command without any pre-configured scope.",
"type": "string",
"const": "allow-plan-model-load",
"markdownDescription": "Enables the plan_model_load command without any pre-configured scope."
},
{
"description": "Denies the plan_model_load command without any pre-configured scope.",
"type": "string",
"const": "deny-plan-model-load",
"markdownDescription": "Denies the plan_model_load command without any pre-configured scope."
},
{
"description": "Enables the read_gguf_metadata command without any pre-configured scope.",
"type": "string",
@ -439,10 +487,10 @@
"markdownDescription": "Denies the unload_llama_model command without any pre-configured scope."
},
{
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`",
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`",
"type": "string",
"const": "default",
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`"
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`"
}
]
}

View File

@ -1,58 +1,141 @@
use super::helpers;
use super::types::GgufMetadata;
use reqwest;
use std::fs::File;
use std::io::BufReader;
use super::utils::{estimate_kv_cache_internal, read_gguf_metadata_internal};
use crate::gguf::types::{KVCacheError, KVCacheEstimate, ModelSupportStatus};
use std::collections::HashMap;
use std::fs;
use tauri::Runtime;
use tauri_plugin_hardware::get_system_info;
/// Read GGUF metadata from a model file
#[tauri::command]
pub async fn read_gguf_metadata(path: String) -> Result<GgufMetadata, String> {
if path.starts_with("http://") || path.starts_with("https://") {
// Remote: read in 2MB chunks until successful
return read_gguf_metadata_internal(path).await;
}
#[tauri::command]
pub async fn estimate_kv_cache_size(
meta: HashMap<String, String>,
ctx_size: Option<u64>,
) -> Result<KVCacheEstimate, KVCacheError> {
estimate_kv_cache_internal(meta, ctx_size).await
}
#[tauri::command]
pub async fn get_model_size(path: String) -> Result<u64, String> {
if path.starts_with("https://") {
// Handle remote URL
let client = reqwest::Client::new();
let chunk_size = 2 * 1024 * 1024; // Fixed 2MB chunks
let max_total_size = 120 * 1024 * 1024; // Don't exceed 120MB total
let mut total_downloaded = 0;
let mut accumulated_data = Vec::new();
let response = client
.head(&path)
.send()
.await
.map_err(|e| format!("Failed to fetch HEAD request: {}", e))?;
while total_downloaded < max_total_size {
let start = total_downloaded;
let end = std::cmp::min(start + chunk_size - 1, max_total_size - 1);
let resp = client
.get(&path)
.header("Range", format!("bytes={}-{}", start, end))
.send()
.await
.map_err(|e| format!("Failed to fetch chunk {}-{}: {}", start, end, e))?;
let chunk_data = resp
.bytes()
.await
.map_err(|e| format!("Failed to read chunk response: {}", e))?;
accumulated_data.extend_from_slice(&chunk_data);
total_downloaded += chunk_data.len();
// Try parsing after each chunk
let cursor = std::io::Cursor::new(&accumulated_data);
if let Ok(metadata) = helpers::read_gguf_metadata(cursor) {
return Ok(metadata);
}
// If we got less data than expected, we've reached EOF
if chunk_data.len() < chunk_size {
break;
}
if let Some(content_length) = response.headers().get("content-length") {
let content_length_str = content_length
.to_str()
.map_err(|e| format!("Invalid content-length header: {}", e))?;
content_length_str
.parse::<u64>()
.map_err(|e| format!("Failed to parse content-length: {}", e))
} else {
Ok(0)
}
Err("Could not parse GGUF metadata from downloaded data".to_string())
} else {
// Local: use streaming file reader
let file =
File::open(&path).map_err(|e| format!("Failed to open local file {}: {}", path, e))?;
let reader = BufReader::new(file);
helpers::read_gguf_metadata(reader)
.map_err(|e| format!("Failed to parse GGUF metadata: {}", e))
// Handle local file using standard fs
let metadata =
fs::metadata(&path).map_err(|e| format!("Failed to get file metadata: {}", e))?;
Ok(metadata.len())
}
}
#[tauri::command]
pub async fn is_model_supported<R: Runtime>(
path: String,
ctx_size: Option<u32>,
app_handle: tauri::AppHandle<R>,
) -> Result<ModelSupportStatus, String> {
// Get model size
let model_size = get_model_size(path.clone()).await?;
// Get system info
let system_info = get_system_info(app_handle.clone());
log::info!("modelSize: {}", model_size);
// Read GGUF metadata
let gguf = read_gguf_metadata(path.clone()).await?;
// Calculate KV cache size
let kv_cache_size = if let Some(ctx_size) = ctx_size {
log::info!("Using ctx_size: {}", ctx_size);
estimate_kv_cache_internal(gguf.metadata, Some(ctx_size as u64))
.await
.map_err(|e| e.to_string())?
.size
} else {
estimate_kv_cache_internal(gguf.metadata, None)
.await
.map_err(|e| e.to_string())?
.size
};
// Total memory consumption = model weights + kvcache
let total_required = model_size + kv_cache_size;
log::info!(
"isModelSupported: Total memory requirement: {} for {}; Got kvCacheSize: {} from BE",
total_required,
path,
kv_cache_size
);
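// 2288490189 bytes ≈ 2.13 GiB, kept free for the OS and GPU driver (assumed headroom).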
const RESERVE_BYTES: u64 = 2288490189;
let total_system_memory = system_info.total_memory * 1024 * 1024;
// Calculate total VRAM from all GPUs
let total_vram: u64 = if system_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_system_memory
} else {
system_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
};
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
let usable_vram = if total_vram > RESERVE_BYTES {
total_vram - RESERVE_BYTES
} else {
0
};
let usable_total_memory = if total_system_memory > RESERVE_BYTES {
(total_system_memory - RESERVE_BYTES) + usable_vram
} else {
0
};
log::info!("System RAM: {} bytes", &total_system_memory);
log::info!("Total VRAM: {} bytes", &total_vram);
log::info!("Usable total memory: {} bytes", &usable_total_memory);
log::info!("Usable VRAM: {} bytes", &usable_vram);
log::info!("Required: {} bytes", &total_required);
// Check if model fits in total memory at all (this is the hard limit)
if total_required > usable_total_memory {
return Ok(ModelSupportStatus::Red); // Truly impossible to run
}
// Check if everything fits in VRAM (ideal case)
if total_required <= usable_vram {
return Ok(ModelSupportStatus::Green);
}
// If we get here, it means:
// - Total requirement fits in combined memory
// - But doesn't fit entirely in VRAM
// This is the CPU-GPU hybrid scenario
Ok(ModelSupportStatus::Yellow)
}

View File

@ -1,3 +1,5 @@
pub mod commands;
pub mod helpers;
pub mod types;
pub mod utils;
pub mod model_planner;

View File

@ -0,0 +1,318 @@
use crate::gguf::commands::get_model_size;
use crate::gguf::utils::estimate_kv_cache_internal;
use crate::gguf::utils::read_gguf_metadata_internal;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tauri::Runtime;
use tauri_plugin_hardware::get_system_info;
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(rename_all = "camelCase")]
pub struct ModelPlan {
pub gpu_layers: u64,
pub max_context_length: u64,
pub no_offload_kv_cache: bool,
pub offload_mmproj: bool,
pub batch_size: u64,
pub mode: ModelMode,
}
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
#[serde(rename_all = "UPPERCASE")]
pub enum ModelMode {
GPU,
Hybrid,
CPU,
Unsupported,
}
#[tauri::command]
pub async fn plan_model_load<R: Runtime>(
path: String,
memory_mode: String,
mmproj_path: Option<String>,
requested_ctx: Option<u64>,
app: tauri::AppHandle<R>,
) -> Result<ModelPlan, String> {
let model_size = get_model_size(path.clone()).await?;
let sys_info = get_system_info(app.clone());
let gguf = read_gguf_metadata_internal(path.clone()).await?;
let mut mmproj_size: u64 = 0;
if let Some(ref mmproj) = mmproj_path {
mmproj_size = get_model_size(mmproj.clone()).await?;
}
let arch = gguf
.metadata
.get("general.architecture")
.ok_or("Missing architecture")?;
let repeating_layers: u64 = gguf
.metadata
.get(&format!("{arch}.block_count"))
.ok_or("Missing block_count")?
.parse()
.map_err(|_| "Invalid block_count")?;
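// +1 approximates the non-repeating output/embedding weights as one extra layer.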
let total_layers = repeating_layers + 1;
let layer_size = model_size / total_layers;
let kv_cache = estimate_kv_cache_internal(gguf.metadata.clone(), None)
.await
.map_err(|e| e.to_string())?;
let kv_cache_per_token = kv_cache.per_token_size;
if model_size == 0 || layer_size == 0 || kv_cache_per_token == 0 {
return Err("Invalid model/layer/cache sizes".into());
}
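// Same ≈2.13 GiB reserve as in is_model_supported, so the plan never budgets the last bytes of RAM/VRAM.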
const RESERVE_BYTES: u64 = 2288490189;
const MIN_CONTEXT_LENGTH: u64 = 2048;
let model_max_ctx: u64 = gguf
.metadata
.get(&format!("{arch}.context_length"))
.and_then(|s| s.parse().ok())
.unwrap_or(8192);
let memory_percentages = HashMap::from([("high", 0.7), ("medium", 0.5), ("low", 0.4)]);
let multiplier = *memory_percentages
.get(memory_mode.as_str())
.ok_or("Invalid memory mode")?;
log::info!("Got GPUs:\n{:?}", &sys_info.gpus);
let total_ram: u64 = sys_info.total_memory * 1024 * 1024;
log::info!(
"Total system memory reported from tauri_plugin_hardware(in bytes): {}",
&total_ram
);
let total_vram: u64 = if sys_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_ram
} else {
sys_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
};
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
let usable_vram: u64 = if total_vram > RESERVE_BYTES {
(((total_vram - RESERVE_BYTES) as f64) * multiplier) as u64
} else {
0
};
log::info!("Usable vram calculated: {}", &usable_vram);
let usable_ram: u64 = if total_ram > RESERVE_BYTES {
(((total_ram - RESERVE_BYTES) as f64) * multiplier).max(0.0) as u64
} else {
0
};
log::info!("Usable ram calculated (in bytes): {}", &usable_ram);
let mut gpu_layers = 0;
let mut max_ctx_len = 0;
let mut no_offload_kv_cache = false;
let mut mode = ModelMode::Unsupported;
let mut offload_mmproj = false;
let mut batch_size = 2048;
let total_available_mem = usable_vram.saturating_add(usable_ram);
if model_size + mmproj_size > total_available_mem {
log::info!("Model not supported in this system!");
return Ok(ModelPlan {
gpu_layers: 0,
max_context_length: 0,
no_offload_kv_cache: true,
batch_size: 64,
mode: ModelMode::Unsupported,
offload_mmproj: false,
});
}
if mmproj_size > 0 {
offload_mmproj = true;
}
let kv_min_size = estimate_kv_cache_internal(gguf.metadata.clone(), Some(MIN_CONTEXT_LENGTH))
.await
.map_err(|e| e.to_string())?
.size;
if model_size + kv_min_size + mmproj_size <= usable_vram {
log::info!("Planning mode: Full GPU offload is possible.");
mode = ModelMode::GPU;
gpu_layers = total_layers;
let vram_left_for_ctx = usable_vram.saturating_sub(model_size);
let max_ctx_by_vram = (vram_left_for_ctx / kv_cache_per_token) as u64;
let requested_target = requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
max_ctx_len = requested_target.min(max_ctx_by_vram);
no_offload_kv_cache = false;
offload_mmproj = true;
} else {
let mut found_plan = false;
log::info!("Attempting VRAM-Maximized Hybrid plan (KV cache in VRAM only).");
for candidate_gpu_layers in (0..=total_layers).rev() {
let vram_used_by_layers = candidate_gpu_layers.saturating_mul(layer_size);
if vram_used_by_layers > usable_vram {
continue;
}
let ram_used_by_cpu_layers =
(total_layers.saturating_sub(candidate_gpu_layers)).saturating_mul(layer_size);
let ram_used_by_mmproj = if offload_mmproj { 0 } else { mmproj_size };
let required_ram_for_model = ram_used_by_cpu_layers.saturating_add(ram_used_by_mmproj);
if required_ram_for_model > usable_ram {
continue;
}
let vram_left_for_kv = usable_vram.saturating_sub(vram_used_by_layers);
let ctx_in_vram_only = (vram_left_for_kv / kv_cache_per_token) as u64;
if ctx_in_vram_only >= MIN_CONTEXT_LENGTH {
log::info!(
"Found VRAM-Maximized Hybrid plan with {} GPU layers.",
candidate_gpu_layers
);
mode = ModelMode::Hybrid;
gpu_layers = candidate_gpu_layers;
let requested_target = requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
max_ctx_len = requested_target.min(ctx_in_vram_only);
no_offload_kv_cache = false;
found_plan = true;
break;
}
}
if !found_plan {
log::info!("VRAM-Maximized plan not feasible. Falling back to Standard Hybrid (KV cache in VRAM+RAM).");
for candidate_gpu_layers in (0..=total_layers).rev() {
let vram_used_by_layers = candidate_gpu_layers.saturating_mul(layer_size);
if vram_used_by_layers > usable_vram {
continue;
}
let vram_left_for_kv = usable_vram.saturating_sub(vram_used_by_layers);
let kv_in_vram = (vram_left_for_kv / kv_cache_per_token) as u64;
let ram_used_by_cpu_layers =
(total_layers.saturating_sub(candidate_gpu_layers)).saturating_mul(layer_size);
let ram_used_by_mmproj = if offload_mmproj { 0 } else { mmproj_size };
let required_ram_for_model =
ram_used_by_cpu_layers.saturating_add(ram_used_by_mmproj);
if required_ram_for_model > usable_ram {
continue;
}
let available_ram_for_kv = usable_ram.saturating_sub(required_ram_for_model);
let kv_in_ram = (available_ram_for_kv / kv_cache_per_token) as u64;
let total_kv_tokens = kv_in_vram.saturating_add(kv_in_ram);
if total_kv_tokens >= MIN_CONTEXT_LENGTH {
log::info!(
"Found Standard Hybrid plan with {} GPU layers.",
candidate_gpu_layers
);
mode = if candidate_gpu_layers > 0 {
ModelMode::Hybrid
} else {
ModelMode::CPU
};
gpu_layers = candidate_gpu_layers;
let requested_target =
requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
let max_possible_ctx = total_kv_tokens.min(model_max_ctx);
max_ctx_len = requested_target.min(max_possible_ctx);
no_offload_kv_cache = kv_in_ram > 0 && kv_in_vram == 0;
found_plan = true;
break;
}
}
}
if !found_plan {
log::info!("No hybrid plan found. Attempting CPU-only plan.");
if model_size + mmproj_size <= usable_ram {
let available_ram_for_kv = usable_ram.saturating_sub(model_size + mmproj_size);
let kv_tokens = (available_ram_for_kv / kv_cache_per_token) as u64;
if kv_tokens >= MIN_CONTEXT_LENGTH {
mode = ModelMode::CPU;
gpu_layers = 0;
max_ctx_len = kv_tokens
.min(requested_ctx.unwrap_or(model_max_ctx))
.min(model_max_ctx);
no_offload_kv_cache = true;
offload_mmproj = false;
}
}
}
}
if let Some(req) = requested_ctx {
if req > 0 {
max_ctx_len = max_ctx_len.min(req);
}
}
max_ctx_len = max_ctx_len.min(model_max_ctx);
if max_ctx_len > 0 {
log::info!("Max context before power-of-2 adjustment: {}", max_ctx_len);
max_ctx_len = 1u64 << (63 - max_ctx_len.leading_zeros());
log::info!("Adjusted max context to power of 2: {}", max_ctx_len);
}
if mode == ModelMode::Unsupported {
if max_ctx_len >= MIN_CONTEXT_LENGTH {
// do nothing, plan is viable but wasn't assigned a mode
} else {
gpu_layers = 0;
max_ctx_len = 0;
offload_mmproj = false;
}
} else if max_ctx_len < MIN_CONTEXT_LENGTH {
log::info!(
"Final context length {} is less than minimum required {}. Marking as unsupported.",
max_ctx_len,
MIN_CONTEXT_LENGTH
);
mode = ModelMode::Unsupported;
gpu_layers = 0;
max_ctx_len = 0;
offload_mmproj = false;
}
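// Batch-size heuristic (assumed rationale): large batches only pay off with full
// GPU offload; hybrid and CPU paths use smaller batches to limit memory spikes.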
if mode == ModelMode::Hybrid {
batch_size = 256;
} else if mode == ModelMode::CPU || no_offload_kv_cache || mode == ModelMode::Unsupported {
batch_size = 64;
}
if max_ctx_len > 0 {
batch_size = batch_size.min(max_ctx_len);
} else {
batch_size = 64;
}
if mode == ModelMode::CPU || no_offload_kv_cache {
offload_mmproj = false;
}
log::info!("Planned model load params: GPU Layers: {}, max_ctx_len: {}, kv_cache offload: {}, offload mmproj: {}, batch_size: {}",
gpu_layers, max_ctx_len, !no_offload_kv_cache, offload_mmproj, batch_size);
Ok(ModelPlan {
gpu_layers,
max_context_length: max_ctx_len,
no_offload_kv_cache,
offload_mmproj,
batch_size,
mode,
})
}

View File

@ -1,4 +1,4 @@
use serde::Serialize;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::convert::TryFrom;
use std::io;
@ -52,3 +52,42 @@ pub struct GgufMetadata {
pub tensor_count: u64,
pub metadata: HashMap<String, String>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct KVCacheEstimate {
pub size: u64,
pub per_token_size: u64,
}
#[derive(Debug, thiserror::Error)]
pub enum KVCacheError {
#[error("Invalid metadata: architecture not found")]
ArchitectureNotFound,
#[error("Invalid metadata: block_count not found or invalid")]
BlockCountInvalid,
#[error("Invalid metadata: head_count not found or invalid")]
HeadCountInvalid,
#[error("Invalid metadata: embedding_length not found or invalid")]
EmbeddingLengthInvalid,
#[error("Invalid metadata: context_length not found or invalid")]
ContextLengthInvalid,
}
impl serde::Serialize for KVCacheError {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_str(&self.to_string())
}
}
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize)]
pub enum ModelSupportStatus {
#[serde(rename = "RED")]
Red,
#[serde(rename = "YELLOW")]
Yellow,
#[serde(rename = "GREEN")]
Green,
}

View File

@ -0,0 +1,164 @@
use crate::gguf::helpers;
use crate::gguf::types::{GgufMetadata, KVCacheError, KVCacheEstimate};
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
// read gguf metadata
pub async fn read_gguf_metadata_internal(path: String) -> Result<GgufMetadata, String> {
if path.starts_with("http://") || path.starts_with("https://") {
// Remote: read in 2MB chunks until successful
let client = reqwest::Client::new();
let chunk_size = 2 * 1024 * 1024; // Fixed 2MB chunks
let max_total_size = 120 * 1024 * 1024; // Don't exceed 120MB total
let mut total_downloaded = 0;
let mut accumulated_data = Vec::new();
while total_downloaded < max_total_size {
let start = total_downloaded;
let end = std::cmp::min(start + chunk_size - 1, max_total_size - 1);
let resp = client
.get(&path)
.header("Range", format!("bytes={}-{}", start, end))
.send()
.await
.map_err(|e| format!("Failed to fetch chunk {}-{}: {}", start, end, e))?;
let chunk_data = resp
.bytes()
.await
.map_err(|e| format!("Failed to read chunk response: {}", e))?;
accumulated_data.extend_from_slice(&chunk_data);
total_downloaded += chunk_data.len();
// Try parsing after each chunk
let cursor = std::io::Cursor::new(&accumulated_data);
if let Ok(metadata) = helpers::read_gguf_metadata(cursor) {
return Ok(metadata);
}
// If we got less data than expected, we've reached EOF
if chunk_data.len() < chunk_size {
break;
}
}
Err("Could not parse GGUF metadata from downloaded data".to_string())
} else {
// Local: use streaming file reader
let file =
File::open(&path).map_err(|e| format!("Failed to open local file {}: {}", path, e))?;
let reader = BufReader::new(file);
helpers::read_gguf_metadata(reader)
.map_err(|e| format!("Failed to parse GGUF metadata: {}", e))
}
}
/// Estimate KV cache size from the given metadata
pub async fn estimate_kv_cache_internal(
meta: HashMap<String, String>,
ctx_size: Option<u64>,
) -> Result<KVCacheEstimate, KVCacheError> {
log::info!("Received ctx_size parameter: {:?}", ctx_size);
let arch = meta
.get("general.architecture")
.ok_or(KVCacheError::ArchitectureNotFound)?;
// Number of layers
let n_layer_key = format!("{}.block_count", arch);
let n_layer = meta
.get(&n_layer_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.ok_or(KVCacheError::BlockCountInvalid)?;
// Attention heads (use kv heads if present, else full heads)
let n_head_key = format!("{}.attention.head_count", arch);
let n_head_kv_key = format!("{}.attention.head_count_kv", arch);
let n_head = meta
.get(&n_head_kv_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.unwrap_or_else(|| {
meta.get(&n_head_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0)
});
if n_head == 0 {
return Err(KVCacheError::HeadCountInvalid);
}
// Key/value dimensions
let key_len_key = format!("{}.attention.key_length", arch);
let val_len_key = format!("{}.attention.value_length", arch);
let key_len = meta
.get(&key_len_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0);
let val_len = meta
.get(&val_len_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0);
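// Note: key/value lengths are required here; the embedding-length error variant
// is reused when they are missing.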
if key_len == 0 || val_len == 0 {
return Err(KVCacheError::EmbeddingLengthInvalid);
}
// Context length
let max_ctx_key = format!("{}.context_length", arch);
let max_ctx = meta
.get(&max_ctx_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.ok_or(KVCacheError::ContextLengthInvalid)?;
let ctx_len = ctx_size.map(|size| size.min(max_ctx)).unwrap_or(max_ctx);
// Sliding window if present
let sliding_key = format!("{}.attention.sliding_window", arch);
let sliding_window = meta
.get(&sliding_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0);
// Assume fp16
const BYTES_PER_ELEMENT: u64 = 2;
// Per-token KV size
let kv_per_token = n_layer * n_head * (key_len + val_len) * BYTES_PER_ELEMENT;
// Pure full-attention cost
let full_cost = ctx_len * kv_per_token;
// Pure sliding-window cost (tiny, only keeps last W tokens)
let sliding_cost = sliding_window.map(|w| w * kv_per_token);
// Middle estimate: average of sliding + full if sliding_window is present
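// e.g. ctx 8192 with a 4096-token window: full = 8192 * kv, sliding = 4096 * kv,
// middle = 6144 * kv -- a heuristic midpoint for mixed SWA/full-attention layers.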
let chosen_size = if let Some(slide) = sliding_cost {
let middle = (full_cost + slide) / 2;
log::info!(
"KV estimates -> sliding: {} bytes (~{:.2} MB), full: {} bytes (~{:.2} MB), middle: {} bytes (~{:.2} MB)",
slide,
slide as f64 / (1024.0 * 1024.0),
full_cost,
full_cost as f64 / (1024.0 * 1024.0),
middle,
middle as f64 / (1024.0 * 1024.0)
);
middle
} else {
log::info!(
"KV estimate (no SWA detected) -> full: {} bytes (~{:.2} MB)",
full_cost,
full_cost as f64 / (1024.0 * 1024.0)
);
full_cost
};
Ok(KVCacheEstimate {
size: chosen_size,
per_token_size: kv_per_token,
})
}

View File

@ -33,6 +33,10 @@ pub fn init<R: Runtime>() -> TauriPlugin<R> {
commands::get_session_by_model,
// GGUF commands
gguf::commands::read_gguf_metadata,
gguf::commands::estimate_kv_cache_size,
gguf::commands::get_model_size,
gguf::commands::is_model_supported,
gguf::model_planner::plan_model_load
])
.setup(|app, _api| {
// Initialize and manage the plugin state

View File

@ -193,7 +193,7 @@ pub fn decompress<R: Runtime>(app: tauri::AppHandle<R>, path: &str, output_dir:
fs::File::open(&path_buf).map_err(|e| e.to_string())?
}
};
#[cfg(not(windows))]
let file = fs::File::open(&path_buf).map_err(|e| e.to_string())?;
if path.ends_with(".tar.gz") {
@ -222,7 +222,10 @@ pub fn decompress<R: Runtime>(app: tauri::AppHandle<R>, path: &str, output_dir:
{
use std::os::unix::fs::PermissionsExt;
if let Some(mode) = entry.unix_mode() {
let _ = std::fs::set_permissions(&outpath, std::fs::Permissions::from_mode(mode));
let _ = std::fs::set_permissions(
&outpath,
std::fs::Permissions::from_mode(mode),
);
}
}
}

View File

@ -42,6 +42,11 @@
${If} ${FileExists} "$INSTDIR\resources\LICENSE"
CopyFiles /SILENT "$INSTDIR\resources\LICENSE" "$INSTDIR\LICENSE"
DetailPrint "Copied LICENSE to install root"
; Optional cleanup - remove from resources folder
Delete "$INSTDIR\resources\LICENSE"
${Else}
DetailPrint "LICENSE not found at expected location: $INSTDIR\resources\LICENSE"
${EndIf}
; ---- Copy vulkan-1.dll to install root ----
@ -51,6 +56,7 @@
; Optional cleanup - remove from resources folder
Delete "$INSTDIR\resources\lib\vulkan-1.dll"
; Only remove the lib directory if it's empty after removing both files
RMDir "$INSTDIR\resources\lib"
${Else}

View File

@ -2,6 +2,7 @@ import { render, screen, fireEvent } from '@testing-library/react'
import { describe, it, expect, vi } from 'vitest'
import userEvent from '@testing-library/user-event'
import React from 'react'
import '@testing-library/jest-dom'
import {
Dialog,
DialogTrigger,
@ -117,7 +118,7 @@ describe('Dialog Components', () => {
it('applies proper classes to dialog content', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -128,27 +129,38 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogContent = screen.getByRole('dialog')
expect(dialogContent).toHaveClass(
'bg-main-view',
'max-h-[calc(100%-80px)]',
'overflow-auto',
'border-main-view-fg/10',
'text-main-view-fg',
'fixed',
'top-[50%]',
'left-[50%]',
'z-50',
'z-[90]',
'grid',
'w-full',
'max-w-[calc(100%-2rem)]',
'translate-x-[-50%]',
'translate-y-[-50%]',
'border',
'gap-4',
'rounded-lg',
'shadow-lg'
'border',
'p-6',
'shadow-lg',
'duration-200',
'sm:max-w-lg'
)
})
it('applies proper classes to dialog header', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -159,11 +171,11 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogHeader = screen.getByText('Dialog Title').closest('div')
expect(dialogHeader).toHaveClass('flex', 'flex-col', 'gap-2', 'text-center')
expect(dialogHeader).toHaveClass('flex', 'flex-col', 'gap-2', 'text-center', 'sm:text-left')
})
it('applies proper classes to dialog title', async () => {
@ -299,7 +311,7 @@ describe('Dialog Components', () => {
it('supports onOpenChange callback', async () => {
const onOpenChange = vi.fn()
const user = userEvent.setup()
render(
<Dialog onOpenChange={onOpenChange}>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -310,9 +322,98 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(onOpenChange).toHaveBeenCalledWith(true)
})
it('can hide close button when showCloseButton is false', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent showCloseButton={false}>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(screen.queryByRole('button', { name: /close/i })).not.toBeInTheDocument()
})
it('shows close button by default', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(screen.getByRole('button', { name: /close/i })).toBeInTheDocument()
})
it('accepts aria-describedby prop', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent aria-describedby="custom-description">
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
<p id="custom-description">Custom description text</p>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogContent = screen.getByRole('dialog')
expect(dialogContent).toHaveAttribute('aria-describedby', 'custom-description')
})
it('applies data-slot attributes to components', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
<DialogDescription>Dialog description</DialogDescription>
</DialogHeader>
<div>Dialog body content</div>
<DialogFooter>
<button>Footer button</button>
</DialogFooter>
</DialogContent>
</Dialog>
)
expect(screen.getByText('Open Dialog')).toHaveAttribute('data-slot', 'dialog-trigger')
await user.click(screen.getByText('Open Dialog'))
expect(screen.getByRole('dialog')).toHaveAttribute('data-slot', 'dialog-content')
expect(screen.getByText('Dialog Title').closest('div')).toHaveAttribute('data-slot', 'dialog-header')
expect(screen.getByText('Dialog Title')).toHaveAttribute('data-slot', 'dialog-title')
expect(screen.getByText('Dialog description')).toHaveAttribute('data-slot', 'dialog-description')
expect(screen.getByText('Footer button').closest('div')).toHaveAttribute('data-slot', 'dialog-footer')
})
})

View File

@ -37,7 +37,7 @@ function DialogOverlay({
<DialogPrimitive.Overlay
data-slot="dialog-overlay"
className={cn(
'data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-main-view/80 backdrop-blur-sm',
'data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-[80] bg-main-view/80 backdrop-blur-sm',
className
)}
{...props}
@ -67,7 +67,7 @@ function DialogContent({
data-slot="dialog-content"
aria-describedby={ariaDescribedBy}
className={cn(
'bg-main-view max-h-[calc(100%-80px)] overflow-auto border-main-view-fg/10 text-main-view-fg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg',
'bg-main-view max-h-[calc(100%-80px)] overflow-auto border-main-view-fg/10 text-main-view-fg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-[90] grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg',
className
)}
{...props}

View File

@ -21,4 +21,5 @@ export const localStorageKey = {
lastUsedAssistant: 'last-used-assistant',
favoriteModels: 'favorite-models',
setupCompleted: 'setup-completed',
threadManagement: 'thread-management',
}

View File

@ -3,6 +3,8 @@ export const route = {
home: '/',
appLogs: '/logs',
assistant: '/assistant',
project: '/project',
projectDetail: '/project/$projectId',
settings: {
index: '/settings',
model_providers: '/settings/providers',

View File

@ -4,6 +4,7 @@ import TextareaAutosize from 'react-textarea-autosize'
import { cn } from '@/lib/utils'
import { usePrompt } from '@/hooks/usePrompt'
import { useThreads } from '@/hooks/useThreads'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useCallback, useEffect, useRef, useState } from 'react'
import { Button } from '@/components/ui/button'
import {
@ -43,9 +44,15 @@ type ChatInputProps = {
showSpeedToken?: boolean
model?: ThreadModel
initialMessage?: boolean
projectId?: string
}
const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
const ChatInput = ({
model,
className,
initialMessage,
projectId,
}: ChatInputProps) => {
const textareaRef = useRef<HTMLTextAreaElement>(null)
const [isFocused, setIsFocused] = useState(false)
const [rows, setRows] = useState(1)
@ -58,6 +65,8 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
const prompt = usePrompt((state) => state.prompt)
const setPrompt = usePrompt((state) => state.setPrompt)
const currentThreadId = useThreads((state) => state.currentThreadId)
const updateThread = useThreads((state) => state.updateThread)
const { getFolderById } = useThreadManagement()
const { t } = useTranslation()
const spellCheckChatInput = useGeneralSetting(
(state) => state.spellCheckChatInput
@ -177,6 +186,28 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
uploadedFiles.length > 0 ? uploadedFiles : undefined
)
setUploadedFiles([])
// Handle project assignment for new threads
if (projectId && !currentThreadId) {
const project = getFolderById(projectId)
if (project) {
// Use setTimeout to ensure the thread is created first
setTimeout(() => {
const newCurrentThreadId = useThreads.getState().currentThreadId
if (newCurrentThreadId) {
updateThread(newCurrentThreadId, {
metadata: {
project: {
id: project.id,
name: project.name,
updated_at: project.updated_at,
},
},
})
}
}, 100)
}
}
}
useEffect(() => {

View File

@ -0,0 +1,142 @@
import { Button } from '@/components/ui/button'
import { Progress } from '@/components/ui/progress'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useServiceHub } from '@/hooks/useServiceHub'
import { useTranslation } from '@/i18n'
import { extractModelName } from '@/lib/models'
import { cn, sanitizeModelId } from '@/lib/utils'
import { CatalogModel } from '@/services/models/types'
import { useCallback, useMemo } from 'react'
import { useShallow } from 'zustand/shallow'
type ModelProps = {
model: CatalogModel
handleUseModel: (modelId: string) => void
}
const defaultModelQuantizations = ['iq4_xs', 'q4_k_m']
export function DownloadButtonPlaceholder({
model,
handleUseModel,
}: ModelProps) {
const { downloads, localDownloadingModels, addLocalDownloadingModel } =
useDownloadStore(
useShallow((state) => ({
downloads: state.downloads,
localDownloadingModels: state.localDownloadingModels,
addLocalDownloadingModel: state.addLocalDownloadingModel,
}))
)
const { t } = useTranslation()
const getProviderByName = useModelProvider((state) => state.getProviderByName)
const llamaProvider = getProviderByName('llamacpp')
const serviceHub = useServiceHub()
const huggingfaceToken = useGeneralSetting((state) => state.huggingfaceToken)
const quant =
model.quants.find((e) =>
defaultModelQuantizations.some((m) =>
e.model_id.toLowerCase().includes(m)
)
) ?? model.quants[0]
const modelId = quant?.model_id || model.model_name
const downloadProcesses = useMemo(
() =>
Object.values(downloads).map((download) => ({
id: download.name,
name: download.name,
progress: download.progress,
current: download.current,
total: download.total,
})),
[downloads]
)
const isRecommendedModel = useCallback((modelId: string) => {
return (extractModelName(modelId)?.toLowerCase() ===
'jan-nano-gguf') as boolean
}, [])
if (model.quants.length === 0) {
return (
<div className="flex items-center gap-2">
<Button
size="sm"
onClick={() => {
window.open(`https://huggingface.co/${model.model_name}`, '_blank')
}}
>
View on HuggingFace
</Button>
</div>
)
}
const modelUrl = quant?.path || modelId
const isDownloading =
localDownloadingModels.has(modelId) ||
downloadProcesses.some((e) => e.id === modelId)
const downloadProgress =
downloadProcesses.find((e) => e.id === modelId)?.progress || 0
const isDownloaded = llamaProvider?.models.some(
(m: { id: string }) =>
m.id === modelId ||
m.id === `${model.developer}/${sanitizeModelId(modelId)}`
)
const isRecommended = isRecommendedModel(model.model_name)
const handleDownload = () => {
// Immediately set local downloading state
addLocalDownloadingModel(modelId)
const mmprojPath = (
model.mmproj_models?.find(
(e) => e.model_id.toLowerCase() === 'mmproj-f16'
) || model.mmproj_models?.[0]
)?.path
serviceHub
.models()
.pullModelWithMetadata(modelId, modelUrl, mmprojPath, huggingfaceToken)
}
return (
<div
className={cn(
'flex items-center',
isRecommended && 'hub-download-button-step'
)}
>
{isDownloading && !isDownloaded && (
<div className={cn('flex items-center gap-2 w-20')}>
<Progress value={downloadProgress * 100} />
<span className="text-xs text-center text-main-view-fg/70">
{Math.round(downloadProgress * 100)}%
</span>
</div>
)}
{isDownloaded ? (
<Button
size="sm"
onClick={() => handleUseModel(modelId)}
data-test-id={`hub-model-${modelId}`}
>
{t('hub:use')}
</Button>
) : (
<Button
data-test-id={`hub-model-${modelId}`}
size="sm"
onClick={handleDownload}
className={cn(isDownloading && 'hidden')}
>
{t('hub:download')}
</Button>
)}
</div>
)
}
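The component above resolves which artifact to download up front: it prefers a small default quantization and falls back to the first published quant. A minimal standalone sketch of that selection, assuming only the quant shape used above (model_id plus path):

type Quant = { model_id: string; path: string }

// Preferred quantizations, mirroring the defaults above.
const preferredQuantizations = ['iq4_xs', 'q4_k_m']

function pickDefaultQuant(quants: Quant[]): Quant | undefined {
  return (
    quants.find((q) =>
      preferredQuantizations.some((p) => q.model_id.toLowerCase().includes(p))
    ) ?? quants[0] // no preferred match: take the first published quant
  )
}

// pickDefaultQuant([
//   { model_id: 'x-Q8_0', path: 'a' },
//   { model_id: 'x-Q4_K_M', path: 'b' },
// ]) -> the Q4_K_M entry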

View File

@ -400,23 +400,33 @@ export function DownloadManagement() {
className="text-main-view-fg/70 cursor-pointer"
title="Cancel download"
onClick={() => {
serviceHub
.models()
.abortDownload(download.name)
.then(() => {
toast.info(
t('common:toast.downloadCancelled.title'),
{
id: 'cancel-download',
description: t(
'common:toast.downloadCancelled.description'
),
}
// TODO: Consolidate cancellation logic
if (download.id.startsWith('llamacpp')) {
const downloadManager =
window.core.extensionManager.getByName(
'@janhq/download-extension'
)
if (downloadProcesses.length === 0) {
setIsPopoverOpen(false)
}
})
downloadManager.cancelDownload(download.id)
} else {
serviceHub
.models()
.abortDownload(download.name)
.then(() => {
toast.info(
t('common:toast.downloadCancelled.title'),
{
id: 'cancel-download',
description: t(
'common:toast.downloadCancelled.description'
),
}
)
if (downloadProcesses.length === 0) {
setIsPopoverOpen(false)
}
})
}
}}
/>
</div>
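The TODO above notes that cancellation now branches on the download id: llama.cpp pulls are cancelled through the download extension, everything else through the models service. A hypothetical consolidation of the two paths; the extension name and calls mirror the handler above, but this helper itself is not in the codebase:

// Hypothetical helper, assuming the same globals the component already uses.
async function cancelAnyDownload(download: { id: string; name: string }) {
  if (download.id.startsWith('llamacpp')) {
    // llama.cpp downloads are owned by the download extension
    const downloadManager = window.core.extensionManager.getByName(
      '@janhq/download-extension'
    )
    downloadManager.cancelDownload(download.id)
    return
  }
  // everything else goes through the models service
  await serviceHub.models().abortDownload(download.name)
}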

View File

@ -4,14 +4,18 @@ import { cn } from '@/lib/utils'
import {
IconLayoutSidebar,
IconDots,
IconCirclePlusFilled,
IconSettingsFilled,
IconCirclePlus,
IconSettings,
IconStar,
IconMessageFilled,
IconAppsFilled,
IconFolderPlus,
IconMessage,
IconApps,
IconX,
IconSearch,
IconClipboardSmileFilled,
IconClipboardSmile,
IconFolder,
IconPencil,
IconTrash,
} from '@tabler/icons-react'
import { route } from '@/constants/routes'
import ThreadList from './ThreadList'
@ -28,6 +32,7 @@ import { UserProfileMenu } from '@/containers/auth/UserProfileMenu'
import { useAuth } from '@/hooks/useAuth'
import { useThreads } from '@/hooks/useThreads'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { useMemo, useState, useEffect, useRef } from 'react'
@ -37,38 +42,40 @@ import { useSmallScreen } from '@/hooks/useMediaQuery'
import { useClickOutside } from '@/hooks/useClickOutside'
import { DeleteAllThreadsDialog } from '@/containers/dialogs'
import AddProjectDialog from '@/containers/dialogs/AddProjectDialog'
import { DeleteProjectDialog } from '@/containers/dialogs/DeleteProjectDialog'
const mainMenus = [
{
title: 'common:newChat',
icon: IconCirclePlusFilled,
icon: IconCirclePlus,
route: route.home,
isEnabled: true,
},
{
title: 'common:projects.title',
icon: IconFolderPlus,
route: route.project,
isEnabled: true,
},
{
title: 'common:assistants',
icon: IconClipboardSmileFilled,
icon: IconClipboardSmile,
route: route.assistant,
isEnabled: PlatformFeatures[PlatformFeature.ASSISTANTS],
},
{
title: 'common:hub',
icon: IconAppsFilled,
icon: IconApps,
route: route.hub.index,
isEnabled: PlatformFeatures[PlatformFeature.MODEL_HUB],
},
{
title: 'common:settings',
icon: IconSettingsFilled,
icon: IconSettings,
route: route.settings.general,
isEnabled: true,
},
{
title: 'common:authentication',
icon: null,
route: null,
isEnabled: PlatformFeatures[PlatformFeature.AUTHENTICATION],
},
]
const LeftPanel = () => {
@ -153,20 +160,65 @@ const LeftPanel = () => {
const getFilteredThreads = useThreads((state) => state.getFilteredThreads)
const threads = useThreads((state) => state.threads)
const { folders, addFolder, updateFolder, deleteFolder, getFolderById } =
useThreadManagement()
// Project dialog states
const [projectDialogOpen, setProjectDialogOpen] = useState(false)
const [editingProjectKey, setEditingProjectKey] = useState<string | null>(
null
)
const [deleteProjectConfirmOpen, setDeleteProjectConfirmOpen] =
useState(false)
const [deletingProjectId, setDeletingProjectId] = useState<string | null>(
null
)
const filteredThreads = useMemo(() => {
return getFilteredThreads(searchTerm)
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [getFilteredThreads, searchTerm, threads])
const filteredProjects = useMemo(() => {
if (!searchTerm) return folders
return folders.filter((folder) =>
folder.name.toLowerCase().includes(searchTerm.toLowerCase())
)
}, [folders, searchTerm])
// Memoize categorized threads based on filteredThreads
const favoritedThreads = useMemo(() => {
return filteredThreads.filter((t) => t.isFavorite)
}, [filteredThreads])
const unFavoritedThreads = useMemo(() => {
return filteredThreads.filter((t) => !t.isFavorite)
return filteredThreads.filter((t) => !t.isFavorite && !t.metadata?.project)
}, [filteredThreads])
// Project handlers
const handleProjectDelete = (id: string) => {
setDeletingProjectId(id)
setDeleteProjectConfirmOpen(true)
}
const confirmProjectDelete = () => {
if (deletingProjectId) {
deleteFolder(deletingProjectId)
setDeleteProjectConfirmOpen(false)
setDeletingProjectId(null)
}
}
const handleProjectSave = (name: string) => {
if (editingProjectKey) {
updateFolder(editingProjectKey, name)
} else {
addFolder(name)
}
setProjectDialogOpen(false)
setEditingProjectKey(null)
}
// Disable body scroll when panel is open on small screens
useEffect(() => {
if (isSmallScreen && open) {
@ -261,15 +313,12 @@ const LeftPanel = () => {
)}
</div>
<div className="flex flex-col justify-between overflow-hidden mt-0 !h-[calc(100%-42px)] ">
<div className={cn('flex flex-col !h-[calc(100%-200px)]')}>
<div className="flex flex-col gap-y-1 overflow-hidden mt-0 !h-[calc(100%-42px)]">
<div className="space-y-1 py-1">
{IS_MACOS && (
<div
ref={searchContainerMacRef}
className={cn(
'relative mb-4 mt-1',
isResizableContext ? 'mx-2' : 'mx-1'
)}
className={cn('relative mb-2 mt-1 mx-1')}
data-ignore-outside-clicks
>
<IconSearch className="absolute size-4 top-1/2 left-2 -translate-y-1/2 text-left-panel-fg/50" />
@ -295,7 +344,151 @@ const LeftPanel = () => {
)}
</div>
)}
<div className="flex flex-col w-full overflow-y-auto overflow-x-hidden">
{mainMenus.map((menu) => {
if (!menu.isEnabled) {
return null
}
// Handle authentication menu specially
if (menu.title === 'common:authentication') {
return (
<div key={menu.title}>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? (
<UserProfileMenu />
) : (
<AuthLoginButton />
)}
</div>
)
}
// Regular menu items must have route and icon
if (!menu.route || !menu.icon) return null
const isActive = (() => {
// Settings routes
if (menu.route.includes(route.settings.index)) {
return currentPath.includes(route.settings.index)
}
// Default exact match for other routes
return currentPath === menu.route
})()
return (
<Link
key={menu.title}
to={menu.route}
onClick={() => isSmallScreen && setLeftPanel(false)}
data-test-id={`menu-${menu.title}`}
activeOptions={{ exact: true }}
className={cn(
'flex items-center gap-1.5 cursor-pointer hover:bg-left-panel-fg/10 py-1 px-1 rounded',
isActive && 'bg-left-panel-fg/10'
)}
>
<menu.icon size={18} className="text-left-panel-fg/70" />
<span className="font-medium text-left-panel-fg/90">
{t(menu.title)}
</span>
</Link>
)
})}
</div>
{filteredProjects.length > 0 && (
<div className="space-y-1 py-1">
<div className="flex items-center justify-between mb-2">
<span className="block text-xs text-left-panel-fg/50 px-1 font-semibold">
{t('common:projects.title')}
</span>
</div>
<div className="flex flex-col max-h-[140px] overflow-y-scroll">
{filteredProjects
.slice()
.sort((a, b) => b.updated_at - a.updated_at)
.map((folder) => {
const ProjectItem = () => {
const [openDropdown, setOpenDropdown] = useState(false)
const isProjectActive =
currentPath === `/project/${folder.id}`
return (
<div key={folder.id} className="mb-1">
<div
className={cn(
'rounded hover:bg-left-panel-fg/10 flex items-center justify-between gap-2 px-1.5 group/project-list transition-all cursor-pointer',
isProjectActive && 'bg-left-panel-fg/10'
)}
>
<Link
to="/project/$projectId"
params={{ projectId: folder.id }}
onClick={() =>
isSmallScreen && setLeftPanel(false)
}
className="py-1 pr-2 truncate flex items-center gap-2 flex-1"
>
<IconFolder
size={16}
className="text-left-panel-fg/70"
/>
<span className="text-sm text-left-panel-fg/90">
{folder.name}
</span>
</Link>
<div className="flex items-center">
<DropdownMenu
open={openDropdown}
onOpenChange={(open) => setOpenDropdown(open)}
>
<DropdownMenuTrigger asChild>
<IconDots
size={14}
className="text-left-panel-fg/60 shrink-0 cursor-pointer px-0.5 -mr-1 data-[state=open]:bg-left-panel-fg/10 rounded group-hover/project-list:data-[state=closed]:size-5 size-5 data-[state=closed]:size-0"
onClick={(e) => {
e.preventDefault()
e.stopPropagation()
}}
/>
</DropdownMenuTrigger>
<DropdownMenuContent side="bottom" align="end">
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
setEditingProjectKey(folder.id)
setProjectDialogOpen(true)
}}
>
<IconPencil size={16} />
<span>Edit</span>
</DropdownMenuItem>
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
handleProjectDelete(folder.id)
}}
>
<IconTrash size={16} />
<span>Delete</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</div>
</div>
</div>
)
}
return <ProjectItem key={folder.id} />
})}
</div>
</div>
)}
<div className="flex flex-col h-full overflow-y-scroll w-[calc(100%+6px)]">
<div className="flex flex-col w-full h-full overflow-y-auto overflow-x-hidden">
<div className="h-full w-full overflow-y-auto">
{favoritedThreads.length > 0 && (
<>
@ -398,7 +591,7 @@ const LeftPanel = () => {
<>
<div className="px-1 mt-2">
<div className="flex items-center gap-1 text-left-panel-fg/80">
<IconMessageFilled size={18} />
<IconMessage size={18} />
<h6 className="font-medium text-base">
{t('common:noThreadsYet')}
</h6>
@ -415,59 +608,38 @@ const LeftPanel = () => {
</div>
</div>
</div>
{PlatformFeatures[PlatformFeature.AUTHENTICATION] && (
<div className="space-y-1 shrink-0 py-1">
<div>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? <UserProfileMenu /> : <AuthLoginButton />}
</div>
</div>
)}
<DownloadManagement />
</div>
<div className="space-y-1 shrink-0 py-1 mt-2">
{mainMenus.map((menu) => {
if (!menu.isEnabled) {
return null
}
// Handle authentication menu specially
if (menu.title === 'common:authentication') {
return (
<div key={menu.title}>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? (
<UserProfileMenu />
) : (
<AuthLoginButton />
)}
</div>
)
}
// Regular menu items must have route and icon
if (!menu.route || !menu.icon) return null
const isActive =
currentPath.includes(route.settings.index) &&
menu.route.includes(route.settings.index)
return (
<Link
key={menu.title}
to={menu.route}
onClick={() => isSmallScreen && setLeftPanel(false)}
data-test-id={`menu-${menu.title}`}
className={cn(
'flex items-center gap-1.5 cursor-pointer hover:bg-left-panel-fg/10 py-1 px-1 rounded',
isActive
? 'bg-left-panel-fg/10'
: '[&.active]:bg-left-panel-fg/10'
)}
>
<menu.icon size={18} className="text-left-panel-fg/70" />
<span className="font-medium text-left-panel-fg/90">
{t(menu.title)}
</span>
</Link>
)
})}
</div>
<DownloadManagement />
</div>
</aside>
{/* Project Dialogs */}
<AddProjectDialog
open={projectDialogOpen}
onOpenChange={setProjectDialogOpen}
editingKey={editingProjectKey}
initialData={
editingProjectKey ? getFolderById(editingProjectKey) : undefined
}
onSave={handleProjectSave}
/>
<DeleteProjectDialog
open={deleteProjectConfirmOpen}
onOpenChange={setDeleteProjectConfirmOpen}
onConfirm={confirmProjectDelete}
projectName={
deletingProjectId ? getFolderById(deletingProjectId)?.name : undefined
}
/>
</>
)
}

View File

@ -103,6 +103,13 @@ export function ModelSetting({
})
}
if (model.settings?.batch_size && result.batchSize !== undefined) {
settingsToUpdate.push({
key: 'batch_size',
value: result.batchSize,
})
}
// Apply all settings in a single update to avoid race conditions
if (settingsToUpdate.length > 0) {
handleMultipleSettingsChange(settingsToUpdate)
@ -163,7 +170,8 @@ export function ModelSetting({
key === 'ctx_len' ||
key === 'ngl' ||
key === 'chat_template' ||
key === 'offload_mmproj'
key === 'offload_mmproj' ||
key === 'batch_size'
)
if (requiresRestart) {
@ -222,7 +230,8 @@ export function ModelSetting({
key === 'ctx_len' ||
key === 'ngl' ||
key === 'chat_template' ||
key === 'offload_mmproj'
key === 'offload_mmproj' ||
key === 'batch_size'
) {
// Check if model is running before stopping it
serviceHub

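Both hunks above extend the same hard-coded list, making batch_size a restart-requiring setting alongside ctx_len, ngl, chat_template and offload_mmproj. A small sketch of factoring the duplicated check into one predicate (a refactor suggestion, not code from this commit):

// Settings that only take effect after the model process restarts.
const RESTART_REQUIRED_KEYS = new Set([
  'ctx_len',
  'ngl',
  'chat_template',
  'offload_mmproj',
  'batch_size',
])

const requiresRestart = (key: string): boolean => RESTART_REQUIRED_KEYS.has(key)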
View File

@ -89,6 +89,7 @@ const CodeComponent = memo(
onCopy,
copiedId,
...props
// eslint-disable-next-line @typescript-eslint/no-explicit-any
}: any) => {
const { t } = useTranslation()
const match = /language-(\w+)/.exec(className || '')

View File

@ -20,7 +20,7 @@ function SetupScreen() {
localStorage.getItem(localStorageKey.setupCompleted) === 'true'
return (
<div className="flex h-full flex-col flex-justify-center">
<div className="flex h-full flex-col justify-center">
<HeaderPage></HeaderPage>
<div className="h-full px-8 overflow-y-auto flex flex-col gap-2 justify-center ">
<div className="w-full lg:w-4/6 mx-auto">

View File

@ -16,9 +16,13 @@ import {
IconDots,
IconStarFilled,
IconStar,
IconFolder,
IconX,
} from '@tabler/icons-react'
import { useThreads } from '@/hooks/useThreads'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useLeftPanel } from '@/hooks/useLeftPanel'
import { useMessages } from '@/hooks/useMessages'
import { cn } from '@/lib/utils'
import { useSmallScreen } from '@/hooks/useMediaQuery'
@ -28,147 +32,274 @@ import {
DropdownMenuItem,
DropdownMenuSeparator,
DropdownMenuTrigger,
DropdownMenuSub,
DropdownMenuSubContent,
DropdownMenuSubTrigger,
} from '@/components/ui/dropdown-menu'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { memo, useMemo, useState } from 'react'
import { memo, MouseEvent, useMemo, useState } from 'react'
import { useNavigate, useMatches } from '@tanstack/react-router'
import { RenameThreadDialog, DeleteThreadDialog } from '@/containers/dialogs'
import { route } from '@/constants/routes'
import { toast } from 'sonner'
const SortableItem = memo(({ thread }: { thread: Thread }) => {
const {
attributes,
listeners,
setNodeRef,
transform,
transition,
isDragging,
} = useSortable({ id: thread.id, disabled: true })
const SortableItem = memo(
({
thread,
variant,
}: {
thread: Thread
variant?: 'default' | 'project'
}) => {
const {
attributes,
listeners,
setNodeRef,
transform,
transition,
isDragging,
} = useSortable({ id: thread.id, disabled: true })
const isSmallScreen = useSmallScreen()
const setLeftPanel = useLeftPanel(state => state.setLeftPanel)
const isSmallScreen = useSmallScreen()
const setLeftPanel = useLeftPanel((state) => state.setLeftPanel)
const style = {
transform: CSS.Transform.toString(transform),
transition,
opacity: isDragging ? 0.5 : 1,
}
const toggleFavorite = useThreads((state) => state.toggleFavorite)
const deleteThread = useThreads((state) => state.deleteThread)
const renameThread = useThreads((state) => state.renameThread)
const { t } = useTranslation()
const [openDropdown, setOpenDropdown] = useState(false)
const navigate = useNavigate()
// Check if current route matches this thread's detail page
const matches = useMatches()
const isActive = matches.some(
(match) =>
match.routeId === '/threads/$threadId' &&
'threadId' in match.params &&
match.params.threadId === thread.id
)
const style = {
transform: CSS.Transform.toString(transform),
transition,
opacity: isDragging ? 0.5 : 1,
}
const toggleFavorite = useThreads((state) => state.toggleFavorite)
const deleteThread = useThreads((state) => state.deleteThread)
const renameThread = useThreads((state) => state.renameThread)
const updateThread = useThreads((state) => state.updateThread)
const getFolderById = useThreadManagement().getFolderById
const { folders } = useThreadManagement()
const getMessages = useMessages((state) => state.getMessages)
const { t } = useTranslation()
const [openDropdown, setOpenDropdown] = useState(false)
const navigate = useNavigate()
// Check if current route matches this thread's detail page
const matches = useMatches()
const isActive = matches.some(
(match) =>
match.routeId === '/threads/$threadId' &&
'threadId' in match.params &&
match.params.threadId === thread.id
)
const handleClick = () => {
if (!isDragging) {
// Only close panel and navigate if the thread is not already active
if (!isActive) {
if (isSmallScreen) setLeftPanel(false)
navigate({ to: route.threadsDetail, params: { threadId: thread.id } })
const handleClick = (e: MouseEvent<HTMLDivElement>) => {
if (openDropdown) {
e.stopPropagation()
e.preventDefault()
return
}
if (!isDragging) {
// Only close panel and navigate if the thread is not already active
if (!isActive) {
if (isSmallScreen) setLeftPanel(false)
navigate({ to: route.threadsDetail, params: { threadId: thread.id } })
}
}
}
}
const plainTitleForRename = useMemo(() => {
// Basic HTML stripping for simple span tags.
// If thread.title is undefined or null, treat as empty string before replace.
return (thread.title || '').replace(/<span[^>]*>|<\/span>/g, '')
}, [thread.title])
const plainTitleForRename = useMemo(() => {
// Basic HTML stripping for simple span tags.
// If thread.title is undefined or null, treat as empty string before replace.
return (thread.title || '').replace(/<span[^>]*>|<\/span>/g, '')
}, [thread.title])
const assignThreadToProject = (threadId: string, projectId: string) => {
const project = getFolderById(projectId)
if (project && updateThread) {
const projectMetadata = {
id: project.id,
name: project.name,
updated_at: project.updated_at,
}
return (
<div
ref={setNodeRef}
style={style}
{...attributes}
{...listeners}
onClick={handleClick}
onContextMenu={(e) => {
e.preventDefault()
e.stopPropagation()
setOpenDropdown(true)
}}
className={cn(
'mb-1 rounded hover:bg-left-panel-fg/10 flex items-center justify-between gap-2 px-1.5 group/thread-list transition-all',
isDragging ? 'cursor-move' : 'cursor-pointer',
isActive && 'bg-left-panel-fg/10'
)}
>
<div className="py-1 pr-2 truncate">
<span>{thread.title || t('common:newThread')}</span>
</div>
<div className="flex items-center">
<DropdownMenu
open={openDropdown}
onOpenChange={(open) => setOpenDropdown(open)}
updateThread(threadId, {
metadata: {
...thread.metadata,
project: projectMetadata,
},
})
        toast.success(
          t('common:toast.threadAssignedToProject.description', {
            projectName: project.name,
          })
        )
}
}
const getLastMessageInfo = useMemo(() => {
const messages = getMessages(thread.id)
if (messages.length === 0) return null
const lastMessage = messages[messages.length - 1]
return {
date: new Date(lastMessage.created_at || 0),
content: lastMessage.content?.[0]?.text?.value || '',
}
}, [getMessages, thread.id])
return (
<div
ref={setNodeRef}
style={style}
{...attributes}
{...listeners}
className={cn(
'rounded hover:bg-left-panel-fg/10 flex items-center justify-between gap-2 px-1.5 group/thread-list transition-all',
variant === 'project'
? 'mb-2 rounded-lg px-4 border border-main-view-fg/10 bg-main-view-fg/5'
: 'mb-1',
isDragging ? 'cursor-move' : 'cursor-pointer',
isActive && 'bg-left-panel-fg/10'
)}
onClick={(e) => handleClick(e)}
onContextMenu={(e) => {
e.preventDefault()
e.stopPropagation()
setOpenDropdown(true)
}}
>
<div
className={cn(
'pr-2 truncate flex-1',
variant === 'project' ? 'py-2 cursor-pointer' : 'py-1'
)}
>
<DropdownMenuTrigger asChild>
<IconDots
size={14}
className="text-left-panel-fg/60 shrink-0 cursor-pointer px-0.5 -mr-1 data-[state=open]:bg-left-panel-fg/10 rounded group-hover/thread-list:data-[state=closed]:size-5 size-5 data-[state=closed]:size-0"
onClick={(e) => {
e.preventDefault()
e.stopPropagation()
}}
/>
</DropdownMenuTrigger>
<DropdownMenuContent side="bottom" align="end">
{thread.isFavorite ? (
<DropdownMenuItem
<span>{thread.title || t('common:newThread')}</span>
            {variant === 'project' && getLastMessageInfo?.content && (
              <div className="text-sm text-main-view-fg/60 mt-0.5 line-clamp-2">
                {getLastMessageInfo.content}
              </div>
            )}
</div>
<div className="flex items-center">
<DropdownMenu
open={openDropdown}
onOpenChange={(open) => setOpenDropdown(open)}
>
<DropdownMenuTrigger asChild>
<IconDots
size={14}
className="text-left-panel-fg/60 shrink-0 cursor-pointer px-0.5 -mr-1 data-[state=open]:bg-left-panel-fg/10 rounded group-hover/thread-list:data-[state=closed]:size-5 size-5 data-[state=closed]:size-0"
onClick={(e) => {
e.preventDefault()
e.stopPropagation()
toggleFavorite(thread.id)
}}
>
<IconStarFilled />
<span>{t('common:unstar')}</span>
</DropdownMenuItem>
) : (
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
toggleFavorite(thread.id)
}}
>
<IconStar />
<span>{t('common:star')}</span>
</DropdownMenuItem>
)}
<RenameThreadDialog
thread={thread}
plainTitleForRename={plainTitleForRename}
onRename={renameThread}
onDropdownClose={() => setOpenDropdown(false)}
/>
/>
</DropdownMenuTrigger>
<DropdownMenuContent side="bottom" align="end" className="w-44">
{thread.isFavorite ? (
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
toggleFavorite(thread.id)
}}
>
<IconStarFilled />
<span>{t('common:unstar')}</span>
</DropdownMenuItem>
) : (
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
toggleFavorite(thread.id)
}}
>
<IconStar />
<span>{t('common:star')}</span>
</DropdownMenuItem>
)}
<RenameThreadDialog
thread={thread}
plainTitleForRename={plainTitleForRename}
onRename={renameThread}
onDropdownClose={() => setOpenDropdown(false)}
/>
<DropdownMenuSeparator />
<DeleteThreadDialog
thread={thread}
onDelete={deleteThread}
onDropdownClose={() => setOpenDropdown(false)}
/>
</DropdownMenuContent>
</DropdownMenu>
<DropdownMenuSub>
<DropdownMenuSubTrigger className="gap-2">
<IconFolder size={16} />
                  <span>{t('common:projects.addToProject')}</span>
</DropdownMenuSubTrigger>
<DropdownMenuSubContent>
{folders.length === 0 ? (
<DropdownMenuItem disabled>
<span className="text-left-panel-fg/50">
No projects available
</span>
</DropdownMenuItem>
) : (
                    folders
                      .slice() // sort a copy; don't mutate store state during render
                      .sort((a, b) => b.updated_at - a.updated_at)
.map((folder) => (
<DropdownMenuItem
key={folder.id}
onClick={(e) => {
e.stopPropagation()
assignThreadToProject(thread.id, folder.id)
}}
>
<IconFolder size={16} />
<span className="truncate max-w-[200px]">
{folder.name}
</span>
</DropdownMenuItem>
))
)}
{thread.metadata?.project && (
<>
<DropdownMenuSeparator />
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
// Remove project from metadata
const projectName = thread.metadata?.project?.name
updateThread(thread.id, {
metadata: {
...thread.metadata,
project: undefined,
},
})
                          toast.success(
                            t('common:toast.threadRemovedFromProject.description', {
                              projectName,
                            })
                          )
}}
>
<IconX size={16} />
                          <span>{t('common:projects.removeFromProject')}</span>
</DropdownMenuItem>
</>
)}
</DropdownMenuSubContent>
</DropdownMenuSub>
<DropdownMenuSeparator />
<DeleteThreadDialog
thread={thread}
onDelete={deleteThread}
onDropdownClose={() => setOpenDropdown(false)}
variant={variant}
/>
</DropdownMenuContent>
</DropdownMenu>
</div>
</div>
</div>
)
})
)
}
)
type ThreadListProps = {
threads: Thread[]
isFavoriteSection?: boolean
variant?: 'default' | 'project'
showDate?: boolean
}
function ThreadList({ threads }: ThreadListProps) {
function ThreadList({ threads, variant = 'default' }: ThreadListProps) {
const sortedThreads = useMemo(() => {
    // sort a copy so the threads prop is not mutated in place
    return [...threads].sort((a, b) => {
return (b.updated || 0) - (a.updated || 0)
@ -192,7 +323,7 @@ function ThreadList({ threads }: ThreadListProps) {
strategy={verticalListSortingStrategy}
>
{sortedThreads.map((thread, index) => (
<SortableItem key={index} thread={thread} />
          <SortableItem key={thread.id} thread={thread} variant={variant} />
))}
</SortableContext>
</DndContext>
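Assigning a thread to a project and removing it are symmetric metadata writes: both spread the existing metadata and either set or clear the project key, so unrelated metadata survives. A compact sketch of the pair, assuming the updateThread action and metadata shape used above:

type ProjectMeta = { id: string; name: string; updated_at: number }

const assignProject = (thread: Thread, project: ProjectMeta) =>
  updateThread(thread.id, {
    metadata: { ...thread.metadata, project },
  })

const removeProject = (thread: Thread) =>
  updateThread(thread.id, {
    // spreading first keeps unrelated metadata intact
    metadata: { ...thread.metadata, project: undefined },
  })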

View File

@ -0,0 +1,125 @@
import { useState, useEffect } from 'react'
import {
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
DialogFooter,
} from '@/components/ui/dialog'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { toast } from 'sonner'
import { useTranslation } from '@/i18n/react-i18next-compat'
interface AddProjectDialogProps {
open: boolean
onOpenChange: (open: boolean) => void
editingKey: string | null
initialData?: {
id: string
name: string
updated_at: number
}
onSave: (name: string) => void
}
export default function AddProjectDialog({
open,
onOpenChange,
editingKey,
initialData,
onSave,
}: AddProjectDialogProps) {
const { t } = useTranslation()
const [name, setName] = useState(initialData?.name || '')
const { folders } = useThreadManagement()
useEffect(() => {
if (open) {
setName(initialData?.name || '')
}
}, [open, initialData])
const handleSave = () => {
if (!name.trim()) return
const trimmedName = name.trim()
// Check for duplicate names (excluding current project when editing)
const isDuplicate = folders.some(
(folder) =>
folder.name.toLowerCase() === trimmedName.toLowerCase() &&
folder.id !== editingKey
)
if (isDuplicate) {
toast.warning(t('projects.addProjectDialog.alreadyExists', { projectName: trimmedName }))
return
}
onSave(trimmedName)
// Show detailed success message
if (editingKey && initialData) {
toast.success(
t('projects.addProjectDialog.renameSuccess', {
oldName: initialData.name,
newName: trimmedName
})
)
} else {
toast.success(t('projects.addProjectDialog.createSuccess', { projectName: trimmedName }))
}
setName('')
}
const handleCancel = () => {
onOpenChange(false)
setName('')
}
// Check if the button should be disabled
const isButtonDisabled =
!name.trim() || (editingKey && name.trim() === initialData?.name)
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-md">
<DialogHeader>
<DialogTitle>
{editingKey ? t('projects.addProjectDialog.editTitle') : t('projects.addProjectDialog.createTitle')}
</DialogTitle>
</DialogHeader>
<div className="space-y-4">
<div>
<label className="text-sm font-medium text-main-view-fg/80">
{t('projects.addProjectDialog.nameLabel')}
</label>
<Input
value={name}
onChange={(e) => setName(e.target.value)}
placeholder={t('projects.addProjectDialog.namePlaceholder')}
className="mt-1"
autoFocus
onKeyDown={(e) => {
if (e.key === 'Enter' && !isButtonDisabled) {
handleSave()
}
}}
/>
</div>
</div>
<DialogFooter>
<Button variant="link" onClick={handleCancel}>
{t('cancel')}
</Button>
<Button onClick={handleSave} disabled={Boolean(isButtonDisabled)}>
{editingKey ? t('projects.addProjectDialog.updateButton') : t('projects.addProjectDialog.createButton')}
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
)
}

View File

@ -0,0 +1,85 @@
import { useRef } from 'react'
import {
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
DialogFooter,
DialogDescription,
} from '@/components/ui/dialog'
import { Button } from '@/components/ui/button'
import { toast } from 'sonner'
import { useTranslation } from '@/i18n/react-i18next-compat'
interface DeleteProjectDialogProps {
open: boolean
onOpenChange: (open: boolean) => void
onConfirm: () => void
projectName?: string
}
export function DeleteProjectDialog({
open,
onOpenChange,
onConfirm,
projectName,
}: DeleteProjectDialogProps) {
const { t } = useTranslation()
const deleteButtonRef = useRef<HTMLButtonElement>(null)
const handleConfirm = () => {
try {
onConfirm()
toast.success(
projectName
? t('projects.deleteProjectDialog.successWithName', { projectName })
: t('projects.deleteProjectDialog.successWithoutName')
)
onOpenChange(false)
} catch (error) {
toast.error(t('projects.deleteProjectDialog.error'))
console.error('Delete project error:', error)
}
}
const handleKeyDown = (e: React.KeyboardEvent) => {
if (e.key === 'Enter') {
handleConfirm()
}
}
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent
className="sm:max-w-md"
onOpenAutoFocus={(e) => {
e.preventDefault()
deleteButtonRef.current?.focus()
}}
>
<DialogHeader>
<DialogTitle>{t('projects.deleteProjectDialog.title')}</DialogTitle>
<DialogDescription>
{t('projects.deleteProjectDialog.description')}
</DialogDescription>
</DialogHeader>
<DialogFooter>
<Button variant="link" onClick={() => onOpenChange(false)}>
{t('cancel')}
</Button>
<Button
ref={deleteButtonRef}
variant="destructive"
onClick={handleConfirm}
onKeyDown={handleKeyDown}
aria-label={t('projects.deleteProjectDialog.ariaLabel', {
projectName: projectName || t('projects.title').toLowerCase(),
})}
>
{t('projects.deleteProjectDialog.deleteButton')}
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
)
}

View File

@ -21,12 +21,14 @@ interface DeleteThreadDialogProps {
thread: Thread
onDelete: (threadId: string) => void
onDropdownClose: () => void
variant?: 'default' | 'project'
}
export function DeleteThreadDialog({
thread,
onDelete,
onDropdownClose,
variant = 'default',
}: DeleteThreadDialogProps) {
const { t } = useTranslation()
const navigate = useNavigate()
@ -48,9 +50,11 @@ export function DeleteThreadDialog({
id: 'delete-thread',
description: t('common:toast.deleteThread.description'),
})
setTimeout(() => {
navigate({ to: route.home })
}, 0)
if (variant !== 'project') {
setTimeout(() => {
navigate({ to: route.home })
}, 0)
}
}
const handleKeyDown = (e: React.KeyboardEvent) => {

View File

@ -6,4 +6,5 @@ export { MessageMetadataDialog } from './MessageMetadataDialog'
export { DeleteMessageDialog } from './DeleteMessageDialog'
export { FactoryResetDialog } from './FactoryResetDialog'
export { DeleteAssistantDialog } from './DeleteAssistantDialog'
export { AddProviderDialog } from './AddProviderDialog'
export { DeleteProjectDialog } from './DeleteProjectDialog'
export { AddProviderDialog } from './AddProviderDialog'

View File

@ -288,9 +288,40 @@ export const useModelProvider = create<ModelProviderState>()(
})
}
if (version <= 2 && state?.providers) {
state.providers.forEach((provider) => {
// Update cont_batching description for llamacpp provider
if (provider.provider === 'llamacpp' && provider.settings) {
const contBatchingSetting = provider.settings.find(
(s) => s.key === 'cont_batching'
)
if (contBatchingSetting) {
contBatchingSetting.description =
'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.'
}
}
// Migrate model settings
if (provider.models && provider.provider === 'llamacpp') {
provider.models.forEach((model) => {
if (!model.settings) model.settings = {}
if (!model.settings.batch_size) {
model.settings.batch_size = {
...modelSettings.batch_size,
controller_props: {
...modelSettings.batch_size.controller_props,
},
}
}
})
}
})
}
return state
},
version: 2,
version: 3,
}
)
)
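The hunk above is zustand's persist versioning at work: migrate receives the stored snapshot plus its version, patches anything older in place, and the bump to version: 3 makes the migration run once per client. A minimal standalone sketch of the pattern, with a deliberately simplified state shape:

import { create } from 'zustand'
import { persist } from 'zustand/middleware'

type ProviderState = {
  providers: { provider: string; settings?: { key: string }[] }[]
}

export const useExampleProvider = create<ProviderState>()(
  persist(
    () => ({ providers: [] }),
    {
      name: 'example-provider-store',
      version: 3, // bump whenever the persisted shape changes
      migrate: (persisted, version) => {
        const state = persisted as ProviderState
        if (version <= 2) {
          // patch old snapshots here, e.g. seed a missing default setting
        }
        return state
      },
    }
  )
)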

View File

@ -0,0 +1,82 @@
import { create } from 'zustand'
import { persist, createJSONStorage } from 'zustand/middleware'
import { ulid } from 'ulidx'
import { localStorageKey } from '@/constants/localStorage'
import { useThreads } from '@/hooks/useThreads'
type ThreadFolder = {
id: string
name: string
updated_at: number
}
type ThreadManagementState = {
folders: ThreadFolder[]
setFolders: (folders: ThreadFolder[]) => void
addFolder: (name: string) => void
updateFolder: (id: string, name: string) => void
deleteFolder: (id: string) => void
getFolderById: (id: string) => ThreadFolder | undefined
}
export const useThreadManagement = create<ThreadManagementState>()(
persist(
(set, get) => ({
folders: [],
setFolders: (folders) => {
set({ folders })
},
addFolder: (name) => {
const newFolder: ThreadFolder = {
id: ulid(),
name,
updated_at: Date.now(),
}
set((state) => ({
folders: [...state.folders, newFolder],
}))
},
updateFolder: (id, name) => {
set((state) => ({
folders: state.folders.map((folder) =>
folder.id === id
? { ...folder, name, updated_at: Date.now() }
: folder
),
}))
},
deleteFolder: (id) => {
// Remove project metadata from all threads that belong to this project
const threadsState = useThreads.getState()
const threadsToUpdate = Object.values(threadsState.threads).filter(
(thread) => thread.metadata?.project?.id === id
)
threadsToUpdate.forEach((thread) => {
threadsState.updateThread(thread.id, {
metadata: {
...thread.metadata,
project: undefined,
},
})
})
set((state) => ({
folders: state.folders.filter((folder) => folder.id !== id),
}))
},
getFolderById: (id) => {
return get().folders.find((folder) => folder.id === id)
},
}),
{
name: localStorageKey.threadManagement,
storage: createJSONStorage(() => localStorage),
}
)
)
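Since the store is built with zustand's create, it works both as a React hook and imperatively through getState, which is how deleteFolder above reaches into useThreads. A short usage sketch:

// Inside a component:
const { folders, addFolder } = useThreadManagement()

// Outside React, e.g. from another store or event handler:
useThreadManagement.getState().addFolder('Research')
const first = useThreadManagement.getState().folders[0]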

View File

@ -20,12 +20,14 @@ type ThreadState = {
createThread: (
model: ThreadModel,
title?: string,
assistant?: Assistant
assistant?: Assistant,
projectMetadata?: { id: string; name: string; updated_at: number }
) => Promise<Thread>
updateCurrentThreadModel: (model: ThreadModel) => void
getFilteredThreads: (searchTerm: string) => Thread[]
updateCurrentThreadAssistant: (assistant: Assistant) => void
updateThreadTimestamp: (threadId: string) => void
updateThread: (threadId: string, updates: Partial<Thread>) => void
searchIndex: Fzf<Thread[]> | null
}
@ -132,20 +134,28 @@ export const useThreads = create<ThreadState>()((set, get) => ({
deleteAllThreads: () => {
set((state) => {
const allThreadIds = Object.keys(state.threads)
const favoriteThreadIds = allThreadIds.filter(
(threadId) => state.threads[threadId].isFavorite
)
const nonFavoriteThreadIds = allThreadIds.filter(
(threadId) => !state.threads[threadId].isFavorite
// Identify threads to keep (favorites OR have project metadata)
const threadsToKeepIds = allThreadIds.filter(
(threadId) =>
state.threads[threadId].isFavorite ||
state.threads[threadId].metadata?.project
)
// Only delete non-favorite threads
nonFavoriteThreadIds.forEach((threadId) => {
// Identify threads to delete (non-favorites AND no project metadata)
const threadsToDeleteIds = allThreadIds.filter(
(threadId) =>
!state.threads[threadId].isFavorite &&
!state.threads[threadId].metadata?.project
)
// Delete threads that are not favorites and not in projects
threadsToDeleteIds.forEach((threadId) => {
getServiceHub().threads().deleteThread(threadId)
})
// Keep only favorite threads
const remainingThreads = favoriteThreadIds.reduce(
// Keep favorite threads and threads with project metadata
const remainingThreads = threadsToKeepIds.reduce(
(acc, threadId) => {
acc[threadId] = state.threads[threadId]
return acc
@ -208,13 +218,18 @@ export const useThreads = create<ThreadState>()((set, get) => ({
setCurrentThreadId: (threadId) => {
if (threadId !== get().currentThreadId) set({ currentThreadId: threadId })
},
createThread: async (model, title, assistant) => {
createThread: async (model, title, assistant, projectMetadata) => {
const newThread: Thread = {
id: ulid(),
title: title ?? 'New Thread',
model,
updated: Date.now() / 1000,
assistants: assistant ? [assistant] : [],
...(projectMetadata && {
metadata: {
project: projectMetadata,
},
}),
}
return await getServiceHub()
.threads()
@ -328,4 +343,26 @@ export const useThreads = create<ThreadState>()((set, get) => ({
}
})
},
updateThread: (threadId, updates) => {
set((state) => {
const thread = state.threads[threadId]
if (!thread) return state
const updatedThread = {
...thread,
...updates,
updated: Date.now() / 1000,
}
getServiceHub().threads().updateThread(updatedThread)
const newThreads = { ...state.threads, [threadId]: updatedThread }
return {
threads: newThreads,
searchIndex: new Fzf<Thread[]>(Object.values(newThreads), {
selector: (item: Thread) => item.title,
}),
}
})
},
}))
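With the widened createThread signature, callers can attach a project at creation time rather than patching metadata afterwards (which the 100 ms timeout earlier in this commit approximates). A hedged usage sketch inside an async handler; model and assistant stand in for values the caller already has:

const project = useThreadManagement.getState().getFolderById(projectId)

const thread = await useThreads.getState().createThread(
  model, // current ThreadModel
  undefined, // keep the default 'New Thread' title
  assistant, // optional Assistant
  project && {
    id: project.id,
    name: project.name,
    updated_at: project.updated_at,
  }
)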

View File

@ -153,4 +153,16 @@ export const modelSettings = {
value: false,
},
},
batch_size: {
key: 'batch_size',
title: 'Batch Size',
description: 'Logical maximum batch size for processing prompts.',
controller_type: 'input',
controller_props: {
value: 2048,
placeholder: '2048',
type: 'number',
textAlign: 'right',
},
},
}

View File

@ -117,6 +117,7 @@
"chatInput": "Frage mich etwas..."
},
"confirm": "Bestätige",
"continue": "Weiter",
"loading": "Lade...",
"error": "Fehler",
"success": "Erfolg",
@ -127,6 +128,7 @@
"createAssistant": "Assistenten anlegen",
"enterApiKey": "API Key eingeben",
"scrollToBottom": "Zum Ende scrollen",
"generateAiResponse": "KI-Antwort generieren",
"addModel": {
"title": "Modell hinzufügen",
"modelId": "Modell ID",
@ -154,12 +156,12 @@
"delete": "Löschen"
},
"editJson": {
"errorParse": "Failed to parse JSON",
"errorPaste": "Failed to paste JSON",
"errorFormat": "Invalid JSON format",
"titleAll": "Edit All Servers Configuration",
"placeholder": "Enter JSON configuration...",
"save": "Save"
"errorParse": "JSON-Parsing fehlgeschlagen",
"errorPaste": "JSON-Einfügen fehlgeschlagen",
"errorFormat": "Ungültiges JSON-Format",
"titleAll": "Alle Serverkonfigurationen bearbeiten",
"placeholder": "JSON-Konfiguration eingeben...",
"save": "Speichern"
},
"editModel": {
"title": "Modell bearbeiten: {{modelId}}",
@ -228,11 +230,85 @@
"title": "Nachricht Metadaten"
}
},
"projects": {
"title": "Projekte",
"addProject": "Projekt hinzufügen",
"addToProject": "Zum Projekt hinzufügen",
"removeFromProject": "Vom Projekt entfernen",
"createNewProject": "Neues Projekt erstellen",
"editProject": "Projekt bearbeiten",
"deleteProject": "Projekt löschen",
"projectName": "Projektname",
"enterProjectName": "Projektname eingeben...",
"noProjectsAvailable": "Keine Projekte verfügbar",
"noProjectsYet": "Noch keine Projekte",
"noProjectsYetDesc": "Starten Sie ein neues Projekt, indem Sie auf die Schaltfläche Projekt hinzufügen klicken.",
"projectNotFound": "Projekt nicht gefunden",
"projectNotFoundDesc": "Das gesuchte Projekt existiert nicht oder wurde gelöscht.",
"deleteProjectDialog": {
"title": "Projekt löschen",
"description": "Sind Sie sicher, dass Sie dieses Projekt löschen möchten? Diese Aktion kann nicht rückgängig gemacht werden.",
"deleteButton": "Löschen",
"successWithName": "Projekt \"{{projectName}}\" erfolgreich gelöscht",
"successWithoutName": "Projekt erfolgreich gelöscht",
"error": "Projekt konnte nicht gelöscht werden. Bitte versuchen Sie es erneut.",
"ariaLabel": "{{projectName}} löschen"
},
"addProjectDialog": {
"createTitle": "Neues Projekt erstellen",
"editTitle": "Projekt bearbeiten",
"nameLabel": "Projektname",
"namePlaceholder": "Projektname eingeben...",
"createButton": "Erstellen",
"updateButton": "Aktualisieren",
"alreadyExists": "Projekt \"{{projectName}}\" existiert bereits",
"createSuccess": "Projekt \"{{projectName}}\" erfolgreich erstellt",
"renameSuccess": "Projekt von \"{{oldName}}\" zu \"{{newName}}\" umbenannt"
},
"noConversationsIn": "Keine Gespräche in {{projectName}}",
"startNewConversation": "Starten Sie ein neues Gespräch mit {{projectName}} unten",
"conversationsIn": "Gespräche in {{projectName}}",
"conversationsDescription": "Klicken Sie auf ein Gespräch, um weiterzuchatten, oder starten Sie unten ein neues.",
"thread": "Thread",
"threads": "Threads",
"updated": "Aktualisiert:",
"collapseThreads": "Threads einklappen",
"expandThreads": "Threads ausklappen",
"update": "Aktualisieren"
},
"toast": {
"allThreadsUnfavorited": {
"title": "Alle Threads De-Favorisieren ",
"description": "Alle deine Threads wurden defavorisiert."
},
"projectCreated": {
"title": "Projekt erstellt",
"description": "Projekt \"{{projectName}}\" erfolgreich erstellt"
},
"projectRenamed": {
"title": "Projekt umbenannt",
"description": "Projekt von \"{{oldName}}\" zu \"{{newName}}\" umbenannt"
},
"projectDeleted": {
"title": "Projekt gelöscht",
"description": "Projekt \"{{projectName}}\" erfolgreich gelöscht"
},
"projectAlreadyExists": {
"title": "Projekt existiert bereits",
"description": "Projekt \"{{projectName}}\" existiert bereits"
},
"projectDeleteFailed": {
"title": "Löschen fehlgeschlagen",
"description": "Projekt konnte nicht gelöscht werden. Bitte versuchen Sie es erneut."
},
"threadAssignedToProject": {
"title": "Thread zugewiesen",
"description": "Thread erfolgreich zu \"{{projectName}}\" hinzugefügt"
},
"threadRemovedFromProject": {
"title": "Thread entfernt",
"description": "Thread erfolgreich von \"{{projectName}}\" entfernt"
},
"deleteAllThreads": {
"title": "Alle Threads löschen",
"description": "Alle deine Threads wurden permanent gelöscht."
@ -280,6 +356,80 @@
"downloadAndVerificationComplete": {
"title": "Download abgeschlossen",
"description": "Modell \"{{item}}\" erfolgreich heruntergeladen und verifiziert"
},
"projectCreated": {
"title": "Projekt erstellt",
"description": "Projekt \"{{projectName}}\" erfolgreich erstellt"
},
"projectRenamed": {
"title": "Projekt umbenannt",
"description": "Projekt von \"{{oldName}}\" zu \"{{newName}}\" umbenannt"
},
"projectDeleted": {
"title": "Projekt gelöscht",
"description": "Projekt \"{{projectName}}\" erfolgreich gelöscht"
},
"projectAlreadyExists": {
"title": "Projekt existiert bereits",
"description": "Projekt \"{{projectName}}\" existiert bereits"
},
"projectDeleteFailed": {
"title": "Löschen fehlgeschlagen",
"description": "Projekt konnte nicht gelöscht werden. Bitte versuchen Sie es erneut."
},
"threadAssignedToProject": {
"title": "Thread zugewiesen",
"description": "Thread erfolgreich zu \"{{projectName}}\" hinzugefügt"
},
"threadRemovedFromProject": {
"title": "Thread entfernt",
"description": "Thread erfolgreich von \"{{projectName}}\" entfernt"
}
  }
}

View File

@ -119,6 +119,7 @@
"chatInput": "Ask me anything..."
},
"confirm": "Confirm",
"continue": "Continue",
"loading": "Loading...",
"error": "Error",
"success": "Success",
@ -231,6 +232,52 @@
"title": "Message Metadata"
}
},
"projects": {
"title": "Projects",
"addProject": "Add Project",
"addToProject": "Add to project",
"removeFromProject": "Remove from project",
"createNewProject": "Create New Project",
"editProject": "Edit Project",
"deleteProject": "Delete Project",
"projectName": "Project Name",
"enterProjectName": "Enter project name...",
"noProjectsAvailable": "No projects available",
"noProjectsYet": "No Projects Yet",
"noProjectsYetDesc": "Start a new project by clicking the Add Project button.",
"projectNotFound": "Project Not Found",
"projectNotFoundDesc": "The project you're looking for doesn't exist or has been deleted.",
"deleteProjectDialog": {
"title": "Delete Project",
"description": "Are you sure you want to delete this project? This action cannot be undone.",
"deleteButton": "Delete",
"successWithName": "Project \"{{projectName}}\" deleted successfully",
"successWithoutName": "Project deleted successfully",
"error": "Failed to delete project. Please try again.",
"ariaLabel": "Delete {{projectName}}"
},
"addProjectDialog": {
"createTitle": "Create New Project",
"editTitle": "Edit Project",
"nameLabel": "Project Name",
"namePlaceholder": "Enter project name...",
"createButton": "Create",
"updateButton": "Update",
"alreadyExists": "Project \"{{projectName}}\" already exists",
"createSuccess": "Project \"{{projectName}}\" created successfully",
"renameSuccess": "Project renamed from \"{{oldName}}\" to \"{{newName}}\""
},
"noConversationsIn": "No Conversations in {{projectName}}",
"startNewConversation": "Start a new conversation with {{projectName}} below",
"conversationsIn": "Conversations in {{projectName}}",
"conversationsDescription": "Click on any conversation to continue chatting, or start a new one below.",
"thread": "thread",
"threads": "threads",
"updated": "Updated:",
"collapseThreads": "Collapse threads",
"expandThreads": "Expand threads",
"update": "Update"
},
"toast": {
"allThreadsUnfavorited": {
"title": "All Threads Unfavorited",
@ -283,6 +330,34 @@
"downloadAndVerificationComplete": {
"title": "Download Complete",
"description": "Model \"{{item}}\" downloaded and verified successfully"
},
"projectCreated": {
"title": "Project Created",
"description": "Project \"{{projectName}}\" created successfully"
},
"projectRenamed": {
"title": "Project Renamed",
"description": "Project renamed from \"{{oldName}}\" to \"{{newName}}\""
},
"projectDeleted": {
"title": "Project Deleted",
"description": "Project \"{{projectName}}\" deleted successfully"
},
"projectAlreadyExists": {
"title": "Project Already Exists",
"description": "Project \"{{projectName}}\" already exists"
},
"projectDeleteFailed": {
"title": "Delete Failed",
"description": "Failed to delete project. Please try again."
},
"threadAssignedToProject": {
"title": "Thread Assigned",
"description": "Thread assigned to \"{{projectName}}\" successfully"
},
"threadRemovedFromProject": {
"title": "Thread Removed",
"description": "Thread removed from \"{{projectName}}\" successfully"
}
}
}

View File

@ -117,6 +117,7 @@
"chatInput": "Tanyakan apa saja padaku..."
},
"confirm": "Konfirmasi",
"continue": "Lanjutkan",
"loading": "Memuat...",
"error": "Kesalahan",
"success": "Sukses",
@ -127,6 +128,7 @@
"createAssistant": "Buat Asisten",
"enterApiKey": "Masukkan Kunci API",
"scrollToBottom": "Gulir ke bawah",
"generateAiResponse": "Hasilkan Respons AI",
"addModel": {
"title": "Tambah Model",
"modelId": "ID Model",
@ -170,6 +172,13 @@
"embeddings": "Embedding",
"notAvailable": "Belum tersedia"
},
"outOfContextError": {
"truncateInput": "Potong Input",
"title": "Kesalahan konteks habis",
"description": "Obrolan ini mencapai batas memori AI, seperti papan tulis yang penuh. Kami dapat memperluas jendela memori (disebut ukuran konteks) agar lebih mengingat, tetapi mungkin akan menggunakan lebih banyak memori komputer Anda. Kami juga dapat memotong input, yang berarti akan melupakan sebagian riwayat obrolan untuk memberi ruang bagi pesan baru.",
"increaseContextSizeDescription": "Apakah Anda ingin meningkatkan ukuran konteks?",
"increaseContextSize": "Tingkatkan Ukuran Konteks"
},
"toolApproval": {
"title": "Permintaan Izin Alat",
"description": "Asisten ingin menggunakan <strong>{{toolName}}</strong>",
@ -273,6 +282,80 @@
"downloadAndVerificationComplete": {
"title": "Unduhan Selesai",
"description": "Model \"{{item}}\" berhasil diunduh dan diverifikasi"
},
"projectCreated": {
"title": "Proyek Dibuat",
"description": "Proyek \"{{projectName}}\" berhasil dibuat"
},
"projectRenamed": {
"title": "Proyek Diganti Nama",
"description": "Proyek diganti nama dari \"{{oldName}}\" ke \"{{newName}}\""
},
"projectDeleted": {
"title": "Proyek Dihapus",
"description": "Proyek \"{{projectName}}\" berhasil dihapus"
},
"projectAlreadyExists": {
"title": "Proyek Sudah Ada",
"description": "Proyek \"{{projectName}}\" sudah ada"
},
"projectDeleteFailed": {
"title": "Penghapusan Gagal",
"description": "Gagal menghapus proyek. Silakan coba lagi."
},
"threadAssignedToProject": {
"title": "Thread Ditugaskan",
"description": "Thread berhasil ditugaskan ke \"{{projectName}}\""
},
"threadRemovedFromProject": {
"title": "Thread Dihapus",
"description": "Thread berhasil dihapus dari \"{{projectName}}\""
}
},
"projects": {
"title": "Proyek",
"addProject": "Tambah Proyek",
"addToProject": "Tambahkan ke proyek",
"removeFromProject": "Hapus dari proyek",
"createNewProject": "Buat Proyek Baru",
"editProject": "Edit Proyek",
"deleteProject": "Hapus Proyek",
"projectName": "Nama Proyek",
"enterProjectName": "Masukkan nama proyek...",
"noProjectsAvailable": "Tidak ada proyek tersedia",
"noProjectsYet": "Belum Ada Proyek",
"noProjectsYetDesc": "Mulai proyek baru dengan mengklik tombol Tambah Proyek.",
"projectNotFound": "Proyek Tidak Ditemukan",
"projectNotFoundDesc": "Proyek yang Anda cari tidak ada atau telah dihapus.",
"deleteProjectDialog": {
"title": "Hapus Proyek",
"description": "Apakah Anda yakin ingin menghapus proyek ini? Tindakan ini tidak dapat dibatalkan.",
"deleteButton": "Hapus",
"successWithName": "Proyek \"{{projectName}}\" berhasil dihapus",
"successWithoutName": "Proyek berhasil dihapus",
"error": "Gagal menghapus proyek. Silakan coba lagi.",
"ariaLabel": "Hapus {{projectName}}"
},
"addProjectDialog": {
"createTitle": "Buat Proyek Baru",
"editTitle": "Edit Proyek",
"nameLabel": "Nama Proyek",
"namePlaceholder": "Masukkan nama proyek...",
"createButton": "Buat",
"updateButton": "Perbarui",
"alreadyExists": "Proyek \"{{projectName}}\" sudah ada",
"createSuccess": "Proyek \"{{projectName}}\" berhasil dibuat",
"renameSuccess": "Proyek diubah dari \"{{oldName}}\" menjadi \"{{newName}}\""
},
"noConversationsIn": "Tidak Ada Percakapan di {{projectName}}",
"startNewConversation": "Mulai percakapan baru dengan {{projectName}} di bawah",
"conversationsIn": "Percakapan di {{projectName}}",
"conversationsDescription": "Klik percakapan mana pun untuk melanjutkan chatting, atau mulai yang baru di bawah.",
"thread": "utas",
"threads": "utas",
"updated": "Diperbarui:",
"collapseThreads": "Tutup utas",
"expandThreads": "Buka utas",
"update": "Perbarui"
}
}

View File

@ -117,6 +117,7 @@
"chatInput": "Zapytaj mnie o cokolwiek…"
},
"confirm": "Potwierdź",
"continue": "Kontynuuj",
"loading": "Wczytywanie…",
"error": "Błąd",
"success": "Sukces",
@ -229,6 +230,52 @@
"title": "Metadane Wiadomości"
}
},
"projects": {
"title": "Projekty",
"addProject": "Dodaj Projekt",
"addToProject": "Dodaj do projektu",
"removeFromProject": "Usuń z projektu",
"createNewProject": "Utwórz Nowy Projekt",
"editProject": "Edytuj Projekt",
"deleteProject": "Usuń Projekt",
"projectName": "Nazwa Projektu",
"enterProjectName": "Wprowadź nazwę projektu...",
"noProjectsAvailable": "Brak dostępnych projektów",
"noProjectsYet": "Brak Projektów",
"noProjectsYetDesc": "Rozpocznij nowy projekt klikając przycisk Dodaj Projekt.",
"projectNotFound": "Projekt Nie Znaleziony",
"projectNotFoundDesc": "Projekt, którego szukasz nie istnieje lub został usunięty.",
"deleteProjectDialog": {
"title": "Usuń Projekt",
"description": "Na pewno chcesz usunąć ten projekt? Tej operacji nie można cofnąć.",
"deleteButton": "Usuń",
"successWithName": "Projekt \"{{projectName}}\" został pomyślnie usunięty",
"successWithoutName": "Projekt został pomyślnie usunięty",
"error": "Nie udało się usunąć projektu. Spróbuj ponownie.",
"ariaLabel": "Usuń {{projectName}}"
},
"addProjectDialog": {
"createTitle": "Utwórz Nowy Projekt",
"editTitle": "Edytuj Projekt",
"nameLabel": "Nazwa Projektu",
"namePlaceholder": "Wprowadź nazwę projektu...",
"createButton": "Utwórz",
"updateButton": "Aktualizuj",
"alreadyExists": "Projekt \"{{projectName}}\" już istnieje",
"createSuccess": "Projekt \"{{projectName}}\" został pomyślnie utworzony",
"renameSuccess": "Projekt zmieniono z \"{{oldName}}\" na \"{{newName}}\""
},
"noConversationsIn": "Brak Rozmów w {{projectName}}",
"startNewConversation": "Rozpocznij nową rozmowę z {{projectName}} poniżej",
"conversationsIn": "Rozmowy w {{projectName}}",
"conversationsDescription": "Kliknij na dowolną rozmowę aby kontynuować czat, lub rozpocznij nową poniżej.",
"thread": "wątek",
"threads": "wątki",
"updated": "Zaktualizowano:",
"collapseThreads": "Zwiń wątki",
"expandThreads": "Rozwiń wątki",
"update": "Aktualizuj"
},
"toast": {
"allThreadsUnfavorited": {
"title": "Wszystkie Wątki Usunięte z Ulubionych",

View File

@ -117,6 +117,7 @@
"chatInput": "Hỏi tôi bất cứ điều gì..."
},
"confirm": "Xác nhận",
"continue": "Tiếp tục",
"loading": "Đang tải...",
"error": "Lỗi",
"success": "Thành công",

View File

@ -117,6 +117,7 @@
"chatInput": "随便问我什么..."
},
"confirm": "确认",
"continue": "继续",
"loading": "加载中...",
"error": "错误",
"success": "成功",

View File

@ -117,6 +117,7 @@
"chatInput": "問我任何事..."
},
"confirm": "確認",
"continue": "繼續",
"loading": "載入中...",
"error": "錯誤",
"success": "成功",

View File

@ -8,130 +8,330 @@
// You should NOT make any changes in this file as it will be overwritten.
// Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified.
import { Route as rootRouteImport } from './routes/__root'
import { Route as SystemMonitorRouteImport } from './routes/system-monitor'
import { Route as LogsRouteImport } from './routes/logs'
import { Route as AssistantRouteImport } from './routes/assistant'
import { Route as IndexRouteImport } from './routes/index'
import { Route as HubIndexRouteImport } from './routes/hub/index'
import { Route as ThreadsThreadIdRouteImport } from './routes/threads/$threadId'
import { Route as SettingsShortcutsRouteImport } from './routes/settings/shortcuts'
import { Route as SettingsPrivacyRouteImport } from './routes/settings/privacy'
import { Route as SettingsMcpServersRouteImport } from './routes/settings/mcp-servers'
import { Route as SettingsLocalApiServerRouteImport } from './routes/settings/local-api-server'
import { Route as SettingsHttpsProxyRouteImport } from './routes/settings/https-proxy'
import { Route as SettingsHardwareRouteImport } from './routes/settings/hardware'
import { Route as SettingsGeneralRouteImport } from './routes/settings/general'
import { Route as SettingsExtensionsRouteImport } from './routes/settings/extensions'
import { Route as SettingsAppearanceRouteImport } from './routes/settings/appearance'
import { Route as LocalApiServerLogsRouteImport } from './routes/local-api-server/logs'
import { Route as HubModelIdRouteImport } from './routes/hub/$modelId'
import { Route as SettingsProvidersIndexRouteImport } from './routes/settings/providers/index'
import { Route as SettingsProvidersProviderNameRouteImport } from './routes/settings/providers/$providerName'
import { Route as AuthGoogleCallbackRouteImport } from './routes/auth.google.callback'
// Import Routes
const SystemMonitorRoute = SystemMonitorRouteImport.update({
import { Route as rootRoute } from './routes/__root'
import { Route as SystemMonitorImport } from './routes/system-monitor'
import { Route as LogsImport } from './routes/logs'
import { Route as AssistantImport } from './routes/assistant'
import { Route as IndexImport } from './routes/index'
import { Route as ProjectIndexImport } from './routes/project/index'
import { Route as HubIndexImport } from './routes/hub/index'
import { Route as ThreadsThreadIdImport } from './routes/threads/$threadId'
import { Route as SettingsShortcutsImport } from './routes/settings/shortcuts'
import { Route as SettingsPrivacyImport } from './routes/settings/privacy'
import { Route as SettingsMcpServersImport } from './routes/settings/mcp-servers'
import { Route as SettingsLocalApiServerImport } from './routes/settings/local-api-server'
import { Route as SettingsHttpsProxyImport } from './routes/settings/https-proxy'
import { Route as SettingsHardwareImport } from './routes/settings/hardware'
import { Route as SettingsGeneralImport } from './routes/settings/general'
import { Route as SettingsExtensionsImport } from './routes/settings/extensions'
import { Route as SettingsAppearanceImport } from './routes/settings/appearance'
import { Route as ProjectProjectIdImport } from './routes/project/$projectId'
import { Route as LocalApiServerLogsImport } from './routes/local-api-server/logs'
import { Route as HubModelIdImport } from './routes/hub/$modelId'
import { Route as SettingsProvidersIndexImport } from './routes/settings/providers/index'
import { Route as SettingsProvidersProviderNameImport } from './routes/settings/providers/$providerName'
import { Route as AuthGoogleCallbackImport } from './routes/auth.google.callback'
// Create/Update Routes
const SystemMonitorRoute = SystemMonitorImport.update({
id: '/system-monitor',
path: '/system-monitor',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const LogsRoute = LogsRouteImport.update({
const LogsRoute = LogsImport.update({
id: '/logs',
path: '/logs',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const AssistantRoute = AssistantRouteImport.update({
const AssistantRoute = AssistantImport.update({
id: '/assistant',
path: '/assistant',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const IndexRoute = IndexRouteImport.update({
const IndexRoute = IndexImport.update({
id: '/',
path: '/',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const HubIndexRoute = HubIndexRouteImport.update({
const ProjectIndexRoute = ProjectIndexImport.update({
id: '/project/',
path: '/project/',
getParentRoute: () => rootRoute,
} as any)
const HubIndexRoute = HubIndexImport.update({
id: '/hub/',
path: '/hub/',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const ThreadsThreadIdRoute = ThreadsThreadIdRouteImport.update({
const ThreadsThreadIdRoute = ThreadsThreadIdImport.update({
id: '/threads/$threadId',
path: '/threads/$threadId',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsShortcutsRoute = SettingsShortcutsRouteImport.update({
const SettingsShortcutsRoute = SettingsShortcutsImport.update({
id: '/settings/shortcuts',
path: '/settings/shortcuts',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsPrivacyRoute = SettingsPrivacyRouteImport.update({
const SettingsPrivacyRoute = SettingsPrivacyImport.update({
id: '/settings/privacy',
path: '/settings/privacy',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsMcpServersRoute = SettingsMcpServersRouteImport.update({
const SettingsMcpServersRoute = SettingsMcpServersImport.update({
id: '/settings/mcp-servers',
path: '/settings/mcp-servers',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsLocalApiServerRoute = SettingsLocalApiServerRouteImport.update({
const SettingsLocalApiServerRoute = SettingsLocalApiServerImport.update({
id: '/settings/local-api-server',
path: '/settings/local-api-server',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsHttpsProxyRoute = SettingsHttpsProxyRouteImport.update({
const SettingsHttpsProxyRoute = SettingsHttpsProxyImport.update({
id: '/settings/https-proxy',
path: '/settings/https-proxy',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsHardwareRoute = SettingsHardwareRouteImport.update({
const SettingsHardwareRoute = SettingsHardwareImport.update({
id: '/settings/hardware',
path: '/settings/hardware',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsGeneralRoute = SettingsGeneralRouteImport.update({
const SettingsGeneralRoute = SettingsGeneralImport.update({
id: '/settings/general',
path: '/settings/general',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsExtensionsRoute = SettingsExtensionsRouteImport.update({
const SettingsExtensionsRoute = SettingsExtensionsImport.update({
id: '/settings/extensions',
path: '/settings/extensions',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsAppearanceRoute = SettingsAppearanceRouteImport.update({
const SettingsAppearanceRoute = SettingsAppearanceImport.update({
id: '/settings/appearance',
path: '/settings/appearance',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const LocalApiServerLogsRoute = LocalApiServerLogsRouteImport.update({
const ProjectProjectIdRoute = ProjectProjectIdImport.update({
id: '/project/$projectId',
path: '/project/$projectId',
getParentRoute: () => rootRoute,
} as any)
const LocalApiServerLogsRoute = LocalApiServerLogsImport.update({
id: '/local-api-server/logs',
path: '/local-api-server/logs',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const HubModelIdRoute = HubModelIdRouteImport.update({
const HubModelIdRoute = HubModelIdImport.update({
id: '/hub/$modelId',
path: '/hub/$modelId',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsProvidersIndexRoute = SettingsProvidersIndexRouteImport.update({
const SettingsProvidersIndexRoute = SettingsProvidersIndexImport.update({
id: '/settings/providers/',
path: '/settings/providers/',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsProvidersProviderNameRoute =
SettingsProvidersProviderNameRouteImport.update({
SettingsProvidersProviderNameImport.update({
id: '/settings/providers/$providerName',
path: '/settings/providers/$providerName',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const AuthGoogleCallbackRoute = AuthGoogleCallbackRouteImport.update({
const AuthGoogleCallbackRoute = AuthGoogleCallbackImport.update({
id: '/auth/google/callback',
path: '/auth/google/callback',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
// Populate the FileRoutesByPath interface
declare module '@tanstack/react-router' {
interface FileRoutesByPath {
'/': {
id: '/'
path: '/'
fullPath: '/'
preLoaderRoute: typeof IndexImport
parentRoute: typeof rootRoute
}
'/assistant': {
id: '/assistant'
path: '/assistant'
fullPath: '/assistant'
preLoaderRoute: typeof AssistantImport
parentRoute: typeof rootRoute
}
'/logs': {
id: '/logs'
path: '/logs'
fullPath: '/logs'
preLoaderRoute: typeof LogsImport
parentRoute: typeof rootRoute
}
'/system-monitor': {
id: '/system-monitor'
path: '/system-monitor'
fullPath: '/system-monitor'
preLoaderRoute: typeof SystemMonitorImport
parentRoute: typeof rootRoute
}
'/hub/$modelId': {
id: '/hub/$modelId'
path: '/hub/$modelId'
fullPath: '/hub/$modelId'
preLoaderRoute: typeof HubModelIdImport
parentRoute: typeof rootRoute
}
'/local-api-server/logs': {
id: '/local-api-server/logs'
path: '/local-api-server/logs'
fullPath: '/local-api-server/logs'
preLoaderRoute: typeof LocalApiServerLogsImport
parentRoute: typeof rootRoute
}
'/project/$projectId': {
id: '/project/$projectId'
path: '/project/$projectId'
fullPath: '/project/$projectId'
preLoaderRoute: typeof ProjectProjectIdImport
parentRoute: typeof rootRoute
}
'/settings/appearance': {
id: '/settings/appearance'
path: '/settings/appearance'
fullPath: '/settings/appearance'
preLoaderRoute: typeof SettingsAppearanceImport
parentRoute: typeof rootRoute
}
'/settings/extensions': {
id: '/settings/extensions'
path: '/settings/extensions'
fullPath: '/settings/extensions'
preLoaderRoute: typeof SettingsExtensionsImport
parentRoute: typeof rootRoute
}
'/settings/general': {
id: '/settings/general'
path: '/settings/general'
fullPath: '/settings/general'
preLoaderRoute: typeof SettingsGeneralImport
parentRoute: typeof rootRoute
}
'/settings/hardware': {
id: '/settings/hardware'
path: '/settings/hardware'
fullPath: '/settings/hardware'
preLoaderRoute: typeof SettingsHardwareImport
parentRoute: typeof rootRoute
}
'/settings/https-proxy': {
id: '/settings/https-proxy'
path: '/settings/https-proxy'
fullPath: '/settings/https-proxy'
preLoaderRoute: typeof SettingsHttpsProxyImport
parentRoute: typeof rootRoute
}
'/settings/local-api-server': {
id: '/settings/local-api-server'
path: '/settings/local-api-server'
fullPath: '/settings/local-api-server'
preLoaderRoute: typeof SettingsLocalApiServerImport
parentRoute: typeof rootRoute
}
'/settings/mcp-servers': {
id: '/settings/mcp-servers'
path: '/settings/mcp-servers'
fullPath: '/settings/mcp-servers'
preLoaderRoute: typeof SettingsMcpServersImport
parentRoute: typeof rootRoute
}
'/settings/privacy': {
id: '/settings/privacy'
path: '/settings/privacy'
fullPath: '/settings/privacy'
preLoaderRoute: typeof SettingsPrivacyImport
parentRoute: typeof rootRoute
}
'/settings/shortcuts': {
id: '/settings/shortcuts'
path: '/settings/shortcuts'
fullPath: '/settings/shortcuts'
preLoaderRoute: typeof SettingsShortcutsImport
parentRoute: typeof rootRoute
}
'/threads/$threadId': {
id: '/threads/$threadId'
path: '/threads/$threadId'
fullPath: '/threads/$threadId'
preLoaderRoute: typeof ThreadsThreadIdImport
parentRoute: typeof rootRoute
}
'/hub/': {
id: '/hub/'
path: '/hub'
fullPath: '/hub'
preLoaderRoute: typeof HubIndexImport
parentRoute: typeof rootRoute
}
'/project/': {
id: '/project/'
path: '/project'
fullPath: '/project'
preLoaderRoute: typeof ProjectIndexImport
parentRoute: typeof rootRoute
}
'/auth/google/callback': {
id: '/auth/google/callback'
path: '/auth/google/callback'
fullPath: '/auth/google/callback'
preLoaderRoute: typeof AuthGoogleCallbackImport
parentRoute: typeof rootRoute
}
'/settings/providers/$providerName': {
id: '/settings/providers/$providerName'
path: '/settings/providers/$providerName'
fullPath: '/settings/providers/$providerName'
preLoaderRoute: typeof SettingsProvidersProviderNameImport
parentRoute: typeof rootRoute
}
'/settings/providers/': {
id: '/settings/providers/'
path: '/settings/providers'
fullPath: '/settings/providers'
preLoaderRoute: typeof SettingsProvidersIndexImport
parentRoute: typeof rootRoute
}
}
}
// Create and export the route tree
export interface FileRoutesByFullPath {
'/': typeof IndexRoute
'/assistant': typeof AssistantRoute
@ -139,6 +339,7 @@ export interface FileRoutesByFullPath {
'/system-monitor': typeof SystemMonitorRoute
'/hub/$modelId': typeof HubModelIdRoute
'/local-api-server/logs': typeof LocalApiServerLogsRoute
'/project/$projectId': typeof ProjectProjectIdRoute
'/settings/appearance': typeof SettingsAppearanceRoute
'/settings/extensions': typeof SettingsExtensionsRoute
'/settings/general': typeof SettingsGeneralRoute
@ -150,10 +351,12 @@ export interface FileRoutesByFullPath {
'/settings/shortcuts': typeof SettingsShortcutsRoute
'/threads/$threadId': typeof ThreadsThreadIdRoute
'/hub': typeof HubIndexRoute
'/project': typeof ProjectIndexRoute
'/auth/google/callback': typeof AuthGoogleCallbackRoute
'/settings/providers/$providerName': typeof SettingsProvidersProviderNameRoute
'/settings/providers': typeof SettingsProvidersIndexRoute
}
export interface FileRoutesByTo {
'/': typeof IndexRoute
'/assistant': typeof AssistantRoute
@ -161,6 +364,7 @@ export interface FileRoutesByTo {
'/system-monitor': typeof SystemMonitorRoute
'/hub/$modelId': typeof HubModelIdRoute
'/local-api-server/logs': typeof LocalApiServerLogsRoute
'/project/$projectId': typeof ProjectProjectIdRoute
'/settings/appearance': typeof SettingsAppearanceRoute
'/settings/extensions': typeof SettingsExtensionsRoute
'/settings/general': typeof SettingsGeneralRoute
@ -172,18 +376,21 @@ export interface FileRoutesByTo {
'/settings/shortcuts': typeof SettingsShortcutsRoute
'/threads/$threadId': typeof ThreadsThreadIdRoute
'/hub': typeof HubIndexRoute
'/project': typeof ProjectIndexRoute
'/auth/google/callback': typeof AuthGoogleCallbackRoute
'/settings/providers/$providerName': typeof SettingsProvidersProviderNameRoute
'/settings/providers': typeof SettingsProvidersIndexRoute
}
export interface FileRoutesById {
__root__: typeof rootRouteImport
'__root__': typeof rootRoute
'/': typeof IndexRoute
'/assistant': typeof AssistantRoute
'/logs': typeof LogsRoute
'/system-monitor': typeof SystemMonitorRoute
'/hub/$modelId': typeof HubModelIdRoute
'/local-api-server/logs': typeof LocalApiServerLogsRoute
'/project/$projectId': typeof ProjectProjectIdRoute
'/settings/appearance': typeof SettingsAppearanceRoute
'/settings/extensions': typeof SettingsExtensionsRoute
'/settings/general': typeof SettingsGeneralRoute
@ -195,10 +402,12 @@ export interface FileRoutesById {
'/settings/shortcuts': typeof SettingsShortcutsRoute
'/threads/$threadId': typeof ThreadsThreadIdRoute
'/hub/': typeof HubIndexRoute
'/project/': typeof ProjectIndexRoute
'/auth/google/callback': typeof AuthGoogleCallbackRoute
'/settings/providers/$providerName': typeof SettingsProvidersProviderNameRoute
'/settings/providers/': typeof SettingsProvidersIndexRoute
}
export interface FileRouteTypes {
fileRoutesByFullPath: FileRoutesByFullPath
fullPaths:
@ -208,6 +417,7 @@ export interface FileRouteTypes {
| '/system-monitor'
| '/hub/$modelId'
| '/local-api-server/logs'
| '/project/$projectId'
| '/settings/appearance'
| '/settings/extensions'
| '/settings/general'
@ -219,6 +429,7 @@ export interface FileRouteTypes {
| '/settings/shortcuts'
| '/threads/$threadId'
| '/hub'
| '/project'
| '/auth/google/callback'
| '/settings/providers/$providerName'
| '/settings/providers'
@ -230,6 +441,7 @@ export interface FileRouteTypes {
| '/system-monitor'
| '/hub/$modelId'
| '/local-api-server/logs'
| '/project/$projectId'
| '/settings/appearance'
| '/settings/extensions'
| '/settings/general'
@ -241,6 +453,7 @@ export interface FileRouteTypes {
| '/settings/shortcuts'
| '/threads/$threadId'
| '/hub'
| '/project'
| '/auth/google/callback'
| '/settings/providers/$providerName'
| '/settings/providers'
@ -252,6 +465,7 @@ export interface FileRouteTypes {
| '/system-monitor'
| '/hub/$modelId'
| '/local-api-server/logs'
| '/project/$projectId'
| '/settings/appearance'
| '/settings/extensions'
| '/settings/general'
@ -263,11 +477,13 @@ export interface FileRouteTypes {
| '/settings/shortcuts'
| '/threads/$threadId'
| '/hub/'
| '/project/'
| '/auth/google/callback'
| '/settings/providers/$providerName'
| '/settings/providers/'
fileRoutesById: FileRoutesById
}
export interface RootRouteChildren {
IndexRoute: typeof IndexRoute
AssistantRoute: typeof AssistantRoute
@ -275,6 +491,7 @@ export interface RootRouteChildren {
SystemMonitorRoute: typeof SystemMonitorRoute
HubModelIdRoute: typeof HubModelIdRoute
LocalApiServerLogsRoute: typeof LocalApiServerLogsRoute
ProjectProjectIdRoute: typeof ProjectProjectIdRoute
SettingsAppearanceRoute: typeof SettingsAppearanceRoute
SettingsExtensionsRoute: typeof SettingsExtensionsRoute
SettingsGeneralRoute: typeof SettingsGeneralRoute
@ -286,156 +503,12 @@ export interface RootRouteChildren {
SettingsShortcutsRoute: typeof SettingsShortcutsRoute
ThreadsThreadIdRoute: typeof ThreadsThreadIdRoute
HubIndexRoute: typeof HubIndexRoute
ProjectIndexRoute: typeof ProjectIndexRoute
AuthGoogleCallbackRoute: typeof AuthGoogleCallbackRoute
SettingsProvidersProviderNameRoute: typeof SettingsProvidersProviderNameRoute
SettingsProvidersIndexRoute: typeof SettingsProvidersIndexRoute
}
declare module '@tanstack/react-router' {
interface FileRoutesByPath {
'/system-monitor': {
id: '/system-monitor'
path: '/system-monitor'
fullPath: '/system-monitor'
preLoaderRoute: typeof SystemMonitorRouteImport
parentRoute: typeof rootRouteImport
}
'/logs': {
id: '/logs'
path: '/logs'
fullPath: '/logs'
preLoaderRoute: typeof LogsRouteImport
parentRoute: typeof rootRouteImport
}
'/assistant': {
id: '/assistant'
path: '/assistant'
fullPath: '/assistant'
preLoaderRoute: typeof AssistantRouteImport
parentRoute: typeof rootRouteImport
}
'/': {
id: '/'
path: '/'
fullPath: '/'
preLoaderRoute: typeof IndexRouteImport
parentRoute: typeof rootRouteImport
}
'/hub/': {
id: '/hub/'
path: '/hub'
fullPath: '/hub'
preLoaderRoute: typeof HubIndexRouteImport
parentRoute: typeof rootRouteImport
}
'/threads/$threadId': {
id: '/threads/$threadId'
path: '/threads/$threadId'
fullPath: '/threads/$threadId'
preLoaderRoute: typeof ThreadsThreadIdRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/shortcuts': {
id: '/settings/shortcuts'
path: '/settings/shortcuts'
fullPath: '/settings/shortcuts'
preLoaderRoute: typeof SettingsShortcutsRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/privacy': {
id: '/settings/privacy'
path: '/settings/privacy'
fullPath: '/settings/privacy'
preLoaderRoute: typeof SettingsPrivacyRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/mcp-servers': {
id: '/settings/mcp-servers'
path: '/settings/mcp-servers'
fullPath: '/settings/mcp-servers'
preLoaderRoute: typeof SettingsMcpServersRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/local-api-server': {
id: '/settings/local-api-server'
path: '/settings/local-api-server'
fullPath: '/settings/local-api-server'
preLoaderRoute: typeof SettingsLocalApiServerRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/https-proxy': {
id: '/settings/https-proxy'
path: '/settings/https-proxy'
fullPath: '/settings/https-proxy'
preLoaderRoute: typeof SettingsHttpsProxyRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/hardware': {
id: '/settings/hardware'
path: '/settings/hardware'
fullPath: '/settings/hardware'
preLoaderRoute: typeof SettingsHardwareRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/general': {
id: '/settings/general'
path: '/settings/general'
fullPath: '/settings/general'
preLoaderRoute: typeof SettingsGeneralRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/extensions': {
id: '/settings/extensions'
path: '/settings/extensions'
fullPath: '/settings/extensions'
preLoaderRoute: typeof SettingsExtensionsRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/appearance': {
id: '/settings/appearance'
path: '/settings/appearance'
fullPath: '/settings/appearance'
preLoaderRoute: typeof SettingsAppearanceRouteImport
parentRoute: typeof rootRouteImport
}
'/local-api-server/logs': {
id: '/local-api-server/logs'
path: '/local-api-server/logs'
fullPath: '/local-api-server/logs'
preLoaderRoute: typeof LocalApiServerLogsRouteImport
parentRoute: typeof rootRouteImport
}
'/hub/$modelId': {
id: '/hub/$modelId'
path: '/hub/$modelId'
fullPath: '/hub/$modelId'
preLoaderRoute: typeof HubModelIdRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/providers/': {
id: '/settings/providers/'
path: '/settings/providers'
fullPath: '/settings/providers'
preLoaderRoute: typeof SettingsProvidersIndexRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/providers/$providerName': {
id: '/settings/providers/$providerName'
path: '/settings/providers/$providerName'
fullPath: '/settings/providers/$providerName'
preLoaderRoute: typeof SettingsProvidersProviderNameRouteImport
parentRoute: typeof rootRouteImport
}
'/auth/google/callback': {
id: '/auth/google/callback'
path: '/auth/google/callback'
fullPath: '/auth/google/callback'
preLoaderRoute: typeof AuthGoogleCallbackRouteImport
parentRoute: typeof rootRouteImport
}
}
}
const rootRouteChildren: RootRouteChildren = {
IndexRoute: IndexRoute,
AssistantRoute: AssistantRoute,
@ -443,6 +516,7 @@ const rootRouteChildren: RootRouteChildren = {
SystemMonitorRoute: SystemMonitorRoute,
HubModelIdRoute: HubModelIdRoute,
LocalApiServerLogsRoute: LocalApiServerLogsRoute,
ProjectProjectIdRoute: ProjectProjectIdRoute,
SettingsAppearanceRoute: SettingsAppearanceRoute,
SettingsExtensionsRoute: SettingsExtensionsRoute,
SettingsGeneralRoute: SettingsGeneralRoute,
@ -454,10 +528,112 @@ const rootRouteChildren: RootRouteChildren = {
SettingsShortcutsRoute: SettingsShortcutsRoute,
ThreadsThreadIdRoute: ThreadsThreadIdRoute,
HubIndexRoute: HubIndexRoute,
ProjectIndexRoute: ProjectIndexRoute,
AuthGoogleCallbackRoute: AuthGoogleCallbackRoute,
SettingsProvidersProviderNameRoute: SettingsProvidersProviderNameRoute,
SettingsProvidersIndexRoute: SettingsProvidersIndexRoute,
}
export const routeTree = rootRouteImport
export const routeTree = rootRoute
._addFileChildren(rootRouteChildren)
._addFileTypes<FileRouteTypes>()
/* ROUTE_MANIFEST_START
{
"routes": {
"__root__": {
"filePath": "__root.tsx",
"children": [
"/",
"/assistant",
"/logs",
"/system-monitor",
"/hub/$modelId",
"/local-api-server/logs",
"/project/$projectId",
"/settings/appearance",
"/settings/extensions",
"/settings/general",
"/settings/hardware",
"/settings/https-proxy",
"/settings/local-api-server",
"/settings/mcp-servers",
"/settings/privacy",
"/settings/shortcuts",
"/threads/$threadId",
"/hub/",
"/project/",
"/auth/google/callback",
"/settings/providers/$providerName",
"/settings/providers/"
]
},
"/": {
"filePath": "index.tsx"
},
"/assistant": {
"filePath": "assistant.tsx"
},
"/logs": {
"filePath": "logs.tsx"
},
"/system-monitor": {
"filePath": "system-monitor.tsx"
},
"/hub/$modelId": {
"filePath": "hub/$modelId.tsx"
},
"/local-api-server/logs": {
"filePath": "local-api-server/logs.tsx"
},
"/project/$projectId": {
"filePath": "project/$projectId.tsx"
},
"/settings/appearance": {
"filePath": "settings/appearance.tsx"
},
"/settings/extensions": {
"filePath": "settings/extensions.tsx"
},
"/settings/general": {
"filePath": "settings/general.tsx"
},
"/settings/hardware": {
"filePath": "settings/hardware.tsx"
},
"/settings/https-proxy": {
"filePath": "settings/https-proxy.tsx"
},
"/settings/local-api-server": {
"filePath": "settings/local-api-server.tsx"
},
"/settings/mcp-servers": {
"filePath": "settings/mcp-servers.tsx"
},
"/settings/privacy": {
"filePath": "settings/privacy.tsx"
},
"/settings/shortcuts": {
"filePath": "settings/shortcuts.tsx"
},
"/threads/$threadId": {
"filePath": "threads/$threadId.tsx"
},
"/hub/": {
"filePath": "hub/index.tsx"
},
"/project/": {
"filePath": "project/index.tsx"
},
"/auth/google/callback": {
"filePath": "auth.google.callback.tsx"
},
"/settings/providers/$providerName": {
"filePath": "settings/providers/$providerName.tsx"
},
"/settings/providers/": {
"filePath": "settings/providers/index.tsx"
}
}
}
ROUTE_MANIFEST_END */
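The regenerated tree is mechanical, but the part worth noting is the declare module '@tanstack/react-router' augmentation: once FileRoutesByPath lists the new /project routes, route ids, paths, and params are all checked at compile time. A minimal sketch of what that buys in app code (the component below is illustrative, not part of the diff):

import { useNavigate } from '@tanstack/react-router'

// Illustrative only: '/project/$projectId' is now a key of FileRoutesByPath,
// so `to` is validated and the required projectId param cannot be omitted.
function OpenProjectButton({ projectId }: { projectId: string }) {
  const navigate = useNavigate()
  return (
    <button
      onClick={() =>
        navigate({ to: '/project/$projectId', params: { projectId } })
      }
    >
      Open project
    </button>
  )
}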

View File

@ -12,6 +12,7 @@ import { AvatarEmoji } from '@/containers/AvatarEmoji'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { PlatformGuard } from '@/lib/platform/PlatformGuard'
import { PlatformFeature } from '@/lib/platform/types'
import { Button } from '@/components/ui/button'
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const Route = createFileRoute(route.assistant as any)({
@ -59,74 +60,73 @@ function AssistantContent() {
}
return (
<div className="flex h-full flex-col flex-justify-center">
<div className="flex h-full flex-col justify-center">
<HeaderPage>
<span>{t('assistants:title')}</span>
<div className="flex items-center justify-between w-full mr-2">
<span>{t('assistants:title')}</span>
<Button
onClick={() => {
setEditingKey(null)
setOpen(true)
}}
size="sm"
className="relative z-50"
>
<IconCirclePlus size={16} />
Add Assistant
</Button>
</div>
</HeaderPage>
<div className="h-full p-4 overflow-y-auto">
<div className="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4">
<div className="h-full p-4 w-3/4 mx-auto overflow-y-auto mt-2">
<div className="space-y-3">
{assistants
.slice()
.sort((a, b) => a.created_at - b.created_at)
.map((assistant) => (
<div
className="bg-main-view-fg/3 p-3 rounded-md"
className="bg-main-view-fg/3 py-2 px-4 rounded-lg flex items-center gap-4"
key={assistant.id}
>
<div className="flex items-center justify-between gap-2">
<h3 className="text-base font-medium text-main-view-fg/80">
<div className="flex items-center gap-1">
{assistant?.avatar && (
<span className="shrink-0 w-4 h-4 relative flex items-center justify-center">
<AvatarEmoji
avatar={assistant?.avatar}
imageClassName="object-cover"
textClassName="text-sm"
/>
</span>
)}
<span className="line-clamp-1">{assistant.name}</span>
</div>
</h3>
<div className="flex items-center gap-0.5">
<div
className="size-6 cursor-pointer flex items-center justify-center rounded hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('assistants:editAssistant')}
onClick={() => {
setEditingKey(assistant.id)
setOpen(true)
}}
>
<IconPencil size={18} className="text-main-view-fg/50" />
</div>
<div
className="size-6 cursor-pointer flex items-center justify-center rounded hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('assistants:deleteAssistant')}
onClick={() => handleDelete(assistant.id)}
>
<IconTrash size={18} className="text-main-view-fg/50" />
<div className="flex items-start gap-3 flex-1">
{assistant?.avatar && (
<div className="shrink-0 w-8 h-8 relative flex items-center justify-center bg-main-view-fg/4 rounded-md">
<AvatarEmoji
avatar={assistant?.avatar}
imageClassName="w-5 h-5 object-contain"
textClassName="text-lg"
/>
</div>
)}
<div className="flex-1 min-w-0">
<h3 className="text-base font-medium text-main-view-fg/80 line-clamp-1">
{assistant.name}
</h3>
<p className="text-main-view-fg/50 text-sm line-clamp-2 mt-0.5">
{assistant.description}
</p>
</div>
</div>
<p
className="text-main-view-fg/50 mt-1 line-clamp-2"
title={assistant.description}
>
{assistant.description}
</p>
<div className="flex items-center">
<button
className="size-8 cursor-pointer flex items-center justify-center rounded-md hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('assistants:editAssistant')}
onClick={() => {
setEditingKey(assistant.id)
setOpen(true)
}}
>
<IconPencil size={16} className="text-main-view-fg/50" />
</button>
<button
className="size-8 cursor-pointer flex items-center justify-center rounded-md hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('assistants:deleteAssistant')}
onClick={() => handleDelete(assistant.id)}
>
<IconTrash size={16} className="text-main-view-fg/50" />
</button>
</div>
</div>
))}
<div
className="bg-main-view p-3 min-h-[88px] rounded-md border border-dashed border-main-view-fg/10 flex items-center justify-center cursor-pointer hover:bg-main-view-fg/1 transition-all duration-200 ease-in-out"
key="new-assistant"
onClick={() => {
setEditingKey(null)
setOpen(true)
}}
>
<IconCirclePlus className="text-main-view-fg/50" />
</div>
</div>
<AddEditAssistant
open={open}

View File

@ -21,10 +21,7 @@ import { useEffect, useMemo, useCallback, useState } from 'react'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { useServiceHub } from '@/hooks/useServiceHub'
import type {
CatalogModel,
ModelQuant,
} from '@/services/models/types'
import type { CatalogModel, ModelQuant } from '@/services/models/types'
import { Progress } from '@/components/ui/progress'
import { Button } from '@/components/ui/button'
import { cn } from '@/lib/utils'
@ -80,12 +77,13 @@ function HubModelDetailContent() {
}, [fetchSources])
const fetchRepo = useCallback(async () => {
const repoInfo = await serviceHub.models().fetchHuggingFaceRepo(
search.repo || modelId,
huggingfaceToken
)
const repoInfo = await serviceHub
.models()
.fetchHuggingFaceRepo(search.repo || modelId, huggingfaceToken)
if (repoInfo) {
const repoDetail = serviceHub.models().convertHfRepoToCatalogModel(repoInfo)
const repoDetail = serviceHub
.models()
.convertHfRepoToCatalogModel(repoInfo)
setRepoData(repoDetail || undefined)
}
}, [serviceHub, modelId, search, huggingfaceToken])
@ -168,7 +166,9 @@ function HubModelDetailContent() {
try {
// Use the HuggingFace path for the model
const modelPath = variant.path
const supported = await serviceHub.models().isModelSupported(modelPath, 8192)
const supported = await serviceHub
.models()
.isModelSupported(modelPath, 8192)
setModelSupportStatus((prev) => ({
...prev,
[modelKey]: supported,
@ -473,12 +473,20 @@ function HubModelDetailContent() {
addLocalDownloadingModel(
variant.model_id
)
serviceHub.models().pullModelWithMetadata(
variant.model_id,
variant.path,
modelData.mmproj_models?.[0]?.path,
huggingfaceToken
)
serviceHub
.models()
.pullModelWithMetadata(
variant.model_id,
variant.path,
(
modelData.mmproj_models?.find(
(e) =>
e.model_id.toLowerCase() ===
'mmproj-f16'
) || modelData.mmproj_models?.[0]
)?.path,
huggingfaceToken
)
}}
className={cn(isDownloading && 'hidden')}
>
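The behavioral change in this hunk is easy to miss inside the prettier reflow: instead of always downloading mmproj_models?.[0], the pull now prefers the variant whose model_id is exactly 'mmproj-f16' and only falls back to the first entry. A hypothetical helper (not in the diff) expressing the same rule:

import type { ModelQuant } from '@/services/models/types'

// Prefer the f16 multimodal projector when the repo ships one; otherwise
// fall back to the first available projector, or undefined if there is none.
function pickMmprojPath(mmprojModels?: ModelQuant[]): string | undefined {
  const preferred = mmprojModels?.find(
    (e) => e.model_id.toLowerCase() === 'mmproj-f16'
  )
  return (preferred ?? mmprojModels?.[0])?.path
}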

View File

@ -1,6 +1,6 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { useVirtualizer } from '@tanstack/react-virtual'
import { createFileRoute, useNavigate, useSearch } from '@tanstack/react-router'
import { createFileRoute, useNavigate } from '@tanstack/react-router'
import { route } from '@/constants/routes'
import { useModelSources } from '@/hooks/useModelSources'
import { cn } from '@/lib/utils'
@ -34,8 +34,6 @@ import {
TooltipTrigger,
} from '@/components/ui/tooltip'
import { ModelInfoHoverCard } from '@/containers/ModelInfoHoverCard'
import Joyride, { CallBackProps, STATUS } from 'react-joyride'
import { CustomTooltipJoyRide } from '@/containers/CustomeTooltipJoyRide'
import {
DropdownMenu,
DropdownMenuContent,
@ -51,10 +49,9 @@ import { Loader } from 'lucide-react'
import { useTranslation } from '@/i18n/react-i18next-compat'
import Fuse from 'fuse.js'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
import { DownloadButtonPlaceholder } from '@/containers/DownloadButton'
import { useShallow } from 'zustand/shallow'
type ModelProps = {
model: CatalogModel
}
type SearchParams = {
repo: string
}
@ -77,7 +74,7 @@ function Hub() {
function HubContent() {
const parentRef = useRef(null)
const { huggingfaceToken } = useGeneralSetting()
const huggingfaceToken = useGeneralSetting((state) => state.huggingfaceToken)
const serviceHub = useServiceHub()
const { t } = useTranslation()
@ -93,7 +90,13 @@ function HubContent() {
}
}, [])
const { sources, fetchSources, loading } = useModelSources()
const { sources, fetchSources, loading } = useModelSources(
useShallow((state) => ({
sources: state.sources,
fetchSources: state.fetchSources,
loading: state.loading,
}))
)
const [searchValue, setSearchValue] = useState('')
const [sortSelected, setSortSelected] = useState('newest')
@ -108,16 +111,9 @@ function HubContent() {
const [modelSupportStatus, setModelSupportStatus] = useState<
Record<string, 'RED' | 'YELLOW' | 'GREEN' | 'LOADING'>
>({})
const [joyrideReady, setJoyrideReady] = useState(false)
const [currentStepIndex, setCurrentStepIndex] = useState(0)
const addModelSourceTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(
null
)
const downloadButtonRef = useRef<HTMLButtonElement>(null)
const hasTriggeredDownload = useRef(false)
const { getProviderByName } = useModelProvider()
const llamaProvider = getProviderByName('llamacpp')
const toggleModelExpansion = (modelId: string) => {
setExpandedModels((prev) => ({
@ -168,9 +164,10 @@ function HubContent() {
?.map((model) => ({
...model,
quants: model.quants.filter((variant) =>
llamaProvider?.models.some(
(m: { id: string }) => m.id === variant.model_id
)
useModelProvider
.getState()
.getProviderByName('llamacpp')
?.models.some((m: { id: string }) => m.id === variant.model_id)
),
}))
.filter((model) => model.quants.length > 0)
@ -186,7 +183,6 @@ function HubContent() {
showOnlyDownloaded,
huggingFaceRepo,
searchOptions,
llamaProvider?.models,
])
// The virtualizer
@ -215,9 +211,13 @@ function HubContent() {
addModelSourceTimeoutRef.current = setTimeout(async () => {
try {
const repoInfo = await serviceHub.models().fetchHuggingFaceRepo(searchValue, huggingfaceToken)
const repoInfo = await serviceHub
.models()
.fetchHuggingFaceRepo(searchValue, huggingfaceToken)
if (repoInfo) {
const catalogModel = serviceHub.models().convertHfRepoToCatalogModel(repoInfo)
const catalogModel = serviceHub
.models()
.convertHfRepoToCatalogModel(repoInfo)
if (
!sources.some(
(s) =>
@ -303,7 +303,9 @@ function HubContent() {
try {
// Use the HuggingFace path for the model
const modelPath = variant.path
const supportStatus = await serviceHub.models().isModelSupported(modelPath, 8192)
const supportStatus = await serviceHub
.models()
.isModelSupported(modelPath, 8192)
setModelSupportStatus((prev) => ({
...prev,
@ -320,178 +322,7 @@ function HubContent() {
[modelSupportStatus, serviceHub]
)
const DownloadButtonPlaceholder = useMemo(() => {
return ({ model }: ModelProps) => {
// Check if this is a HuggingFace repository (no quants)
if (model.quants.length === 0) {
return (
<div className="flex items-center gap-2">
<Button
size="sm"
onClick={() => {
window.open(
`https://huggingface.co/${model.model_name}`,
'_blank'
)
}}
>
View on HuggingFace
</Button>
</div>
)
}
const quant =
model.quants.find((e) =>
defaultModelQuantizations.some((m) =>
e.model_id.toLowerCase().includes(m)
)
) ?? model.quants[0]
const modelId = quant?.model_id || model.model_name
const modelUrl = quant?.path || modelId
const isDownloading =
localDownloadingModels.has(modelId) ||
downloadProcesses.some((e) => e.id === modelId)
const downloadProgress =
downloadProcesses.find((e) => e.id === modelId)?.progress || 0
const isDownloaded = llamaProvider?.models.some(
(m: { id: string }) => m.id === modelId
)
const isRecommended = isRecommendedModel(model.model_name)
const handleDownload = () => {
// Immediately set local downloading state
addLocalDownloadingModel(modelId)
const mmprojPath = model.mmproj_models?.[0]?.path
serviceHub.models().pullModelWithMetadata(
modelId,
modelUrl,
mmprojPath,
huggingfaceToken
)
}
return (
<div
className={cn(
'flex items-center',
isRecommended && 'hub-download-button-step'
)}
>
{isDownloading && !isDownloaded && (
<div className={cn('flex items-center gap-2 w-20')}>
<Progress value={downloadProgress * 100} />
<span className="text-xs text-center text-main-view-fg/70">
{Math.round(downloadProgress * 100)}%
</span>
</div>
)}
{isDownloaded ? (
<Button
size="sm"
onClick={() => handleUseModel(modelId)}
data-test-id={`hub-model-${modelId}`}
>
{t('hub:use')}
</Button>
) : (
<Button
data-test-id={`hub-model-${modelId}`}
size="sm"
onClick={handleDownload}
className={cn(isDownloading && 'hidden')}
ref={isRecommended ? downloadButtonRef : undefined}
>
{t('hub:download')}
</Button>
)}
</div>
)
}
}, [
localDownloadingModels,
downloadProcesses,
llamaProvider?.models,
isRecommendedModel,
t,
addLocalDownloadingModel,
huggingfaceToken,
handleUseModel,
serviceHub,
])
const { step } = useSearch({ from: Route.id })
const isSetup = step === 'setup_local_provider'
// Wait for DOM to be ready before starting Joyride
useEffect(() => {
if (!loading && filteredModels.length > 0 && isSetup) {
const timer = setTimeout(() => {
setJoyrideReady(true)
}, 100)
return () => clearTimeout(timer)
} else {
setJoyrideReady(false)
}
}, [loading, filteredModels.length, isSetup])
const handleJoyrideCallback = (data: CallBackProps) => {
const { status, index } = data
if (
status === STATUS.FINISHED &&
!isDownloading &&
isLastStep &&
!hasTriggeredDownload.current
) {
const recommendedModel = filteredModels.find((model) =>
isRecommendedModel(model.model_name)
)
if (recommendedModel && recommendedModel.quants[0]?.model_id) {
if (downloadButtonRef.current) {
hasTriggeredDownload.current = true
downloadButtonRef.current.click()
}
return
}
}
if (status === STATUS.FINISHED) {
navigate({
to: route.hub.index,
})
}
// Track current step index
setCurrentStepIndex(index)
}
// Check if any model is currently downloading
const isDownloading =
localDownloadingModels.size > 0 || downloadProcesses.length > 0
const steps = [
{
target: '.hub-model-card-step',
title: t('hub:joyride.recommendedModelTitle'),
disableBeacon: true,
content: t('hub:joyride.recommendedModelContent'),
},
{
target: '.hub-download-button-step',
title: isDownloading
? t('hub:joyride.downloadInProgressTitle')
: t('hub:joyride.downloadModelTitle'),
disableBeacon: true,
content: isDownloading
? t('hub:joyride.downloadInProgressContent')
: t('hub:joyride.downloadModelContent'),
},
]
// Check if we're on the last step
const isLastStep = currentStepIndex === steps.length - 1
const renderFilter = () => {
return (
<>
@ -544,31 +375,6 @@ function HubContent() {
return (
<>
<Joyride
run={joyrideReady}
floaterProps={{
hideArrow: true,
}}
steps={steps}
tooltipComponent={CustomTooltipJoyRide}
spotlightPadding={0}
continuous={true}
showSkipButton={!isLastStep}
hideCloseButton={true}
spotlightClicks={true}
disableOverlay={IS_LINUX}
disableOverlayClose={true}
callback={handleJoyrideCallback}
locale={{
back: t('hub:joyride.back'),
close: t('hub:joyride.close'),
last: !isDownloading
? t('hub:joyride.lastWithDownload')
: t('hub:joyride.last'),
next: t('hub:joyride.next'),
skip: t('hub:joyride.skip'),
}}
/>
<div className="flex h-full w-full">
<div className="flex flex-col h-full w-full ">
<HeaderPage>
@ -698,6 +504,7 @@ function HubContent() {
/>
<DownloadButtonPlaceholder
model={filteredModels[virtualItem.index]}
handleUseModel={handleUseModel}
/>
</div>
</div>
@ -908,10 +715,13 @@ function HubContent() {
(e) => e.id === variant.model_id
)?.progress || 0
const isDownloaded =
llamaProvider?.models.some(
(m: { id: string }) =>
m.id === variant.model_id
)
useModelProvider
.getState()
.getProviderByName('llamacpp')
?.models.some(
(m: { id: string }) =>
m.id === variant.model_id
)
if (isDownloading) {
return (
@ -962,14 +772,26 @@ function HubContent() {
addLocalDownloadingModel(
variant.model_id
)
serviceHub.models().pullModelWithMetadata(
variant.model_id,
variant.path,
filteredModels[
virtualItem.index
].mmproj_models?.[0]?.path,
huggingfaceToken
)
serviceHub
.models()
.pullModelWithMetadata(
variant.model_id,
variant.path,
(
filteredModels[
virtualItem.index
].mmproj_models?.find(
(e) =>
e.model_id.toLowerCase() ===
'mmproj-f16'
) ||
filteredModels[
virtualItem.index
].mmproj_models?.[0]
)?.path,
huggingfaceToken
)
}}
>
<IconDownload
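Beyond deleting the Joyride onboarding and moving DownloadButtonPlaceholder out to a shared container (which now receives handleUseModel as a prop), this refactor switches the hub to two zustand patterns that cut re-renders and dependency-array churn: subscribing through selectors (wrapping multi-field picks in useShallow so an equal-but-fresh object does not re-render), and using getState() for one-off reads that should not subscribe at all. A condensed sketch of both, with store shapes assumed from the calls in this file:

import { useShallow } from 'zustand/shallow'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
import { useModelSources } from '@/hooks/useModelSources'
import { useModelProvider } from '@/hooks/useModelProvider'

function Sketch() {
  // Re-renders only when the token itself changes:
  const huggingfaceToken = useGeneralSetting((s) => s.huggingfaceToken)

  // Multi-field pick, compared shallowly by useShallow:
  const { sources, fetchSources, loading } = useModelSources(
    useShallow((s) => ({
      sources: s.sources,
      fetchSources: s.fetchSources,
      loading: s.loading,
    }))
  )

  // One-off read: no subscription, and no hook-dependency entry needed:
  const llamacpp = useModelProvider.getState().getProviderByName('llamacpp')
  return null
}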

View File

@ -0,0 +1,143 @@
import { createFileRoute, useParams } from '@tanstack/react-router'
import { useMemo } from 'react'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useThreads } from '@/hooks/useThreads'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useTranslation } from '@/i18n/react-i18next-compat'
import ChatInput from '@/containers/ChatInput'
import HeaderPage from '@/containers/HeaderPage'
import SetupScreen from '@/containers/SetupScreen'
import ThreadList from '@/containers/ThreadList'
import DropdownAssistant from '@/containers/DropdownAssistant'
import { PlatformFeatures } from '@/lib/platform/const'
import { PlatformFeature } from '@/lib/platform/types'
import { IconMessage } from '@tabler/icons-react'
import { cn } from '@/lib/utils'
import { useAppearance } from '@/hooks/useAppearance'
import { useSmallScreen } from '@/hooks/useMediaQuery'
export const Route = createFileRoute('/project/$projectId')({
component: ProjectPage,
})
function ProjectPage() {
const { t } = useTranslation()
const { projectId } = useParams({ from: '/project/$projectId' })
const { providers } = useModelProvider()
const { getFolderById } = useThreadManagement()
const threads = useThreads((state) => state.threads)
const chatWidth = useAppearance((state) => state.chatWidth)
const isSmallScreen = useSmallScreen()
// Find the project
const project = getFolderById(projectId)
// Get threads for this project
const projectThreads = useMemo(() => {
return Object.values(threads)
.filter((thread) => thread.metadata?.project?.id === projectId)
.sort((a, b) => (b.updated || 0) - (a.updated || 0))
}, [threads, projectId])
  // Check whether at least one provider is usable
const hasValidProviders = providers.some(
(provider) =>
provider.api_key?.length ||
(provider.provider === 'llamacpp' && provider.models.length) ||
(provider.provider === 'jan' && provider.models.length)
)
if (!hasValidProviders) {
return <SetupScreen />
}
if (!project) {
return (
<div className="flex h-full flex-col items-center justify-center">
<div className="text-center">
<h1 className="text-2xl font-semibold text-main-view-fg mb-2">
{t('projects.projectNotFound')}
</h1>
<p className="text-main-view-fg/70">
{t('projects.projectNotFoundDesc')}
</p>
</div>
</div>
)
}
return (
<div className="flex h-full flex-col">
<HeaderPage>
<div className="flex items-center justify-between w-full">
{PlatformFeatures[PlatformFeature.ASSISTANTS] && (
<DropdownAssistant />
)}
</div>
</HeaderPage>
<div className="h-full relative flex flex-col justify-between px-4 md:px-8 py-4 overflow-y-auto">
<div
className={cn(
'mx-auto flex h-full flex-col justify-between',
chatWidth === 'compact' ? 'w-full md:w-4/6' : 'w-full',
isSmallScreen && 'w-full'
)}
>
<div className="flex h-full flex-col">
<div className="mb-6 mt-2">
{projectThreads.length > 0 && (
<>
<h2 className="text-xl font-semibold text-main-view-fg mb-2">
{t('projects.conversationsIn', { projectName: project.name })}
</h2>
<p className="text-main-view-fg/70">
{t('projects.conversationsDescription')}
</p>
</>
)}
</div>
{/* Thread List or Empty State */}
<div className="mb-0">
{projectThreads.length > 0 ? (
<ThreadList threads={projectThreads} variant="project" />
) : (
<div className="flex flex-col items-center justify-center py-12 text-center">
<IconMessage
size={48}
className="text-main-view-fg/30 mb-4"
/>
<h3 className="text-lg font-medium text-main-view-fg/60 mb-2">
{t('projects.noConversationsIn', { projectName: project.name })}
</h3>
<p className="text-main-view-fg/50 text-sm">
{t('projects.startNewConversation', { projectName: project.name })}
</p>
</div>
)}
</div>
</div>
</div>
</div>
{/* New Chat Input */}
<div
className={cn(
'mx-auto pt-2 pb-3 shrink-0 relative px-2',
chatWidth === 'compact' ? 'w-full md:w-4/6' : 'w-full',
isSmallScreen && 'w-full'
)}
>
<ChatInput
showSpeedToken={false}
initialMessage={true}
projectId={projectId}
/>
</div>
</div>
)
}

View File

@ -0,0 +1,244 @@
import { createFileRoute } from '@tanstack/react-router'
import { useState, useMemo } from 'react'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useThreads } from '@/hooks/useThreads'
import { useTranslation } from '@/i18n/react-i18next-compat'
import HeaderPage from '@/containers/HeaderPage'
import ThreadList from '@/containers/ThreadList'
import {
IconCirclePlus,
IconPencil,
IconTrash,
IconFolder,
IconChevronDown,
IconChevronRight,
} from '@tabler/icons-react'
import AddProjectDialog from '@/containers/dialogs/AddProjectDialog'
import { DeleteProjectDialog } from '@/containers/dialogs/DeleteProjectDialog'
import { Button } from '@/components/ui/button'
import { formatDate } from '@/utils/formatDate'
export const Route = createFileRoute('/project/')({
component: Project,
})
function Project() {
return <ProjectContent />
}
function ProjectContent() {
const { t } = useTranslation()
const { folders, addFolder, updateFolder, deleteFolder, getFolderById } =
useThreadManagement()
const threads = useThreads((state) => state.threads)
const [open, setOpen] = useState(false)
const [editingKey, setEditingKey] = useState<string | null>(null)
const [deleteConfirmOpen, setDeleteConfirmOpen] = useState(false)
const [deletingId, setDeletingId] = useState<string | null>(null)
const [expandedProjects, setExpandedProjects] = useState<Set<string>>(
new Set()
)
const handleDelete = (id: string) => {
setDeletingId(id)
setDeleteConfirmOpen(true)
}
const confirmDelete = () => {
if (deletingId) {
deleteFolder(deletingId)
setDeleteConfirmOpen(false)
setDeletingId(null)
}
}
const handleSave = (name: string) => {
if (editingKey) {
updateFolder(editingKey, name)
} else {
addFolder(name)
}
setOpen(false)
setEditingKey(null)
}
const formatProjectDate = (timestamp: number) => {
return formatDate(new Date(timestamp), { includeTime: false })
}
// Get threads for a specific project
const getThreadsForProject = useMemo(() => {
return (projectId: string) => {
return Object.values(threads)
.filter((thread) => thread.metadata?.project?.id === projectId)
.sort((a, b) => (b.updated || 0) - (a.updated || 0))
}
}, [threads])
const toggleProjectExpansion = (projectId: string) => {
setExpandedProjects((prev) => {
const newSet = new Set(prev)
if (newSet.has(projectId)) {
newSet.delete(projectId)
} else {
newSet.add(projectId)
}
return newSet
})
}
return (
<div className="flex h-full flex-col justify-center">
<HeaderPage>
<div className="flex items-center justify-between w-full mr-2">
<span>{t('projects.title')}</span>
<Button
onClick={() => {
setEditingKey(null)
setOpen(true)
}}
size="sm"
className="relative z-50"
>
<IconCirclePlus size={16} />
{t('projects.addProject')}
</Button>
</div>
</HeaderPage>
<div className="h-full overflow-y-auto flex flex-col">
<div className="p-4 w-full md:w-3/4 mx-auto mt-2">
{folders.length === 0 ? (
<div className="flex flex-col items-center justify-center py-12 text-center">
<IconFolder size={48} className="text-main-view-fg/30 mb-4" />
<h3 className="text-lg font-medium text-main-view-fg/60 mb-2">
{t('projects.noProjectsYet')}
</h3>
<p className="text-main-view-fg/50 text-sm">
{t('projects.noProjectsYetDesc')}
</p>
</div>
) : (
<div className="space-y-3">
{folders
.slice()
.sort((a, b) => b.updated_at - a.updated_at)
.map((folder) => {
const projectThreads = getThreadsForProject(folder.id)
const isExpanded = expandedProjects.has(folder.id)
return (
<div
className="bg-main-view-fg/3 py-2 px-4 rounded-lg"
key={folder.id}
>
<div className="flex items-center gap-4">
<div className="flex items-start gap-3 flex-1">
<div className="shrink-0 w-8 h-8 relative flex items-center justify-center bg-main-view-fg/4 rounded-md">
<IconFolder
size={16}
className="text-main-view-fg/50"
/>
</div>
<div className="flex-1 min-w-0">
<div className="flex items-center gap-2">
<h3 className="text-base font-medium text-main-view-fg/80 line-clamp-1">
{folder.name}
</h3>
<span className="text-xs bg-main-view-fg/10 text-main-view-fg/60 px-2 py-0.5 rounded-full">
{projectThreads.length}{' '}
{projectThreads.length === 1
? t('projects.thread')
: t('projects.threads')}
</span>
</div>
<p className="text-main-view-fg/50 text-xs line-clamp-2 mt-0.5">
{t('projects.updated')}{' '}
{formatProjectDate(folder.updated_at)}
</p>
</div>
</div>
<div className="flex items-center">
{projectThreads.length > 0 && (
<button
className="size-8 cursor-pointer flex items-center justify-center rounded-md hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out mr-1"
title={
isExpanded
? t('projects.collapseThreads')
: t('projects.expandThreads')
}
onClick={() => toggleProjectExpansion(folder.id)}
>
{isExpanded ? (
<IconChevronDown
size={16}
className="text-main-view-fg/50"
/>
) : (
<IconChevronRight
size={16}
className="text-main-view-fg/50"
/>
)}
</button>
)}
<button
className="size-8 cursor-pointer flex items-center justify-center rounded-md hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('projects.editProject')}
onClick={() => {
setEditingKey(folder.id)
setOpen(true)
}}
>
<IconPencil
size={16}
className="text-main-view-fg/50"
/>
</button>
<button
className="size-8 cursor-pointer flex items-center justify-center rounded-md hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('projects.deleteProject')}
onClick={() => handleDelete(folder.id)}
>
<IconTrash
size={16}
className="text-main-view-fg/50"
/>
</button>
</div>
</div>
{/* Thread List */}
{isExpanded && projectThreads.length > 0 && (
<div className="mt-3 pl-2">
<ThreadList
threads={projectThreads}
variant="project"
/>
</div>
)}
</div>
)
})}
</div>
)}
</div>
</div>
<AddProjectDialog
open={open}
onOpenChange={setOpen}
editingKey={editingKey}
initialData={editingKey ? getFolderById(editingKey) : undefined}
onSave={handleSave}
/>
<DeleteProjectDialog
open={deleteConfirmOpen}
onOpenChange={setDeleteConfirmOpen}
onConfirm={confirmDelete}
projectName={deletingId ? getFolderById(deletingId)?.name : undefined}
/>
</div>
)
}

View File

@ -22,7 +22,7 @@ Object.defineProperty(global, 'MODEL_CATALOG_URL', {
describe('DefaultModelsService', () => {
let modelsService: DefaultModelsService
const mockEngine = {
list: vi.fn(),
updateSettings: vi.fn(),
@ -246,7 +246,9 @@ describe('DefaultModelsService', () => {
})
mockEngine.load.mockRejectedValue(error)
await expect(modelsService.startModel(provider, model)).rejects.toThrow(error)
await expect(modelsService.startModel(provider, model)).rejects.toThrow(
error
)
})
it('should not load model again', async () => {
const mockSettings = {
@ -263,7 +265,9 @@ describe('DefaultModelsService', () => {
includes: () => true,
})
expect(mockEngine.load).toBeCalledTimes(0)
await expect(modelsService.startModel(provider, model)).resolves.toBe(undefined)
await expect(modelsService.startModel(provider, model)).resolves.toBe(
undefined
)
})
})
@ -312,7 +316,9 @@ describe('DefaultModelsService', () => {
json: vi.fn().mockResolvedValue(mockRepoData),
})
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toEqual(mockRepoData)
expect(fetch).toHaveBeenCalledWith(
@ -342,7 +348,9 @@ describe('DefaultModelsService', () => {
)
// Test with domain prefix
await modelsService.fetchHuggingFaceRepo('huggingface.co/microsoft/DialoGPT-medium')
await modelsService.fetchHuggingFaceRepo(
'huggingface.co/microsoft/DialoGPT-medium'
)
expect(fetch).toHaveBeenCalledWith(
'https://huggingface.co/api/models/microsoft/DialoGPT-medium?blobs=true&files_metadata=true',
{
@ -365,7 +373,9 @@ describe('DefaultModelsService', () => {
expect(await modelsService.fetchHuggingFaceRepo('')).toBeNull()
// Test string without slash
expect(await modelsService.fetchHuggingFaceRepo('invalid-repo')).toBeNull()
expect(
await modelsService.fetchHuggingFaceRepo('invalid-repo')
).toBeNull()
// Test whitespace only
expect(await modelsService.fetchHuggingFaceRepo(' ')).toBeNull()
@ -378,7 +388,8 @@ describe('DefaultModelsService', () => {
statusText: 'Not Found',
})
const result = await modelsService.fetchHuggingFaceRepo('nonexistent/model')
const result =
await modelsService.fetchHuggingFaceRepo('nonexistent/model')
expect(result).toBeNull()
expect(fetch).toHaveBeenCalledWith(
@ -398,7 +409,9 @@ describe('DefaultModelsService', () => {
statusText: 'Internal Server Error',
})
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toBeNull()
expect(consoleSpy).toHaveBeenCalledWith(
@ -414,7 +427,9 @@ describe('DefaultModelsService', () => {
;(fetch as any).mockRejectedValue(new Error('Network error'))
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toBeNull()
expect(consoleSpy).toHaveBeenCalledWith(
@ -448,7 +463,9 @@ describe('DefaultModelsService', () => {
json: vi.fn().mockResolvedValue(mockRepoData),
})
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toEqual(mockRepoData)
})
@ -487,7 +504,9 @@ describe('DefaultModelsService', () => {
json: vi.fn().mockResolvedValue(mockRepoData),
})
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toEqual(mockRepoData)
})
@ -531,7 +550,9 @@ describe('DefaultModelsService', () => {
json: vi.fn().mockResolvedValue(mockRepoData),
})
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toEqual(mockRepoData)
// Verify the GGUF file is present in siblings
@ -576,7 +597,8 @@ describe('DefaultModelsService', () => {
}
it('should convert HuggingFace repo to catalog model format', () => {
const result = modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
const result =
modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
const expected: CatalogModel = {
model_name: 'microsoft/DialoGPT-medium',
@ -586,12 +608,12 @@ describe('DefaultModelsService', () => {
num_quants: 2,
quants: [
{
model_id: 'model-q4_0',
model_id: 'microsoft/model-q4_0',
path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf',
file_size: '2.0 GB',
},
{
model_id: 'model-q8_0',
model_id: 'microsoft/model-q8_0',
path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q8_0.GGUF',
file_size: '4.0 GB',
},
@ -635,7 +657,8 @@ describe('DefaultModelsService', () => {
siblings: undefined,
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithoutSiblings)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithoutSiblings)
expect(result.num_quants).toBe(0)
expect(result.quants).toEqual([])
@ -663,7 +686,9 @@ describe('DefaultModelsService', () => {
],
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithVariousFileSizes)
const result = modelsService.convertHfRepoToCatalogModel(
repoWithVariousFileSizes
)
expect(result.quants[0].file_size).toBe('500.0 MB')
expect(result.quants[1].file_size).toBe('3.5 GB')
@ -676,7 +701,8 @@ describe('DefaultModelsService', () => {
tags: [],
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithEmptyTags)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithEmptyTags)
expect(result.description).toBe('**Tags**: ')
})
@ -687,7 +713,8 @@ describe('DefaultModelsService', () => {
downloads: undefined as any,
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithoutDownloads)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithoutDownloads)
expect(result.downloads).toBe(0)
})
@ -714,15 +741,17 @@ describe('DefaultModelsService', () => {
],
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithVariousGGUF)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithVariousGGUF)
expect(result.quants[0].model_id).toBe('model')
expect(result.quants[1].model_id).toBe('MODEL')
expect(result.quants[2].model_id).toBe('complex-model-name')
expect(result.quants[0].model_id).toBe('microsoft/model')
expect(result.quants[1].model_id).toBe('microsoft/MODEL')
expect(result.quants[2].model_id).toBe('microsoft/complex-model-name')
})
it('should generate correct download paths', () => {
const result = modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
const result =
modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
expect(result.quants[0].path).toBe(
'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf'
@ -733,7 +762,8 @@ describe('DefaultModelsService', () => {
})
it('should generate correct readme URL', () => {
const result = modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
const result =
modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
expect(result.readme).toBe(
'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/README.md'
@ -767,13 +797,14 @@ describe('DefaultModelsService', () => {
],
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithMixedCase)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithMixedCase)
expect(result.num_quants).toBe(3)
expect(result.quants).toHaveLength(3)
expect(result.quants[0].model_id).toBe('model-1')
expect(result.quants[1].model_id).toBe('model-2')
expect(result.quants[2].model_id).toBe('model-3')
expect(result.quants[0].model_id).toBe('microsoft/model-1')
expect(result.quants[1].model_id).toBe('microsoft/model-2')
expect(result.quants[2].model_id).toBe('microsoft/model-3')
})
it('should handle edge cases with file size formatting', () => {
@ -798,7 +829,8 @@ describe('DefaultModelsService', () => {
],
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithEdgeCases)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithEdgeCases)
expect(result.quants[0].file_size).toBe('0.0 MB')
expect(result.quants[1].file_size).toBe('1.0 GB')
@ -850,7 +882,10 @@ describe('DefaultModelsService', () => {
mockEngineManager.get.mockReturnValue(mockEngineWithSupport)
const result = await modelsService.isModelSupported('/path/to/model.gguf', 4096)
const result = await modelsService.isModelSupported(
'/path/to/model.gguf',
4096
)
expect(result).toBe('GREEN')
expect(mockEngineWithSupport.isModelSupported).toHaveBeenCalledWith(
@ -867,7 +902,10 @@ describe('DefaultModelsService', () => {
mockEngineManager.get.mockReturnValue(mockEngineWithSupport)
const result = await modelsService.isModelSupported('/path/to/model.gguf', 8192)
const result = await modelsService.isModelSupported(
'/path/to/model.gguf',
8192
)
expect(result).toBe('YELLOW')
expect(mockEngineWithSupport.isModelSupported).toHaveBeenCalledWith(
@ -884,7 +922,9 @@ describe('DefaultModelsService', () => {
mockEngineManager.get.mockReturnValue(mockEngineWithSupport)
const result = await modelsService.isModelSupported('/path/to/large-model.gguf')
const result = await modelsService.isModelSupported(
'/path/to/large-model.gguf'
)
expect(result).toBe('RED')
expect(mockEngineWithSupport.isModelSupported).toHaveBeenCalledWith(

View File

@ -30,6 +30,10 @@ export class DefaultModelsService implements ModelsService {
return EngineManager.instance().get(provider) as AIEngine | undefined
}
async getModel(modelId: string): Promise<modelInfo | undefined> {
return this.getEngine()?.get(modelId)
}
async fetchModels(): Promise<modelInfo[]> {
return this.getEngine()?.list() ?? []
}
@ -127,7 +131,7 @@ export class DefaultModelsService implements ModelsService {
const modelId = file.rfilename.replace(/\.gguf$/i, '')
return {
model_id: sanitizeModelId(modelId),
model_id: `${repo.author}/${sanitizeModelId(modelId)}`,
path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
file_size: formatFileSize(file.size),
}
@ -529,19 +533,21 @@ export class DefaultModelsService implements ModelsService {
// Fallback if method is not available
console.warn('planModelLoad method not available in llamacpp engine')
return {
gpuLayers: 0,
gpuLayers: 100,
maxContextLength: 2048,
noOffloadKVCache: true,
noOffloadKVCache: false,
offloadMmproj: false,
batchSize: 2048,
mode: 'Unsupported',
}
} catch (error) {
console.error(`Error planning model load for path ${modelPath}:`, error)
return {
gpuLayers: 0,
gpuLayers: 100,
maxContextLength: 2048,
noOffloadKVCache: true,
noOffloadKVCache: false,
offloadMmproj: false,
batchSize: 2048,
mode: 'Unsupported',
}
}
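Two behavior changes land in this service. Quant ids from converted HuggingFace repos are now prefixed with the repo author, which is exactly what the updated test expectations above ('microsoft/model-q4_0' and friends) assert; and the planModelLoad fallback and error paths now return a GPU-leaning plan (gpuLayers: 100, KV-cache offload enabled, plus the new batchSize field) instead of the old CPU-only defaults. The id change in isolation, with illustrative values and sanitizeModelId elided (assumed a no-op for this filename):

const repo = { author: 'microsoft', modelId: 'microsoft/DialoGPT-medium' }
const file = { rfilename: 'model-q4_0.gguf' }

const bareId = file.rfilename.replace(/\.gguf$/i, '') // 'model-q4_0'
const quantId = `${repo.author}/${bareId}` // 'microsoft/model-q4_0'
// Two repos shipping the same .gguf filename used to collide on quant id;
// the author prefix keeps them distinct.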

View File

@ -86,10 +86,12 @@ export interface ModelPlan {
maxContextLength: number
noOffloadKVCache: boolean
offloadMmproj: boolean
batchSize: number
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}
export interface ModelsService {
getModel(modelId: string): Promise<modelInfo | undefined>
fetchModels(): Promise<modelInfo[]>
fetchModelCatalog(): Promise<ModelCatalog>
fetchHuggingFaceRepo(

View File

@ -30,6 +30,12 @@ export class DefaultThreadsService implements ThreadsService {
provider: e.assistants?.[0]?.model?.engine,
},
assistants: e.assistants ?? [defaultAssistant],
metadata: {
...e.metadata,
// Override extracted fields to avoid duplication
order: e.metadata?.order,
is_favorite: e.metadata?.is_favorite,
},
} as Thread
})
})
@ -101,6 +107,7 @@ export class DefaultThreadsService implements ThreadsService {
},
],
metadata: {
...thread.metadata,
is_favorite: thread.isFavorite,
order: thread.order,
},
@ -115,4 +122,4 @@ export class DefaultThreadsService implements ThreadsService {
.get<ConversationalExtension>(ExtensionTypeEnum.Conversational)
?.deleteThread(threadId)
}
}
}
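The fix here is the spread: the save path previously rebuilt metadata from only is_favorite and order, silently dropping every other key, including the project link that the new /project routes depend on. Illustrative before/after, with shapes assumed from this file:

const thread = {
  isFavorite: true,
  order: 2,
  metadata: { project: { id: 'p1', name: 'Docs', updated_at: 1727328590 } },
}

// Before: metadata rebuilt from scratch, so the project link was lost on save
const before = { is_favorite: thread.isFavorite, order: thread.order }

// After: existing keys are preserved and the extracted fields still win
const after = {
  ...thread.metadata,
  is_favorite: thread.isFavorite,
  order: thread.order,
}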

View File

@ -44,6 +44,14 @@ type Thread = {
model?: ThreadModel
updated: number
order?: number
metadata?: {
project?: {
id: string
name: string
updated_at: number
}
[key: string]: unknown
}
}
type Assistant = {
@ -62,4 +70,4 @@ type TokenSpeed = {
tokenSpeed: number
tokenCount: number
lastTimestamp: number
}
}
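The widened Thread type closes the loop: the project object that DefaultThreadsService now preserves is part of the declared shape, and the index signature leaves room for any other keys. An assumed example of just the new field:

// Illustrative value only; extra keys are admitted by [key: string]: unknown.
const metadata: Thread['metadata'] = {
  project: { id: 'p1', name: 'Docs', updated_at: 1727328590 },
  source: 'import', // arbitrary extra key
}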