diff --git a/.github/scripts/rename-app-beta.sh b/.github/scripts/rename-app-beta.sh deleted file mode 100644 index a12d1d635..000000000 --- a/.github/scripts/rename-app-beta.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -# Check if the correct number of arguments is provided -if [ "$#" -ne 1 ]; then - echo "Usage: $0 <input_json_file>" - exit 1 -fi - -INPUT_JSON_FILE="$1" - -# Check if the input file exists -if [ ! -f "$INPUT_JSON_FILE" ]; then - echo "Input file not found: $INPUT_JSON_FILE" - exit 1 -fi - -# Use jq to transform the content -jq ' - .name = "jan-beta" | - .productName = "Jan-beta" | - .build.appId = "jan-beta.ai.app" | - .build.productName = "Jan-beta" | - .build.appId = "jan-beta.ai.app" | - .build.protocols[0].name = "Jan-beta" | - .build.protocols[0].schemes = ["jan-beta"] | - .build.artifactName = "jan-beta-${os}-${arch}-${version}.${ext}" | - .build.publish[0].channel = "beta" -' "$INPUT_JSON_FILE" > ./package.json.tmp - -cat ./package.json.tmp - -rm $INPUT_JSON_FILE -mv ./package.json.tmp $INPUT_JSON_FILE - -# Update the layout file -LAYOUT_FILE_PATH="web/app/layout.tsx" - -if [ ! -f "$LAYOUT_FILE_PATH" ]; then - echo "File does not exist: $LAYOUT_FILE_PATH" - exit 1 -fi - -# Perform the replacements -sed -i -e "s#Jan#Jan-beta#g" "$LAYOUT_FILE_PATH" - -# Notify completion -echo "File has been updated: $LAYOUT_FILE_PATH" \ No newline at end of file diff --git a/.github/scripts/rename-app.sh b/.github/scripts/rename-app.sh new file mode 100644 index 000000000..7c2ad6ef3 --- /dev/null +++ b/.github/scripts/rename-app.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# Check if the correct number of arguments is provided +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <input_json_file> <channel>" + exit 1 +fi + +INPUT_JSON_FILE="$1" + +CHANNEL="$2" + +if [ "$CHANNEL" == "nightly" ]; then + UPDATER="latest" +else + UPDATER="beta" +fi + +# Check if the input file exists +if [ ! -f "$INPUT_JSON_FILE" ]; then + echo "Input file not found: $INPUT_JSON_FILE" + exit 1 +fi + +# Use jq to transform the content +jq --arg channel "$CHANNEL" --arg updater "$UPDATER" ' + .name = "jan-\($channel)" | + .productName = "Jan-\($channel)" | + .build.appId = "jan-\($channel).ai.app" | + .build.productName = "Jan-\($channel)" | + .build.appId = "jan-\($channel).ai.app" | + .build.protocols[0].name = "Jan-\($channel)" | + .build.protocols[0].schemes = ["jan-\($channel)"] | + .build.artifactName = "jan-\($channel)-${os}-${arch}-${version}.${ext}" | + .build.publish[0].channel = $updater +' "$INPUT_JSON_FILE" > ./package.json.tmp + +cat ./package.json.tmp + +rm $INPUT_JSON_FILE +mv ./package.json.tmp $INPUT_JSON_FILE + +# Update the layout file +LAYOUT_FILE_PATH="web/app/layout.tsx" + +if [ ! -f "$LAYOUT_FILE_PATH" ]; then + echo "File does not exist: $LAYOUT_FILE_PATH" + exit 1 +fi + +# Perform the replacements +sed -i -e "s#Jan#Jan-$CHANNEL#g" "$LAYOUT_FILE_PATH" + +# Notify completion
echo "File has been updated: $LAYOUT_FILE_PATH" diff --git a/.github/scripts/rename-uninstaller-beta.sh b/.github/scripts/rename-uninstaller.sh similarity index 61% rename from .github/scripts/rename-uninstaller-beta.sh rename to .github/scripts/rename-uninstaller.sh index c322825da..7d3992fd0 100644 --- a/.github/scripts/rename-uninstaller-beta.sh +++ b/.github/scripts/rename-uninstaller.sh @@ -3,6 +3,14 @@ # File path to be modified FILE_PATH="electron/scripts/uninstaller.nsh" +# Check if the correct number of arguments is provided +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <channel>" + exit 1 +fi + +CHANNEL="$1" + # Check if the file exists if [ !
-f "$FILE_PATH" ]; then echo "File does not exist: $FILE_PATH" @@ -10,7 +18,7 @@ if [ ! -f "$FILE_PATH" ]; then fi # Perform the replacements -sed -i -e "s#jan#jan-beta#g" "$FILE_PATH" +sed -i -e "s#jan#jan-$CHANNEL#g" "$FILE_PATH" # Notify completion echo "File has been updated: $FILE_PATH" \ No newline at end of file diff --git a/.github/scripts/rename-workspace-beta.sh b/.github/scripts/rename-workspace.sh similarity index 74% rename from .github/scripts/rename-workspace-beta.sh rename to .github/scripts/rename-workspace.sh index 6286d1889..420042e2c 100644 --- a/.github/scripts/rename-workspace-beta.sh +++ b/.github/scripts/rename-workspace.sh @@ -3,6 +3,8 @@ # File path to be modified FILE_PATH="$1" +CHANNEL="$2" + # Check if the file exists if [ ! -f "$FILE_PATH" ]; then echo "File does not exist: $FILE_PATH" @@ -10,7 +12,7 @@ if [ ! -f "$FILE_PATH" ]; then fi # Perform the replacements -sed -i -e 's/yarn workspace jan/yarn workspace jan-beta/g' "$FILE_PATH" +sed -i -e "s/yarn workspace jan/yarn workspace jan-$CHANNEL/g" "$FILE_PATH" # Notify completion echo "File has been updated: $FILE_PATH" \ No newline at end of file diff --git a/.github/workflows/jan-docs-new-release.yaml b/.github/workflows/jan-docs-new-release.yaml index 2acca92de..a8e94b6d7 100644 --- a/.github/workflows/jan-docs-new-release.yaml +++ b/.github/workflows/jan-docs-new-release.yaml @@ -58,6 +58,6 @@ jobs: accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }} directory: ./docs/out - branch: dev + branch: main # Optional: Enable this if you want to have GitHub Deployments triggered gitHubToken: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/jan-docs.yml b/.github/workflows/jan-docs.yml index ada038e83..9b5fd97f1 100644 --- a/.github/workflows/jan-docs.yml +++ b/.github/workflows/jan-docs.yml @@ -83,6 +83,6 @@ jobs: accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }} directory: ./docs/out - branch: dev + branch: main # Optional: Enable this if you want to have GitHub Deployments triggered gitHubToken: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/jan-electron-build-beta.yml b/.github/workflows/jan-electron-build-beta.yml index 4f2625266..67496b355 100644 --- a/.github/workflows/jan-electron-build-beta.yml +++ b/.github/workflows/jan-electron-build-beta.yml @@ -133,4 +133,26 @@ jobs: run: | gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + noti-discord-and-update-url-readme: + needs: [build-macos-x64, build-macos-arm64, create-draft-release, build-windows-x64, build-linux-x64, combine-beta-mac-yml] + runs-on: ubuntu-latest + steps: + - name: Set version to environment variable + run: | + echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV + + - name: Notify Discord + uses: Ilshidur/action-discord@master + with: + args: | + Jan-beta App version {{ VERSION }}, has been released, use the following links to download the app with faster speed or visit the Github release page for more information: + - Windows: https://delta.jan.ai/beta/jan-beta-win-x64-{{ VERSION }}.exe + - macOS Intel: https://delta.jan.ai/beta/jan-beta-mac-x64-{{ VERSION }}.dmg + - macOS Apple Silicon: https://delta.jan.ai/beta/jan-beta-mac-arm64-{{ VERSION }}.dmg + - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb + - 
Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage + - GitHub Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }} + env: + DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }} \ No newline at end of file diff --git a/.github/workflows/jan-electron-linter-and-test.yml b/.github/workflows/jan-electron-linter-and-test.yml index 4e20d6c5f..b2105acb4 100644 --- a/.github/workflows/jan-electron-linter-and-test.yml +++ b/.github/workflows/jan-electron-linter-and-test.yml @@ -319,6 +319,13 @@ jobs: # TURBO_TEAM: 'linux' # TURBO_TOKEN: '${{ secrets.TURBO_TOKEN }}' + - uses: actions/upload-artifact@v4 + if: always() + with: + name: playwright-report + path: electron/playwright-report/ + retention-days: 2 + coverage-check: runs-on: [self-hosted, Linux, ubuntu-desktop] needs: base_branch_cov diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml index 496d153ae..92188c364 100644 --- a/.github/workflows/template-build-linux-x64.yml +++ b/.github/workflows/template-build-linux-x64.yml @@ -60,18 +60,25 @@ jobs: mv /tmp/package.json electron/package.json jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json mv /tmp/package.json web/package.json - jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json cat electron/package.json + # chmod +x .github/scripts/rename-app.sh + # .github/scripts/rename-app.sh ./electron/package.json nightly + # chmod +x .github/scripts/rename-workspace.sh + # .github/scripts/rename-workspace.sh ./package.json nightly + # echo "------------------------" + # cat ./electron/package.json + # echo "------------------------" - name: Change App Name for beta version if: inputs.beta == true shell: bash run: | - chmod +x .github/scripts/rename-app-beta.sh - .github/scripts/rename-app-beta.sh ./electron/package.json - chmod +x .github/scripts/rename-workspace-beta.sh - .github/scripts/rename-workspace-beta.sh ./package.json + chmod +x .github/scripts/rename-app.sh + .github/scripts/rename-app.sh ./electron/package.json beta + chmod +x .github/scripts/rename-workspace.sh + .github/scripts/rename-workspace.sh ./package.json beta echo "------------------------" cat ./electron/package.json echo "------------------------" diff --git a/.github/workflows/template-build-macos-arm64.yml b/.github/workflows/template-build-macos-arm64.yml index 40cdda627..a23e34cf9 100644 --- a/.github/workflows/template-build-macos-arm64.yml +++ b/.github/workflows/template-build-macos-arm64.yml @@ -72,22 +72,29 @@ jobs: jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json mv /tmp/package.json web/package.json - jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": 
"${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json - cat electron/package.json + # cat electron/package.json + # chmod +x .github/scripts/rename-app.sh + # .github/scripts/rename-app.sh ./electron/package.json nightly + # chmod +x .github/scripts/rename-workspace.sh + # .github/scripts/rename-workspace.sh ./package.json nightly + # echo "------------------------" + # cat ./electron/package.json + # echo "------------------------" - name: Change App Name for beta version if: inputs.beta == true shell: bash run: | - chmod +x .github/scripts/rename-app-beta.sh - .github/scripts/rename-app-beta.sh ./electron/package.json - chmod +x .github/scripts/rename-workspace-beta.sh - .github/scripts/rename-workspace-beta.sh ./package.json + chmod +x .github/scripts/rename-app.sh + .github/scripts/rename-app.sh ./electron/package.json beta + chmod +x .github/scripts/rename-workspace.sh + .github/scripts/rename-workspace.sh ./package.json beta echo "------------------------" cat ./electron/package.json echo "------------------------" @@ -186,7 +193,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: jan-mac-arm64-${{ inputs.new_version }} - path: ./electron/dist/jan-mac-arm64-${{ inputs.new_version }}.dmg + path: ./electron/dist/*.dmg - name: Upload Artifact if: inputs.beta == false diff --git a/.github/workflows/template-build-macos-x64.yml b/.github/workflows/template-build-macos-x64.yml index f139797af..18309fca0 100644 --- a/.github/workflows/template-build-macos-x64.yml +++ b/.github/workflows/template-build-macos-x64.yml @@ -72,22 +72,29 @@ jobs: jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json mv /tmp/package.json web/package.json - jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json - cat electron/package.json + # cat electron/package.json + # chmod +x .github/scripts/rename-app.sh + # .github/scripts/rename-app.sh ./electron/package.json nightly + # chmod +x .github/scripts/rename-workspace.sh + # .github/scripts/rename-workspace.sh ./package.json nightly + # echo 
"------------------------" + # cat ./electron/package.json + # echo "------------------------" - name: Change App Name for beta version if: inputs.beta == true shell: bash run: | - chmod +x .github/scripts/rename-app-beta.sh - .github/scripts/rename-app-beta.sh ./electron/package.json - chmod +x .github/scripts/rename-workspace-beta.sh - .github/scripts/rename-workspace-beta.sh ./package.json + chmod +x .github/scripts/rename-app.sh + .github/scripts/rename-app.sh ./electron/package.json beta + chmod +x .github/scripts/rename-workspace.sh + .github/scripts/rename-workspace.sh ./package.json beta echo "------------------------" cat ./electron/package.json echo "------------------------" @@ -186,7 +193,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: jan-mac-x64-${{ inputs.new_version }} - path: ./electron/dist/jan-mac-x64-${{ inputs.new_version }}.dmg + path: ./electron/dist/*.dmg - name: Upload Artifact if: inputs.beta == false diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml index ffe94fecc..2a1d3f15b 100644 --- a/.github/workflows/template-build-windows-x64.yml +++ b/.github/workflows/template-build-windows-x64.yml @@ -73,23 +73,35 @@ jobs: jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json mv /tmp/package.json web/package.json - jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json jq '.build.win.sign = "./sign.js"' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json cat electron/package.json + # chmod +x .github/scripts/rename-app.sh + # .github/scripts/rename-app.sh ./electron/package.json nightly + # chmod +x .github/scripts/rename-workspace.sh + # .github/scripts/rename-workspace.sh ./package.json nightly + # chmod +x .github/scripts/rename-uninstaller.sh + # .github/scripts/rename-uninstaller.sh nightly + # echo "------------------------" + # cat ./electron/package.json + # echo "------------------------" + # cat ./package.json + # echo "------------------------" + - name: Change App Name for beta version if: inputs.beta == true shell: bash run: | - chmod +x .github/scripts/rename-app-beta.sh - .github/scripts/rename-app-beta.sh ./electron/package.json - chmod +x .github/scripts/rename-workspace-beta.sh - .github/scripts/rename-workspace-beta.sh ./package.json - chmod +x .github/scripts/rename-uninstaller-beta.sh - .github/scripts/rename-uninstaller-beta.sh + chmod +x .github/scripts/rename-app.sh + .github/scripts/rename-app.sh ./electron/package.json beta + chmod +x .github/scripts/rename-workspace.sh + .github/scripts/rename-workspace.sh ./package.json beta + chmod +x .github/scripts/rename-uninstaller.sh + .github/scripts/rename-uninstaller.sh beta echo "------------------------" cat ./electron/package.json echo "------------------------" diff --git a/.husky/pre-commit 
b/.husky/pre-commit index a4aa5add4..53c4e577e 100644 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1 +1 @@ -npm run lint --fix \ No newline at end of file +npx oxlint@latest --fix \ No newline at end of file diff --git a/JanBanner.png b/JanBanner.png new file mode 100644 index 000000000..165831871 Binary files /dev/null and b/JanBanner.png differ diff --git a/README.md b/README.md index ecede5bd9..043960537 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Jan - Turn your computer into an AI computer +# Jan - Local AI Assistant -![Jan banner](https://github.com/janhq/jan/assets/89722390/35daac7d-b895-487c-a6ac-6663daaad78e) +![Jan banner](./JanBanner.png)

@@ -12,18 +12,22 @@

- Getting Started + Getting Started - Docs - Changelog - Bug reports - Discord

-> [!Warning] >**Jan is currently in Development**: Expect breaking changes and bugs! +

+⚠️ Jan is currently in Development: Expect breaking changes and bugs! +

-Jan is an open-source ChatGPT alternative that runs 100% offline on your computer. -**Jan runs on any hardware.** From PCs to multi-GPU clusters, Jan supports universal architectures: +Jan is a ChatGPT alternative that runs 100% offline on your device. Our goal is to make it easy for a layperson to download and run LLMs and use AI with **full control** and **privacy**. + +Jan is powered by [Cortex](https://github.com/janhq/cortex.cpp), our embeddable local AI engine that runs on any hardware. +From PCs to multi-GPU clusters, Jan & Cortex support universal architectures: - [x] NVIDIA GPUs (fast) - [x] Apple M-series (fast) @@ -31,6 +35,12 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute - [x] Linux Debian - [x] Windows x64 +#### Features: +- [Model Library](https://jan.ai/docs/models/manage-models#add-models) with popular LLMs like Llama, Gemma, Mistral, or Qwen +- Connect to [Remote AI APIs](https://jan.ai/docs/remote-models/openai) like Groq and OpenRouter +- Local API Server with OpenAI-equivalent API +- [Extensions](https://jan.ai/docs/extensions) for customizing Jan + ## Download @@ -74,7 +84,40 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute - + + + + + + + +
Experimental (Nightly Build)Beta (Preview) + + + jan.exe + + + + + Intel + + + + + M1/M2/M3/M4 + + + + + jan.deb + + + + + jan.AppImage + +
Nightly Build (Experimental) @@ -108,79 +151,64 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
-Download the latest version of Jan at https://jan.ai/ or visit the **[GitHub Releases](https://github.com/janhq/jan/releases)** to download any previous release. +Download the latest version of Jan at https://jan.ai/ or visit the [GitHub Releases](https://github.com/janhq/jan/releases) to download any previous release. ## Demo -![Demo](/demo.gif) +https://github.com/user-attachments/assets/c3592fa2-c504-4d9d-a885-7e00122a50f3 -_Realtime Video: Jan v0.4.3-nightly on a Mac M1, 16GB Sonoma 14_ +*Real-time Video: Jan v0.5.7 on a Mac M2, 16GB Sonoma 14.2* ## Quicklinks -#### Jan +### Jan -- [Jan website](https://jan.ai/) +- [Jan Website](https://jan.ai/) - [Jan GitHub](https://github.com/janhq/jan) -- [User Guides](https://jan.ai/guides/) -- [Developer docs](https://jan.ai/developer/) -- [API reference](https://jan.ai/api-reference/) -- [Specs](https://jan.ai/docs/) +- [Documentation](https://jan.ai/docs) +- [Jan Changelog](https://jan.ai/changelog) +- [Jan Blog](https://jan.ai/blog) -#### Nitro +### Cortex.cpp +Jan is powered by **Cortex.cpp**. It is a C++ command-line interface (CLI) designed as an alternative to [Ollama](https://ollama.com/). By default, it runs on the llama.cpp engine but also supports other engines, including ONNX and TensorRT-LLM, making it a multi-engine platform. -Nitro is a high-efficiency C++ inference engine for edge computing. It is lightweight and embeddable, and can be used on its own within your own projects. - -- [Nitro Website](https://nitro.jan.ai) -- [Nitro GitHub](https://github.com/janhq/nitro) -- [Documentation](https://nitro.jan.ai/docs) -- [API Reference](https://nitro.jan.ai/api-reference) - -## Troubleshooting - -As Jan is in development mode, you might get stuck on a broken build. - -To reset your installation: - -1. Use the following commands to remove any dangling backend processes: - - ```sh - ps aux | grep nitro - ``` - - Look for processes like "nitro" and "nitro_arm_64," and kill them one by one with: - - ```sh - kill -9 <PID> - ``` - -2. 
**Remove Jan from your Applications folder and Cache folder** - - ```bash - make clean - ``` - - This will remove all build artifacts and cached files: - - - Delete Jan extension from your `~/jan/extensions` folder - - Delete all `node_modules` in current folder - - Clear Application cache in `~/Library/Caches/jan` +- [Cortex Website](https://cortex.so/) +- [Cortex GitHub](https://github.com/janhq/cortex.cpp) +- [Documentation](https://cortex.so/docs/) +- [Models Library](https://cortex.so/models) +- API Reference: *Under development* + ## Requirements for running Jan -- MacOS: 13 or higher -- Windows: +- **MacOS**: 13 or higher +- **Windows**: - Windows 10 or higher - To enable GPU support: - Nvidia GPU with CUDA Toolkit 11.7 or higher - Nvidia driver 470.63.01 or higher -- Linux: +- **Linux**: - glibc 2.27 or higher (check with `ldd --version`) - gcc 11, g++ 11, cpp 11 or higher, refer to this [link](https://jan.ai/guides/troubleshooting/gpu-not-used/#specific-requirements-for-linux) for more information - To enable GPU support: - Nvidia GPU with CUDA Toolkit 11.7 or higher - Nvidia driver 470.63.01 or higher +## Troubleshooting + +As Jan is in development mode, you might get stuck on some common issues: +- [Troubleshooting a broken build](https://jan.ai/docs/troubleshooting#broken-build) +- [Troubleshooting NVIDIA GPU](https://jan.ai/docs/troubleshooting#troubleshooting-nvidia-gpu) +- [Troubleshooting Something's Amiss](https://jan.ai/docs/troubleshooting#somethings-amiss) + + +If you can't find what you need in our troubleshooting guide, feel free to reach out to us for extra help: +1. Copy your [error logs & device specifications](https://jan.ai/docs/troubleshooting#how-to-get-error-logs). +2. Go to our [Discord](https://discord.com/invite/FTk2MvZwJH) & send it to **#🆘|get-help** channel for further support. + +*Check the logs to ensure the information is what you intend to send. Note that we retain your logs for only 24 hours, so report any issues promptly.* + + ## Contributing Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) file @@ -209,11 +237,7 @@ Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) fi This will start the development server and open the desktop app. -3. (Optional) **Run the API server without frontend** - ```bash - yarn dev:server - ``` ### For production build @@ -225,102 +249,6 @@ make build This will build the app MacOS m1/m2 for production (with code signing already done) and put the result in `dist` folder. -### Docker mode - -- Supported OS: Linux, WSL2 Docker -- Pre-requisites: - - - Docker Engine and Docker Compose are required to run Jan in Docker mode. Follow the [instructions](https://docs.docker.com/engine/install/ubuntu/) below to get started with Docker Engine on Ubuntu. - - ```bash - curl -fsSL https://get.docker.com -o get-docker.sh - sudo sh ./get-docker.sh --dry-run - ``` - - - If you intend to run Jan in GPU mode, you need to install `nvidia-driver` and `nvidia-docker2`. Follow the instruction [here](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) for installation. 
- -- Run Jan in Docker mode - > User can choose between `docker-compose.yml` with latest prebuilt docker image or `docker-compose-dev.yml` with local docker build - -| Docker compose Profile | Description | -| ---------------------- | -------------------------------------------- | -| `cpu-fs` | Run Jan in CPU mode with default file system | -| `cpu-s3fs` | Run Jan in CPU mode with S3 file system | -| `gpu-fs` | Run Jan in GPU mode with default file system | -| `gpu-s3fs` | Run Jan in GPU mode with S3 file system | - -| Environment Variable | Description | -| ----------------------- | ------------------------------------------------------------------------------------------------------- | -| `S3_BUCKET_NAME` | S3 bucket name - leave blank for default file system | -| `AWS_ACCESS_KEY_ID` | AWS access key ID - leave blank for default file system | -| `AWS_SECRET_ACCESS_KEY` | AWS secret access key - leave blank for default file system | -| `AWS_ENDPOINT` | AWS endpoint URL - leave blank for default file system | -| `AWS_REGION` | AWS region - leave blank for default file system | -| `API_BASE_URL` | Jan Server URL, please modify it as your public ip address or domain name default http://localhost:1377 | - -- **Option 1**: Run Jan in CPU mode - - ```bash - # cpu mode with default file system - docker compose --profile cpu-fs up -d - - # cpu mode with S3 file system - docker compose --profile cpu-s3fs up -d - ``` - -- **Option 2**: Run Jan in GPU mode - - - **Step 1**: Check CUDA compatibility with your NVIDIA driver by running `nvidia-smi` and check the CUDA version in the output - - ```bash - nvidia-smi - - # Output - +---------------------------------------------------------------------------------------+ - | NVIDIA-SMI 531.18 Driver Version: 531.18 CUDA Version: 12.1 | - |-----------------------------------------+----------------------+----------------------+ - | GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC | - | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | - | | | MIG M. | - |=========================================+======================+======================| - | 0 NVIDIA GeForce RTX 4070 Ti WDDM | 00000000:01:00.0 On | N/A | - | 0% 44C P8 16W / 285W| 1481MiB / 12282MiB | 2% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - | 1 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:02:00.0 Off | N/A | - | 0% 49C P8 14W / 120W| 0MiB / 6144MiB | 0% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - | 2 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:05:00.0 Off | N/A | - | 29% 38C P8 11W / 120W| 0MiB / 6144MiB | 0% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - - +---------------------------------------------------------------------------------------+ - | Processes: | - | GPU GI CI PID Type Process name GPU Memory | - | ID ID Usage | - |=======================================================================================| - ``` - - - **Step 2**: Visit [NVIDIA NGC Catalog ](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags) and find the smallest minor version of image tag that matches your CUDA version (e.g., 12.1 -> 12.1.0) - - - **Step 3**: Update the `Dockerfile.gpu` line number 5 with the latest minor version of the image tag from step 2 (e.g. 
change `FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`) - - - **Step 4**: Run command to start Jan in GPU mode - - ```bash - # GPU mode with default file system - docker compose --profile gpu-fs up -d - - # GPU mode with S3 file system - docker compose --profile gpu-s3fs up -d - ``` - -This will start the web server and you can access Jan at `http://localhost:3000`. - -> Note: RAG feature is not supported in Docker mode with s3fs yet. - ## Acknowledgements Jan builds on top of other open-source projects: @@ -334,18 +262,18 @@ Jan builds on top of other open-source projects: - Bugs & requests: file a GitHub ticket - For discussion: join our Discord [here](https://discord.gg/FTk2MvZwJH) -- For business inquiries: email hello@jan.ai +- For business inquiries: email hello@jan.ai - For jobs: please email hr@jan.ai ## Trust & Safety -Beware of scams. +Beware of scams! -- We will never ask you for personal info -- We are a free product; there's no paid version -- We don't have a token or ICO -- We are not actively fundraising or seeking donations +- We will never request your personal information. +- Our product is completely free; no paid version exists. +- We do not have a token or ICO. +- We are a [bootstrapped company](https://en.wikipedia.org/wiki/Bootstrapping), and don't have any external investors (*yet*). We're open to exploring opportunities with strategic partners who want to tackle [our mission](https://jan.ai/about#mission) together. ## License -Jan is free and open source, under the AGPLv3 license. +Jan is free and open source, under the **AGPLv3** license. diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts index 603445745..d934e1c06 100644 --- a/core/src/browser/extension.ts +++ b/core/src/browser/extension.ts @@ -1,6 +1,8 @@ -import { SettingComponentProps } from '../types' +import { Model, ModelEvent, SettingComponentProps } from '../types' import { getJanDataFolderPath, joinPath } from './core' +import { events } from './events' import { fs } from './fs' +import { ModelManager } from './models' export enum ExtensionTypeEnum { Assistant = 'assistant', @@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType { return undefined } + /** + * Registers models - persists them in the shared in-memory ModelManager instance's data map. + * @param models + */ + async registerModels(models: Model[]): Promise<void> { + for (const model of models) { + ModelManager.instance().register(model) + } + events.emit(ModelEvent.OnModelsUpdate, {}) + } +
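For orientation, a minimal sketch of how a provider extension might use the new in-memory registration path above. The extension class and the model fields are hypothetical; only `BaseExtension`, `Model`, and `registerModels` come from this changeset:

```typescript
import { BaseExtension, Model } from '@janhq/core'

// Hypothetical provider extension: models are now pushed into the shared
// in-memory ModelManager via registerModels instead of being written out
// as model.json files on disk.
class ExampleModelProvider extends BaseExtension {
  async onLoad() {
    const models = [
      // Partial object, cast for illustration only.
      { id: 'example-model', name: 'Example Model', engine: 'cortex' } as Model,
    ]
    // Stores each model in the shared ModelManager and notifies listeners
    // via ModelEvent.OnModelsUpdate.
    await this.registerModels(models)
  }

  onUnload() {}
}
```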
 + /** + * Register settings for the extension. + * @param settings + * @returns + */ async registerSettings(settings: SettingComponentProps[]): Promise<void> { if (!this.name) { console.error('Extension name is not defined') @@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType { } } + /** + * Get the setting value for the key. + * @param key + * @param defaultValue + * @returns + */ async getSetting<T>(key: string, defaultValue: T) { const keySetting = (await this.getSettings()).find((setting) => setting.key === key) @@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType { return } + /** + * Get the settings for the extension. + * @returns + */ async getSettings(): Promise<SettingComponentProps[]> { if (!this.name) return [] @@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType { } } + /** + * Update the settings for the extension. + * @param componentProps + * @returns + */ async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> { if (!this.name) return diff --git a/core/src/browser/extensions/engines/AIEngine.test.ts b/core/src/browser/extensions/engines/AIEngine.test.ts index 59dad280f..ab3280e1c 100644 --- a/core/src/browser/extensions/engines/AIEngine.test.ts +++ b/core/src/browser/extensions/engines/AIEngine.test.ts @@ -1,8 +1,6 @@ import { AIEngine } from './AIEngine' import { events } from '../../events' -import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types' -import { EngineManager } from './EngineManager' -import { fs } from '../../fs' +import { ModelEvent, Model } from '../../../types' jest.mock('../../events') jest.mock('./EngineManager') @@ -26,7 +24,7 @@ describe('AIEngine', () => { }) it('should load model if provider matches', async () => { - const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any + const model: any = { id: 'model1', engine: 'test-provider' } as any await engine.loadModel(model) @@ -34,7 +32,7 @@ describe('AIEngine', () => { }) it('should not load model if provider does not match', async () => { - const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any + const model: any = { id: 'model1', engine: 'other-provider' } as any await engine.loadModel(model) diff --git a/core/src/browser/extensions/engines/AIEngine.ts b/core/src/browser/extensions/engines/AIEngine.ts index 75354de88..d0528b0ab 100644 --- a/core/src/browser/extensions/engines/AIEngine.ts +++ b/core/src/browser/extensions/engines/AIEngine.ts @@ -1,17 +1,14 @@ -import { getJanDataFolderPath, joinPath } from '../../core' import { events } from '../../events' import { BaseExtension } from '../../extension' -import { fs } from '../../fs' -import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types' +import { MessageRequest, Model, ModelEvent } from '../../../types' import { EngineManager } from './EngineManager' +import { ModelManager } from '../../models/manager' /** * Base AIEngine * Applicable to all AI Engines */ export abstract class AIEngine extends BaseExtension { - private static modelsFolder = 'models' - // The inference engine abstract provider: string @@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension { override onLoad() { this.registerEngine() - events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model)) + events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model)) events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model)) } @@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension { registerEngine() { EngineManager.instance().register(this) } - async registerModels(models: Model[]): Promise<void> { - const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder]) - - let shouldNotifyModelUpdate = false - for (const model of models) { - const modelPath = await joinPath([modelFolderPath, model.id]) - const isExist = await fs.existsSync(modelPath) - - if (isExist) { - await this.migrateModelIfNeeded(model, modelPath) - continue - } - - await fs.mkdir(modelPath) - await fs.writeFileSync( - await joinPath([modelPath, 'model.json']), - JSON.stringify(model, null, 2) - ) - shouldNotifyModelUpdate = true - } - - if (shouldNotifyModelUpdate) { - events.emit(ModelEvent.OnModelsUpdate, {}) - } - } - - async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> { - try { - const modelJson = await fs.readFileSync(await joinPath([modelPath, 
'model.json']), 'utf-8') - const currentModel: Model = JSON.parse(modelJson) - if (currentModel.version !== model.version) { - await fs.writeFileSync( - await joinPath([modelPath, 'model.json']), - JSON.stringify(model, null, 2) - ) - - events.emit(ModelEvent.OnModelsUpdate, {}) - } - } catch (error) { - console.warn('Error while try to migrating model', error) - } - } - /** * Loads the model. */ - async loadModel(model: ModelFile): Promise<void> { + async loadModel(model: Model): Promise<void> { if (model.engine.toString() !== this.provider) return Promise.resolve() events.emit(ModelEvent.OnModelReady, model) return Promise.resolve() diff --git a/core/src/browser/extensions/engines/EngineManager.ts b/core/src/browser/extensions/engines/EngineManager.ts index 2980c5c65..90ce75ac5 100644 --- a/core/src/browser/extensions/engines/EngineManager.ts +++ b/core/src/browser/extensions/engines/EngineManager.ts @@ -1,3 +1,4 @@ +import { InferenceEngine } from '../../../types' import { AIEngine } from './AIEngine' /** @@ -20,6 +21,22 @@ export class EngineManager { * @returns The engine, if found. */ get<T extends AIEngine>(provider: string): T | undefined { + // Backward compatible provider + // nitro is migrated to cortex + if ( + [ + InferenceEngine.nitro, + InferenceEngine.cortex, + InferenceEngine.cortex_llamacpp, + InferenceEngine.cortex_onnx, + InferenceEngine.cortex_tensorrtllm, + InferenceEngine.cortex_onnx, + ] + .map((e) => e.toString()) + .includes(provider) + ) + provider = InferenceEngine.cortex + return this.engines.get(provider) as T | undefined } @@ -27,6 +44,6 @@ export class EngineManager { * The instance of the engine manager. */ static instance(): EngineManager { - return window.core?.engineManager as EngineManager ?? new EngineManager() + return (window.core?.engineManager as EngineManager) ?? new EngineManager() } }
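A short illustration of the backward-compatibility shim above: any of the legacy `nitro`/`cortex_*` provider names now resolve to the single registered `cortex` engine. The lookup itself is from this changeset; the snippet around it is illustrative and assumes a cortex engine has been registered:

```typescript
import { EngineManager, InferenceEngine } from '@janhq/core'

const manager = EngineManager.instance()

// 'nitro' is rewritten to InferenceEngine.cortex before the map lookup,
// so both calls return the same engine instance.
const viaLegacyName = manager.get(InferenceEngine.nitro)
const viaCurrentName = manager.get(InferenceEngine.cortex)
console.log(viaLegacyName === viaCurrentName) // true once the cortex engine is registered
```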
diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts index 4ae81496f..08fd947da 100644 --- a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts +++ b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts @@ -3,7 +3,7 @@ */ import { LocalOAIEngine } from './LocalOAIEngine' import { events } from '../../events' -import { ModelEvent, ModelFile, Model } from '../../../types' +import { ModelEvent, Model } from '../../../types' import { executeOnMain, systemInformation, dirName } from '../../core' jest.mock('../../core', () => ({ @@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => { }) it('should load model correctly', async () => { - const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any + const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any const modelFolder = 'path/to' const systemInfo = { os: 'testOS' } const res = { error: null } @@ -66,7 +66,7 @@ describe('LocalOAIEngine', () => { }) it('should handle load model error', async () => { - const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any + const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any const modelFolder = 'path/to' const systemInfo = { os: 'testOS' } const res = { error: 'load error' } @@ -91,9 +91,7 @@ describe('LocalOAIEngine', () => { it('should not unload model if engine does not match', async () => { const model: Model = { engine: 'otherProvider' } as any - await engine.unloadModel(model) - expect(executeOnMain).not.toHaveBeenCalled() expect(events.emit).not.toHaveBeenCalledWith(ModelEvent.OnModelStopped, {}) }) diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.ts b/core/src/browser/extensions/engines/LocalOAIEngine.ts index 123b9a593..b54f8fbde 100644 --- a/core/src/browser/extensions/engines/LocalOAIEngine.ts +++ b/core/src/browser/extensions/engines/LocalOAIEngine.ts @@ -1,6 +1,6 @@ -import { executeOnMain, systemInformation, dirName } from '../../core' +import { executeOnMain, systemInformation, dirName, joinPath, getJanDataFolderPath } from '../../core' import { events } from '../../events' -import { Model, ModelEvent, ModelFile } from '../../../types' +import { Model, ModelEvent } from '../../../types' import { OAIEngine } from './OAIEngine' /** @@ -22,16 +22,16 @@ export abstract class LocalOAIEngine extends OAIEngine { override onLoad() { super.onLoad() // These events are applicable to local inference providers - events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model)) + events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model)) events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model)) } /** * Load the model. */ - override async loadModel(model: ModelFile): Promise<void> { + override async loadModel(model: Model & { file_path?: string }): Promise<void> { if (model.engine.toString() !== this.provider) return - const modelFolder = await dirName(model.file_path) + const modelFolder = 'file_path' in model && model.file_path ? 
await dirName(model.file_path) : await this.getModelFilePath(model.id) const systemInfo = await systemInformation() const res = await executeOnMain( this.nodeModule, @@ -63,4 +63,12 @@ export abstract class LocalOAIEngine extends OAIEngine { events.emit(ModelEvent.OnModelStopped, {}) }) } + + /// Legacy + private getModelFilePath = async ( + id: string, + ): Promise<string> => { + return joinPath([await getJanDataFolderPath(), 'models', id]) + } + /// }
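The `loadModel` change above boils down to a single folder-resolution rule; here is a sketch of that rule in isolation (the helper name is ours, the core utilities are real):

```typescript
import { dirName, getJanDataFolderPath, joinPath, Model } from '@janhq/core'

// Models imported from disk still carry a legacy file_path and use its
// directory; models registered in-memory fall back to the legacy
// <jan data folder>/models/<model id> location.
async function resolveModelFolder(model: Model & { file_path?: string }): Promise<string> {
  return model.file_path
    ? dirName(model.file_path)
    : joinPath([await getJanDataFolderPath(), 'models', model.id])
}
```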
diff --git a/core/src/browser/extensions/engines/OAIEngine.ts b/core/src/browser/extensions/engines/OAIEngine.ts index a8dde4677..694a87264 100644 --- a/core/src/browser/extensions/engines/OAIEngine.ts +++ b/core/src/browser/extensions/engines/OAIEngine.ts @@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine { * Inference request */ override async inference(data: MessageRequest) { - if (data.model?.engine?.toString() !== this.provider) return + if (!data.model?.id) { + events.emit(MessageEvent.OnMessageResponse, { + status: MessageStatus.Error, + content: [ + { + type: ContentType.Text, + text: { + value: 'No model ID provided', + annotations: [], + }, + }, + ], + }) + return + } const timestamp = Date.now() const message: ThreadMessage = { @@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine { model: model.id, stream: true, ...model.parameters, - ...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}), } if (this.transformPayload) { requestBody = this.transformPayload(requestBody) diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts index 024ced470..9713256b3 100644 --- a/core/src/browser/extensions/engines/helpers/sse.ts +++ b/core/src/browser/extensions/engines/helpers/sse.ts @@ -10,7 +10,7 @@ export function requestInference( requestBody: any, model: { id: string - parameters: ModelRuntimeParams + parameters?: ModelRuntimeParams }, controller?: AbortController, headers?: HeadersInit, @@ -22,7 +22,7 @@ export function requestInference( headers: { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*', - 'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json', + 'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json', ...headers, }, body: JSON.stringify(requestBody), @@ -45,7 +45,7 @@ export function requestInference( subscriber.complete() return } - if (model.parameters.stream === false) { + if (model.parameters?.stream === false) { const data = await response.json() if (transformResponse) { subscriber.next(transformResponse(data)) diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts index 040542927..e224ec5cc 100644 --- a/core/src/browser/extensions/model.ts +++ b/core/src/browser/extensions/model.ts @@ -1,13 +1,5 @@ import { BaseExtension, ExtensionTypeEnum } from '../extension' -import { - GpuSetting, - HuggingFaceRepoData, - ImportingModel, - Model, - ModelFile, - ModelInterface, - OptionType, -} from '../../types' +import { Model, ModelInterface, OptionType } from '../../types' /** * Model extension for managing models. */ @@ -20,17 +12,16 @@ export abstract class ModelExtension extends BaseExtension implements ModelInterface return ExtensionTypeEnum.Model } - abstract downloadModel( - model: Model, - gpuSettings?: GpuSetting, - network?: { proxy: string; ignoreSSL?: boolean } + abstract getModels(): Promise<Model[]> + abstract pullModel(model: string, id?: string, name?: string): Promise<void> + abstract cancelModelPull(modelId: string): Promise<void> + abstract importModel( + model: string, + modePath: string, + name?: string, + optionType?: OptionType ): Promise<void> - abstract cancelModelDownload(modelId: string): Promise<void> - abstract deleteModel(model: ModelFile): Promise<void> - abstract getDownloadedModels(): Promise<ModelFile[]> - abstract getConfiguredModels(): Promise<ModelFile[]> - abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void> - abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile> - abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData> - abstract getDefaultModel(): Promise<Model> + abstract updateModel(modelInfo: Partial<Model>): Promise<Model> + abstract deleteModel(model: string): Promise<void> + abstract isModelLoaded(model: string): Promise<boolean> } diff --git a/core/src/browser/index.test.ts b/core/src/browser/index.test.ts index 339cd9046..c8cabbb0b 100644 --- a/core/src/browser/index.test.ts +++ b/core/src/browser/index.test.ts @@ -1,32 +1,37 @@ -import * as Core from './core'; -import * as Events from './events'; -import * as FileSystem from './fs'; -import * as Extension from './extension'; -import * as Extensions from './extensions'; -import * as Tools from './tools'; +import * as Core from './core' +import * as Events from './events' +import * as FileSystem from './fs' +import * as Extension from './extension' +import * as Extensions from './extensions' +import * as Tools from './tools' +import * as Models from './models' describe('Module Tests', () => { - it('should export Core module', () => { - expect(Core).toBeDefined(); - }); + it('should export Core module', () => { + expect(Core).toBeDefined() + }) - it('should export Event module', () => { - expect(Events).toBeDefined(); - }); + it('should export Event module', () => { + expect(Events).toBeDefined() + }) - it('should export Filesystem module', () => { - expect(FileSystem).toBeDefined(); - }); + it('should export Filesystem module', () => { + expect(FileSystem).toBeDefined() + }) - it('should export Extension module', () => { - expect(Extension).toBeDefined(); - }); + it('should export Extension module', () => { + expect(Extension).toBeDefined() + }) - it('should export all base extensions', () => { - expect(Extensions).toBeDefined(); - }); + it('should export all base extensions', () => { + expect(Extensions).toBeDefined() + }) - it('should export all base tools', () => { - expect(Tools).toBeDefined(); - }); -}); \ No newline at end of file + it('should export all base tools', () => { + expect(Tools).toBeDefined() + }) + + it('should export all base models', () => { + expect(Models).toBeDefined() + }) +}) diff --git a/core/src/browser/index.ts b/core/src/browser/index.ts index a7803c7e0..a6ce187ca 100644 --- a/core/src/browser/index.ts +++ b/core/src/browser/index.ts @@ -33,3 +33,9 @@ export * from './extensions' * @module */ export * from './tools' + +/** + * Export all base models. 
+ * @module + */ +export * from './models' diff --git a/core/src/browser/models/index.ts b/core/src/browser/models/index.ts new file mode 100644 index 000000000..81d37e501 --- /dev/null +++ b/core/src/browser/models/index.ts @@ -0,0 +1,10 @@ +/** + * Export ModelManager + * @module + */ +export { ModelManager } from './manager' + +/** + * Export all utils + */ +export * from './utils' diff --git a/core/src/browser/models/manager.ts b/core/src/browser/models/manager.ts new file mode 100644 index 000000000..d5afe83d5 --- /dev/null +++ b/core/src/browser/models/manager.ts @@ -0,0 +1,47 @@ +import { Model, ModelEvent } from '../../types' +import { events } from '../events' + +/** + * Manages the registered models across extensions. + */ +export class ModelManager { + public models = new Map<string, Model>() + + constructor() { + if (window) { + window.core.modelManager = this + } + } + + /** + * Registers a model. + * @param model - The model to register. + */ + register<T extends Model>(model: T) { + if (this.models.has(model.id)) { + this.models.set(model.id, { + ...model, + ...this.models.get(model.id), + }) + } else { + this.models.set(model.id, model) + } + events.emit(ModelEvent.OnModelsUpdate, {}) + } + + /** + * Retrieves a model by its id. + * @param id - The id of the model to retrieve. + * @returns The model, if found. + */ + get<T extends Model>(id: string): T | undefined { + return this.models.get(id) as T | undefined + } + + /** + * The instance of the model manager. + */ + static instance(): ModelManager { + return (window.core?.modelManager as ModelManager) ?? new ModelManager() + } +}
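Note the merge order in `register` above: the existing entry is spread last, so fields from an earlier registration win over a re-registration with the same id. A small sketch of that behavior (it runs inside Jan's renderer, since the manager hangs off `window.core`; the model values are made up):

```typescript
import { Model, ModelManager } from '@janhq/core'

const manager = ModelManager.instance()

manager.register({ id: 'example-model', name: 'First Name' } as Model)
// Same id again: existing fields take precedence, so the first name survives;
// only keys the first registration lacked would be picked up here.
manager.register({ id: 'example-model', name: 'Second Name' } as Model)

console.log(manager.get('example-model')?.name) // 'First Name'
```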
diff --git a/web/utils/modelParam.test.ts b/core/src/browser/models/utils.test.ts similarity index 87% rename from web/utils/modelParam.test.ts rename to core/src/browser/models/utils.test.ts index 97325d277..ac876c3dc 100644 --- a/web/utils/modelParam.test.ts +++ b/core/src/browser/models/utils.test.ts @@ -1,7 +1,10 @@ // web/utils/modelParam.test.ts -import { normalizeValue, validationRules } from './modelParam' -import { extractModelLoadParams } from './modelParam'; -import { extractInferenceParams } from './modelParam'; +import { + normalizeValue, + validationRules, + extractModelLoadParams, + extractInferenceParams, +} from './utils' describe('validationRules', () => { it('should validate temperature correctly', () => { @@ -151,13 +154,12 @@ describe('validationRules', () => { }) }) - - it('should normalize invalid values for keys not listed in validationRules', () => { - expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid') - expect(normalizeValue('invalid_key', 123)).toBe(123) - expect(normalizeValue('invalid_key', true)).toBe(true) - expect(normalizeValue('invalid_key', false)).toBe(false) - }) +it('should normalize invalid values for keys not listed in validationRules', () => { + expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid') + expect(normalizeValue('invalid_key', 123)).toBe(123) + expect(normalizeValue('invalid_key', true)).toBe(true) + expect(normalizeValue('invalid_key', false)).toBe(false) +}) describe('normalizeValue', () => { it('should normalize ctx_len correctly', () => { @@ -192,19 +194,16 @@ describe('normalizeValue', () => { }) }) +it('should handle invalid values correctly by falling back to originParams', () => { + const modelParams = { temperature: 'invalid', token_limit: -1 } + const originParams = { temperature: 0.5, token_limit: 100 } + expect(extractInferenceParams(modelParams as any, originParams)).toEqual(originParams) +}) - it('should handle invalid values correctly by falling back to originParams', () => { - const modelParams = { temperature: 'invalid', token_limit: -1 }; - const originParams = { temperature: 0.5, token_limit: 100 }; - expect(extractInferenceParams(modelParams, originParams)).toEqual(originParams); - }); +it('should return an empty object when no modelParams are provided', () => { + expect(extractModelLoadParams()).toEqual({}) +}) - - it('should return an empty object when no modelParams are provided', () => { - expect(extractModelLoadParams()).toEqual({}); - }); - - - it('should return an empty object when no modelParams are provided', () => { - expect(extractInferenceParams()).toEqual({}); - }); +it('should return an empty object when no modelParams are provided', () => { + expect(extractInferenceParams()).toEqual({}) +}) diff --git a/web/utils/modelParam.ts b/core/src/browser/models/utils.ts similarity index 86% rename from web/utils/modelParam.ts rename to core/src/browser/models/utils.ts index 315aeaeb3..0e52441b2 100644 --- a/web/utils/modelParam.ts +++ b/core/src/browser/models/utils.ts @@ -1,26 +1,20 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/naming-convention */ -import { ModelRuntimeParams, ModelSettingParams } from '@janhq/core' - -import { ModelParams } from '@/types/model' +import { ModelParams, ModelRuntimeParams, ModelSettingParams } from '../../types' /** * Validation rules for model parameters */ export const validationRules: { [key: string]: (value: any) => boolean } = { - temperature: (value: any) => - typeof value === 'number' && value >= 0 && value <= 2, + temperature: (value: any) => typeof value === 'number' && value >= 0 && value <= 2, token_limit: (value: any) => Number.isInteger(value) && value >= 0, top_k: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, top_p: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, stream: (value: any) => typeof value === 'boolean', max_tokens: (value: any) => Number.isInteger(value) && value >= 0, - stop: (value: any) => - Array.isArray(value) && value.every((v) => typeof v === 'string'), - frequency_penalty: (value: any) => - typeof value === 'number' && value >= 0 && value <= 1, - presence_penalty: (value: any) => - typeof value === 'number' && value >= 0 && value <= 1, + stop: (value: any) => Array.isArray(value) && value.every((v) => typeof v === 'string'), + frequency_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, + presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, ctx_len: (value: any) => Number.isInteger(value) && value >= 0, ngl: (value: any) => Number.isInteger(value) && value >= 0, @@ -76,6 +70,7 @@ export const extractInferenceParams = ( stop: undefined, frequency_penalty: undefined, presence_penalty: undefined, + engine: undefined, } const runtimeParams: ModelRuntimeParams = {} @@ -119,11 +114,18 @@ export const extractModelLoadParams = ( embedding: undefined, n_parallel: undefined, cpu_threads: undefined, + pre_prompt: undefined, + system_prompt: undefined, + ai_prompt: undefined, + user_prompt: undefined, prompt_template: undefined, + model_path: undefined, llama_model_path: undefined, mmproj: undefined, + cont_batching: undefined, vision_model: undefined, text_model: undefined, + engine: undefined, } const settingParams: ModelSettingParams = {}
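With these helpers now exported from core, a usage sketch of the split they perform — the values are made up; the fallback-to-`originParams` behavior matches the tests above:

```typescript
import { extractInferenceParams, extractModelLoadParams } from '@janhq/core'

// temperature is out of range (> 2), so the original value is kept;
// ctx_len is a load-time setting and is excluded from inference params.
const inferenceParams = extractInferenceParams(
  { temperature: 9, ctx_len: 2048, stream: true } as any,
  { temperature: 0.7 }
)
// => { temperature: 0.7, stream: true }

// The complementary helper keeps only load-time settings such as ctx_len.
const loadParams = extractModelLoadParams({ ctx_len: 2048, stream: true } as any)
// => { ctx_len: 2048 }
```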
diff --git a/core/src/node/api/processors/download.test.ts b/core/src/node/api/processors/download.test.ts index 370f1746f..21d94165d 100644 --- a/core/src/node/api/processors/download.test.ts +++ b/core/src/node/api/processors/download.test.ts @@ -8,7 +8,8 @@ jest.mock('../../helper', () => ({ jest.mock('../../helper/path', () => ({ validatePath: jest.fn().mockReturnValue('path/to/folder'), - normalizeFilePath: () => process.platform === 'win32' ? 'C:\\Users\path\\to\\file.gguf' : '/Users/path/to/file.gguf', + normalizeFilePath: () => + process.platform === 'win32' ? 'C:\\Users\\path\\to\\file.gguf' : '/Users/path/to/file.gguf', })) jest.mock( diff --git a/core/src/node/api/processors/download.ts b/core/src/node/api/processors/download.ts index 5db18a53a..ebeb7c299 100644 --- a/core/src/node/api/processors/download.ts +++ b/core/src/node/api/processors/download.ts @@ -1,6 +1,6 @@ import { resolve, sep } from 'path' import { DownloadEvent } from '../../../types/api' -import { normalizeFilePath, validatePath } from '../../helper/path' +import { normalizeFilePath } from '../../helper/path' import { getJanDataFolderPath } from '../../helper' import { DownloadManager } from '../../helper/download' import { createWriteStream, renameSync } from 'fs' @@ -37,7 +37,6 @@ export class Downloader implements Processor { const modelId = downloadRequest.modelId ?? array.pop() ?? '' const destination = resolve(getJanDataFolderPath(), normalizedPath) - validatePath(destination) const rq = request({ url, strictSSL, proxy }) // Put request to download manager instance @@ -50,11 +49,6 @@ export class Downloader implements Processor { const initialDownloadState: DownloadState = { modelId, fileName, - time: { - elapsed: 0, - remaining: 0, - }, - speed: 0, percent: 0, size: { total: 0, diff --git a/core/src/node/api/processors/fs.ts b/core/src/node/api/processors/fs.ts index 0557d2187..ada744d53 100644 --- a/core/src/node/api/processors/fs.ts +++ b/core/src/node/api/processors/fs.ts @@ -1,5 +1,5 @@ import { join, resolve } from 'path' -import { normalizeFilePath, validatePath } from '../../helper/path' +import { normalizeFilePath } from '../../helper/path' import { getJanDataFolderPath } from '../../helper' import { Processor } from './Processor' import fs from 'fs' @@ -36,7 +36,6 @@ export class FileSystem implements Processor { return path } const absolutePath = resolve(path) - validatePath(absolutePath) return absolutePath }) ) @@ -55,7 +54,6 @@ export class FileSystem implements Processor { } const absolutePath = resolve(path) - validatePath(absolutePath) return new Promise((resolve, reject) => { fs.rm(absolutePath, { recursive: true, force: true }, (err) => { @@ -79,7 +77,6 @@ export class FileSystem implements Processor { } const absolutePath = resolve(path) - validatePath(absolutePath) return new Promise((resolve, reject) => { fs.mkdir(absolutePath, { recursive: true }, (err) => { diff --git a/core/src/node/api/processors/fsExt.ts b/core/src/node/api/processors/fsExt.ts index 4d113e1ee..846d0c26a 100644 --- a/core/src/node/api/processors/fsExt.ts +++ b/core/src/node/api/processors/fsExt.ts @@ -1,6 +1,6 @@ import { basename, join } from 'path' import fs, { readdirSync } from 'fs' -import { appResourcePath, normalizeFilePath, validatePath } from '../../helper/path' +import { appResourcePath, normalizeFilePath } from '../../helper/path' import { defaultAppConfig, getJanDataFolderPath, getJanDataFolderPath as getPath } from '../../helper' import { Processor } from './Processor' import { FileStat } from '../../../types' @@ -61,7 +61,6 @@ export class FSExt implements Processor { const dataBuffer = Buffer.from(data, 'base64') const writePath = 
join(getJanDataFolderPath(), normalizedPath) - validatePath(writePath) fs.writeFileSync(writePath, dataBuffer) } catch (err) { console.error(`writeFile ${path} result: ${err}`) @@ -69,7 +68,6 @@ export class FSExt implements Processor { } copyFile(src: string, dest: string): Promise { - validatePath(dest) return new Promise((resolve, reject) => { fs.copyFile(src, dest, (err) => { if (err) { diff --git a/core/src/node/api/restful/common.ts b/core/src/node/api/restful/common.ts index c8061c34a..39f7b8d8b 100644 --- a/core/src/node/api/restful/common.ts +++ b/core/src/node/api/restful/common.ts @@ -10,6 +10,7 @@ import { getMessages, retrieveMessage, updateThread, + models, } from './helper/builder' import { JanApiRouteConfiguration } from './helper/configuration' @@ -26,9 +27,12 @@ export const commonRouter = async (app: HttpServer) => { // Common Routes // Read & Delete :: Threads | Models | Assistants Object.keys(JanApiRouteConfiguration).forEach((key) => { - app.get(`/${key}`, async (_request) => - getBuilder(JanApiRouteConfiguration[key]).then(normalizeData) - ) + app.get(`/${key}`, async (_req, _res) => { + if (key === 'models') { + return models(_req, _res) + } + return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData) + }) app.get(`/${key}/:id`, async (request: any) => retrieveBuilder(JanApiRouteConfiguration[key], request.params.id) diff --git a/core/src/node/api/restful/helper/builder.test.ts b/core/src/node/api/restful/helper/builder.test.ts index eb21e9401..f21257098 100644 --- a/core/src/node/api/restful/helper/builder.test.ts +++ b/core/src/node/api/restful/helper/builder.test.ts @@ -220,22 +220,6 @@ describe('builder helper functions', () => { }) describe('chatCompletions', () => { - it('should return an error if model is not found', async () => { - const request = { body: { model: 'nonexistentModel' } } - const reply = { code: jest.fn().mockReturnThis(), send: jest.fn() } - - await chatCompletions(request, reply) - expect(reply.code).toHaveBeenCalledWith(404) - expect(reply.send).toHaveBeenCalledWith({ - error: { - message: 'The model nonexistentModel does not exist', - type: 'invalid_request_error', - param: null, - code: 'model_not_found', - }, - }) - }) - it('should return the error on status not ok', async () => { const request = { body: { model: 'model1' } } const mockSend = jest.fn() diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts index db2000d69..c3493a8be 100644 --- a/core/src/node/api/restful/helper/builder.ts +++ b/core/src/node/api/restful/helper/builder.ts @@ -10,9 +10,9 @@ import { } from 'fs' import { JanApiRouteConfiguration, RouteConfiguration } from './configuration' import { join } from 'path' -import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types' -import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper' -import { DEFAULT_CHAT_COMPLETION_URL } from './consts' +import { ContentType, InferenceEngine, MessageStatus, ThreadMessage } from '../../../../types' +import { getJanDataFolderPath } from '../../../helper' +import { CORTEX_API_URL } from './consts' // TODO: Refactor these export const getBuilder = async (configuration: RouteConfiguration) => { @@ -297,57 +297,56 @@ export const downloadModel = async ( } } -export const chatCompletions = async (request: any, reply: any) => { - const modelList = await getBuilder(JanApiRouteConfiguration.models) - const modelId = request.body.model - - const matchedModels = modelList.filter((model: Model) => 
model.id === modelId) - if (matchedModels.length === 0) { - const error = { - error: { - message: `The model ${request.body.model} does not exist`, - type: 'invalid_request_error', - param: null, - code: 'model_not_found', - }, - } - reply.code(404).send(error) - return - } - - const requestedModel = matchedModels[0] - - const engineConfiguration = await getEngineConfiguration(requestedModel.engine) - - let apiKey: string | undefined = undefined - let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL - - if (engineConfiguration) { - apiKey = engineConfiguration.api_key - apiUrl = engineConfiguration.full_url ?? DEFAULT_CHAT_COMPLETION_URL - } - +/** + * Proxy /models to cortex + * @param request + * @param reply + */ +export const models = async (request: any, reply: any) => { + const fetch = require('node-fetch') const headers: Record = { 'Content-Type': 'application/json', } - if (apiKey) { - headers['Authorization'] = `Bearer ${apiKey}` - headers['api-key'] = apiKey - } + const response = await fetch(`${CORTEX_API_URL}/models`, { + method: request.method, + headers: headers, + body: JSON.stringify(request.body), + }) - if (requestedModel.engine === 'openai' && request.body.stop) { - // openai only allows max 4 stop words - request.body.stop = request.body.stop.slice(0, 4) + if (response.status !== 200) { + // Forward the error response to client via reply + const responseBody = await response.text() + const responseHeaders = Object.fromEntries(response.headers) + reply.code(response.status).headers(responseHeaders).send(responseBody) + } else { + reply.raw.writeHead(200, { + 'Content-Type': 'application/json', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Access-Control-Allow-Origin': '*', + }) + response.body.pipe(reply.raw) + } +} + +/** + * Proxy chat completions + * @param request + * @param reply + */ +export const chatCompletions = async (request: any, reply: any) => { + const headers: Record = { + 'Content-Type': 'application/json', } // add engine for new cortex cpp engine - if (requestedModel.engine === 'nitro') { - request.body.engine = 'cortex.llamacpp' + if (request.body.engine === InferenceEngine.nitro) { + request.body.engine = InferenceEngine.cortex_llamacpp } const fetch = require('node-fetch') - const response = await fetch(apiUrl, { + const response = await fetch(`${CORTEX_API_URL}/chat/completions`, { method: 'POST', headers: headers, body: JSON.stringify(request.body), diff --git a/core/src/node/api/restful/helper/consts.test.ts b/core/src/node/api/restful/helper/consts.test.ts index 34d42dcf0..524f0cbeb 100644 --- a/core/src/node/api/restful/helper/consts.test.ts +++ b/core/src/node/api/restful/helper/consts.test.ts @@ -1,6 +1,5 @@ +import { CORTEX_DEFAULT_PORT } from './consts' -import { NITRO_DEFAULT_PORT } from './consts'; - -it('should test NITRO_DEFAULT_PORT', () => { - expect(NITRO_DEFAULT_PORT).toBe(3928); -}); +it('should test CORTEX_DEFAULT_PORT', () => { + expect(CORTEX_DEFAULT_PORT).toBe(39291) +}) diff --git a/core/src/node/api/restful/helper/consts.ts b/core/src/node/api/restful/helper/consts.ts index 8d8f8e341..412d304ee 100644 --- a/core/src/node/api/restful/helper/consts.ts +++ b/core/src/node/api/restful/helper/consts.ts @@ -1,19 +1,7 @@ -// The PORT to use for the Nitro subprocess -export const NITRO_DEFAULT_PORT = 3928 +export const CORTEX_DEFAULT_PORT = 39291 -// The HOST address to use for the Nitro subprocess export const LOCAL_HOST = '127.0.0.1' export const SUPPORTED_MODEL_FORMAT = '.gguf' -// The URL for the Nitro subprocess 
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}` -// The URL for the Nitro subprocess to load a model -export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel` -// The URL for the Nitro subprocess to validate a model -export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus` - -// The URL for the Nitro subprocess to kill itself -export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` - -export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url +export const CORTEX_API_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1` diff --git a/core/src/node/api/restful/helper/startStopModel.test.ts b/core/src/node/api/restful/helper/startStopModel.test.ts index a5475cc28..7c1a56cf1 100644 --- a/core/src/node/api/restful/helper/startStopModel.test.ts +++ b/core/src/node/api/restful/helper/startStopModel.test.ts @@ -1,16 +1,10 @@ +import { startModel } from './startStopModel' +describe('startModel', () => { + it('test_startModel_error', async () => { + const modelId = 'testModelId' + const settingParams = undefined - import { startModel } from './startStopModel' - - describe('startModel', () => { - it('test_startModel_error', async () => { - const modelId = 'testModelId' - const settingParams = undefined - - const result = await startModel(modelId, settingParams) - - expect(result).toEqual({ - error: expect.any(Error), - }) - }) + await expect(startModel(modelId, settingParams)).rejects.toThrow() }) +}) diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts index d1a23dca9..2e9db6d15 100644 --- a/core/src/node/api/restful/helper/startStopModel.ts +++ b/core/src/node/api/restful/helper/startStopModel.ts @@ -1,6 +1,5 @@ -import { join } from 'path' -import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper' import { ModelSettingParams } from '../../../../types' +import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts' /** * Start a model @@ -9,70 +8,18 @@ import { ModelSettingParams } from '../../../../types' * @param * @returns */ export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => { - try { - await runModel(modelId, settingParams) - - return { - message: `Model ${modelId} started`, - } - } catch (e) { - return { - error: e, - } - } + return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, { + method: 'POST', + body: JSON.stringify({ model: modelId, ...settingParams }), + }) } -/** - * Run a model using installed cortex extension - * @param model - * @param settingParams - */ -const runModel = async (model: string, settingParams?: ModelSettingParams): Promise => { - const janDataFolderPath = getJanDataFolderPath() - const modelFolder = join(janDataFolderPath, 'models', model) - let module = join( - getJanExtensionsPath(), - '@janhq', - 'inference-cortex-extension', - 'dist', - 'node', - 'index.cjs' - ) - // Just reuse the cortex extension implementation, don't duplicate then lost of sync - return import(module).then((extension) => - extension - .loadModel( - { - modelFolder, - model, - }, - settingParams - ) - .then(() => log(`[SERVER]::Debug: Model is loaded`)) - .then({ - message: 'Model started', - }) - ) -} /* - * Stop model and kill nitro process.
*/ -export const stopModel = async (_modelId: string) => { - let module = join( - getJanExtensionsPath(), - '@janhq', - 'inference-cortex-extension', - 'dist', - 'node', - 'index.cjs' - ) - // Just reuse the cortex extension implementation, don't duplicate then lost of sync - return import(module).then((extension) => - extension - .unloadModel() - .then(() => log(`[SERVER]::Debug: Model is unloaded`)) - .then({ - message: 'Model stopped', - }) - ) +export const stopModel = async (modelId: string) => { + return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, { + method: 'POST', + body: JSON.stringify({ model: modelId }), + }) } diff --git a/core/src/node/helper/path.ts b/core/src/node/helper/path.ts index 8115383bb..4efbea463 100644 --- a/core/src/node/helper/path.ts +++ b/core/src/node/helper/path.ts @@ -34,18 +34,4 @@ export function appResourcePath() { // server return join(global.core.appPath(), '../../..') -} - -export function validatePath(path: string) { - const appDataFolderPath = getJanDataFolderPath() - const resourcePath = appResourcePath() - const applicationSupportPath = global.core?.appPath() ?? resourcePath - const absolutePath = resolve(__dirname, path) - if ( - ![appDataFolderPath, resourcePath, applicationSupportPath].some((whiteListedPath) => - absolutePath.startsWith(whiteListedPath) - ) - ) { - throw new Error(`Invalid path: ${absolutePath}`) - } -} +} \ No newline at end of file diff --git a/core/src/types/api/index.ts b/core/src/types/api/index.ts index 8f1ff70bf..093314a15 100644 --- a/core/src/types/api/index.ts +++ b/core/src/types/api/index.ts @@ -72,6 +72,8 @@ export enum DownloadEvent { onFileDownloadUpdate = 'onFileDownloadUpdate', onFileDownloadError = 'onFileDownloadError', onFileDownloadSuccess = 'onFileDownloadSuccess', + onFileDownloadStopped = 'onFileDownloadStopped', + onFileDownloadStarted = 'onFileDownloadStarted', onFileUnzipSuccess = 'onFileUnzipSuccess', } diff --git a/core/src/types/file/index.ts b/core/src/types/file/index.ts index 9f3e32b3e..87d83c51d 100644 --- a/core/src/types/file/index.ts +++ b/core/src/types/file/index.ts @@ -6,8 +6,8 @@ export type FileStat = { export type DownloadState = { modelId: string // TODO: change to download id fileName: string - time: DownloadTime - speed: number + time?: DownloadTime + speed?: number percent: number size: DownloadSize diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts index 933c698c3..7b67a8e94 100644 --- a/core/src/types/model/modelEntity.ts +++ b/core/src/types/model/modelEntity.ts @@ -6,8 +6,8 @@ import { FileMetadata } from '../file' */ export type ModelInfo = { id: string - settings: ModelSettingParams - parameters: ModelRuntimeParams + settings?: ModelSettingParams + parameters?: ModelRuntimeParams engine?: InferenceEngine } @@ -15,7 +15,6 @@ export type ModelInfo = { * Represents the inference engine. 
* @stored */ - export enum InferenceEngine { anthropic = 'anthropic', mistral = 'mistral', @@ -28,11 +27,13 @@ nitro_tensorrt_llm = 'nitro-tensorrt-llm', cohere = 'cohere', nvidia = 'nvidia', - cortex_llamacpp = 'cortex.llamacpp', - cortex_onnx = 'cortex.onnx', - cortex_tensorrtllm = 'cortex.tensorrt-llm', + cortex = 'cortex', + cortex_llamacpp = 'llama-cpp', + cortex_onnx = 'onnxruntime', + cortex_tensorrtllm = 'tensorrt-llm', } +// Represents an artifact of a model, including its filename and URL export type ModelArtifact = { filename: string url: string @@ -104,6 +105,7 @@ export type Model = { engine: InferenceEngine } +// Represents metadata associated with a model export type ModelMetadata = { author: string tags: string[] @@ -124,14 +126,20 @@ export type ModelSettingParams = { n_parallel?: number cpu_threads?: number prompt_template?: string + pre_prompt?: string system_prompt?: string ai_prompt?: string user_prompt?: string + // path param + model_path?: string + // legacy path param llama_model_path?: string + // clip model path mmproj?: string cont_batching?: boolean vision_model?: boolean text_model?: boolean + engine?: InferenceEngine } /** @@ -150,11 +158,12 @@ export type ModelRuntimeParams = { engine?: string } +// Represents a model that failed to initialize, including the error export type ModelInitFailed = Model & { error: Error } /** - * ModelFile is the model.json entity and it's file metadata + * ModelParams types */ -export type ModelFile = Model & FileMetadata +export type ModelParams = ModelRuntimeParams | ModelSettingParams diff --git a/core/src/types/model/modelImport.ts b/core/src/types/model/modelImport.ts index 7c72a691b..3f0ddab10 100644 --- a/core/src/types/model/modelImport.ts +++ b/core/src/types/model/modelImport.ts @@ -1,4 +1,4 @@ -export type OptionType = 'SYMLINK' | 'MOVE_BINARY_FILE' +export type OptionType = 'symlink' | 'copy' export type ModelImportOption = { type: OptionType diff --git a/core/src/types/model/modelInterface.ts b/core/src/types/model/modelInterface.ts index 08d456b7e..7ad1b136c 100644 --- a/core/src/types/model/modelInterface.ts +++ b/core/src/types/model/modelInterface.ts @@ -1,5 +1,5 @@ -import { GpuSetting } from '../miscellaneous' -import { Model, ModelFile } from './modelEntity' +import { Model } from './modelEntity' +import { OptionType } from './modelImport' /** * Model extension for managing models. @@ -8,38 +8,46 @@ export interface ModelInterface { /** * Downloads a model. * @param model - The model to download. - * @param network - Optional object to specify proxy/whether to ignore SSL certificates. * @returns A Promise that resolves when the model has been downloaded. */ - downloadModel( - model: ModelFile, - gpuSettings?: GpuSetting, - network?: { ignoreSSL?: boolean; proxy?: string } - ): Promise + pullModel(model: string, id?: string, name?: string): Promise /** * Cancels the download of a specific model. * @param {string} model - The ID of the model to cancel the download for. * @returns {Promise} A promise that resolves when the download has been cancelled. */ - cancelModelDownload(modelId: string): Promise + cancelModelPull(model: string): Promise /** * Deletes a model. * @param modelId - The ID of the model to delete. * @returns A Promise that resolves when the model has been deleted. */ - deleteModel(model: ModelFile): Promise + deleteModel(model: string): Promise /** - * Gets a list of downloaded models. + * Gets downloaded models.
* @returns A Promise that resolves with an array of downloaded models. */ - getDownloadedModels(): Promise + getModels(): Promise /** - * Gets a list of configured models. - * @returns A Promise that resolves with an array of configured models. + * Update a pulled model's metadata + * @param model - The model to update. + * @returns A Promise that resolves when the model has been updated. */ - getConfiguredModels(): Promise + updateModel(model: Partial): Promise + + /** + * Import an existing model file. + * @param model id of the model to import + * @param modelPath - path of the model file + */ + importModel( + model: string, + modelPath: string, + name?: string, + optionType?: OptionType + ): Promise } diff --git a/core/src/types/monitoring/index.test.ts b/core/src/types/monitoring/index.test.ts index 010fcb97a..56c5879e4 100644 --- a/core/src/types/monitoring/index.test.ts +++ b/core/src/types/monitoring/index.test.ts @@ -1,16 +1,13 @@ +import * as monitoringInterface from './monitoringInterface' +import * as resourceInfo from './resourceInfo' -import * as monitoringInterface from './monitoringInterface'; -import * as resourceInfo from './resourceInfo'; +import * as index from './index' - import * as index from './index'; - import * as monitoringInterface from './monitoringInterface'; - import * as resourceInfo from './resourceInfo'; - - it('should re-export all symbols from monitoringInterface and resourceInfo', () => { - for (const key in monitoringInterface) { - expect(index[key]).toBe(monitoringInterface[key]); - } - for (const key in resourceInfo) { - expect(index[key]).toBe(resourceInfo[key]); - } - }); +it('should re-export all symbols from monitoringInterface and resourceInfo', () => { + for (const key in monitoringInterface) { + expect(index[key]).toBe(monitoringInterface[key]) + } + for (const key in resourceInfo) { + expect(index[key]).toBe(resourceInfo[key]) + } +}) diff --git a/core/tsconfig.json b/core/tsconfig.json index 02caf65e2..b30d65851 100644 --- a/core/tsconfig.json +++ b/core/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "moduleResolution": "node", - "target": "es5", + "target": "ES2015", "module": "ES2020", "lib": ["es2015", "es2016", "es2017", "dom"], "strict": true, @@ -13,7 +13,7 @@ "declarationDir": "dist/types", "outDir": "dist/lib", "importHelpers": true, - "types": ["@types/jest"], + "types": ["@types/jest"] }, "include": ["src"], "exclude": ["**/*.test.ts"] diff --git a/docs/public/assets/images/changelog/jan-v0.5.5.jpeg b/docs/public/assets/images/changelog/jan-v0.5.5.jpeg new file mode 100644 index 000000000..a0b1c4984 Binary files /dev/null and b/docs/public/assets/images/changelog/jan-v0.5.5.jpeg differ diff --git a/docs/public/assets/images/changelog/jan-v0.5.7.gif b/docs/public/assets/images/changelog/jan-v0.5.7.gif new file mode 100644 index 000000000..cddbf62b0 Binary files /dev/null and b/docs/public/assets/images/changelog/jan-v0.5.7.gif differ diff --git a/docs/src/pages/changelog/2024-10-02-jan-is-more-stable.mdx b/docs/src/pages/changelog/2024-10-02-jan-is-more-stable.mdx new file mode 100644 index 000000000..59e78a0a7 --- /dev/null +++ b/docs/src/pages/changelog/2024-10-02-jan-is-more-stable.mdx @@ -0,0 +1,27 @@ +--- +title: "Jan is more stable 👋" +version: 0.5.5 +description: "Jan supports Llama 3.2 and Qwen 2.5" +date: 2024-10-02 +ogImage: "/assets/images/changelog/jan-v0.5.5.jpeg" +--- + +import ChangelogHeader from "@/components/Changelog/ChangelogHeader" + + + +Highlights 🎉 + +- Meta's Llama 3.2 and Alibaba's Qwen 2.5 added to
the hub +- Improved starter screen +- Better local vs. cloud model navigation + +Fixes 💫 + +- Fixed GPU acceleration for GGUF models +- Improved model caching & threading +- Resolved input & toolbar overlaps + +Update your app or download the latest version: https://jan.ai + +For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.5). \ No newline at end of file diff --git a/docs/src/pages/changelog/2024-10-24-jan-stable.mdx b/docs/src/pages/changelog/2024-10-24-jan-stable.mdx new file mode 100644 index 000000000..7c24d51ee --- /dev/null +++ b/docs/src/pages/changelog/2024-10-24-jan-stable.mdx @@ -0,0 +1,26 @@ +--- +title: "Jan has Stable, Beta and Nightly versions" +version: 0.5.7 +description: "This release is mostly focused on bug fixes." +date: 2024-10-24 +ogImage: "/assets/images/changelog/jan-v0.5.7.gif" +--- + +import ChangelogHeader from "@/components/Changelog/ChangelogHeader" + + + +Highlights 🎉 + +- Jan has Stable, Beta and Nightly versions +- Saving instructions for new threads is now stable + +Fixes 💫 + +- Fixed broken links, hardware issues, and multi-modal download +- Resolved text overlap, scrolling, and multi-monitor reset problems +- Adjusted LLaVA model EOS token and context input + +Update your app or download the latest version: https://jan.ai + +For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.7). \ No newline at end of file diff --git a/docs/src/pages/docs/_assets/jan-app.png b/docs/src/pages/docs/_assets/jan-app.png new file mode 100644 index 000000000..a45943055 Binary files /dev/null and b/docs/src/pages/docs/_assets/jan-app.png differ diff --git a/docs/src/pages/docs/_assets/jan-display.png b/docs/src/pages/docs/_assets/jan-display.png deleted file mode 100644 index 2fc3610ba..000000000 Binary files a/docs/src/pages/docs/_assets/jan-display.png and /dev/null differ diff --git a/docs/src/pages/docs/index.mdx b/docs/src/pages/docs/index.mdx index 027ad11b6..0efb1ba51 100644 --- a/docs/src/pages/docs/index.mdx +++ b/docs/src/pages/docs/index.mdx @@ -22,7 +22,7 @@ import FAQBox from '@/components/FaqBox' # Jan -![Jan's Cover Image](./_assets/jan-display.png) +![Jan's Cover Image](./_assets/jan-app.png) Jan is a ChatGPT-alternative that runs 100% offline on your [Desktop](/docs/desktop-installation). Our goal is to make it easy for a layperson[^1] to download and run LLMs and use AI with full control and [privacy](https://www.reuters.com/legal/legalindustry/privacy-paradox-with-ai-2023-10-31/).
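Reviewer note: the param helpers relocated into core above split one flat params object into inference-time and load-time fields, falling back to originParams whenever a value fails its validation rule. A minimal sketch of the intended behavior, assuming the helpers are re-exported from the package root (they now live in core/src/browser/models/utils.ts; the import path is an assumption, not part of this diff):

import { extractInferenceParams, extractModelLoadParams } from '@janhq/core'

// temperature 3 fails the 0..2 validation rule, so the valid value from
// originParams wins; stream passes its rule and is kept as a runtime param.
const runtime = extractInferenceParams(
  { temperature: 3, stream: true },
  { temperature: 0.7 }
)
console.log(runtime) // expected per the rules above: { temperature: 0.7, stream: true }

// ctx_len and ngl are load-time settings, so they are picked up by the
// load-param helper and ignored by extractInferenceParams.
const settings = extractModelLoadParams({ ctx_len: 2048, ngl: 100 })
console.log(settings) // expected: { ctx_len: 2048, ngl: 100 }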
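Reviewer note: with this change the API server no longer resolves engine configuration itself; /models and /chat/completions become thin proxies over the cortex server at 127.0.0.1:39291/v1. A sketch of what a local client sees, assuming Node 18+ global fetch, that Jan's API server listens on a configured localhost port (1337 below is illustrative, not taken from this diff), and that cortex returns the OpenAI-compatible { data: [...] } list shape:

const JAN_API = 'http://127.0.0.1:1337' // illustrative port, not from this diff

async function main(): Promise<void> {
  // Forwarded to CORTEX_API_URL/models by the new models() helper.
  const models = await fetch(`${JAN_API}/models`).then((r) => r.json())

  // Forwarded to CORTEX_API_URL/chat/completions; a legacy 'nitro'
  // engine value is rewritten to 'llama-cpp' server-side before proxying.
  const completion = await fetch(`${JAN_API}/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      engine: 'nitro',
      model: models.data?.[0]?.id,
      messages: [{ role: 'user', content: 'Hello' }],
      stream: false,
    }),
  }).then((r) => r.json())

  console.log(completion.choices?.[0]?.message)
}

main()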
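Reviewer note: the InferenceEngine values also change meaning on disk; every bundled model.json later in this diff moves from "engine": "nitro" to "engine": "llama-cpp". A hypothetical migration helper for user data that still carries legacy strings (the function name is ours; the value pairs come from the modelEntity.ts hunk above):

// Legacy engine strings -> new cortex engine names (from the enum diff).
const LEGACY_ENGINE_MAP: Record<string, string> = {
  'nitro': 'llama-cpp',
  'cortex.llamacpp': 'llama-cpp',
  'cortex.onnx': 'onnxruntime',
  'cortex.tensorrt-llm': 'tensorrt-llm',
}

// Unknown values pass through unchanged.
export const migrateEngine = (engine: string): string =>
  LEGACY_ENGINE_MAP[engine] ?? engine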
diff --git a/electron/package.json b/electron/package.json index feaee5e16..662a912f0 100644 --- a/electron/package.json +++ b/electron/package.json @@ -18,7 +18,8 @@ "docs/**/*", "scripts/**/*", "icons/**/*", - "themes" + "themes", + "shared" ], "asarUnpack": [ "pre-install", @@ -26,7 +27,8 @@ "docs", "scripts", "icons", - "themes" + "themes", + "shared" ], "publish": [ { @@ -111,7 +113,7 @@ "@kirillvakalov/nut-tree__nut-js": "4.2.1-2" }, "devDependencies": { - "@electron/notarize": "^2.1.0", + "@electron/notarize": "^2.5.0", "@playwright/test": "^1.38.1", "@types/npmcli__arborist": "^5.6.4", "@types/pacote": "^11.1.7", diff --git a/electron/shared/.gitkeep b/electron/shared/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/electron/tests/config/fixtures.ts b/electron/tests/config/fixtures.ts index 22d83b636..bc3f8a7d1 100644 --- a/electron/tests/config/fixtures.ts +++ b/electron/tests/config/fixtures.ts @@ -15,6 +15,8 @@ import { import { Constants } from './constants' import { HubPage } from '../pages/hubPage' import { CommonActions } from '../pages/commonActions' +import { rmSync } from 'fs' +import * as path from 'path' export let electronApp: ElectronApplication export let page: Page @@ -103,10 +105,14 @@ export const test = base.extend< }, { auto: true }, ], - }) test.beforeAll(async () => { + rmSync(path.join(__dirname, '../../test-data'), { + recursive: true, + force: true, + }) + test.setTimeout(TIMEOUT) await setupElectron() await page.waitForSelector('img[alt="Jan - Logo"]', { diff --git a/electron/tests/e2e/hub.e2e.spec.ts b/electron/tests/e2e/hub.e2e.spec.ts index 23d4d0b6d..ef305e9c3 100644 --- a/electron/tests/e2e/hub.e2e.spec.ts +++ b/electron/tests/e2e/hub.e2e.spec.ts @@ -16,7 +16,8 @@ test.beforeAll(async () => { test('explores hub', async ({ hubPage }) => { await hubPage.navigateByMenu() await hubPage.verifyContainerVisible() - const useModelBtn= page.getByTestId(/^use-model-btn-.*/).first() + await hubPage.scrollToBottom() + const useModelBtn = page.getByTestId(/^use-model-btn-.*/).first() await expect(useModelBtn).toBeVisible({ timeout: TIMEOUT, diff --git a/electron/tests/pages/basePage.ts b/electron/tests/pages/basePage.ts index 1817bc731..11e3ba81a 100644 --- a/electron/tests/pages/basePage.ts +++ b/electron/tests/pages/basePage.ts @@ -8,9 +8,8 @@ export class BasePage { constructor( protected readonly page: Page, readonly action: CommonActions, - protected containerId: string, - ) { - } + protected containerId: string + ) {} public getValue(key: string) { return this.action.getValue(key) @@ -37,6 +36,12 @@ export class BasePage { expect(container.isVisible()).toBeTruthy() } + async scrollToBottom() { + await this.page.evaluate(() => { + window.scrollTo(0, document.body.scrollHeight) + }) + } + async waitUpdateLoader() { await this.isElementVisible('img[alt="Jan - Logo"]') } diff --git a/electron/utils/migration.ts b/electron/utils/migration.ts index 52ee45ed0..80851f9de 100644 --- a/electron/utils/migration.ts +++ b/electron/utils/migration.ts @@ -47,9 +47,6 @@ async function migrateThemes() { const themes = readdirSync(join(appResourcePath(), 'themes')) for (const theme of themes) { const themePath = join(appResourcePath(), 'themes', theme) - if (existsSync(themePath) && !lstatSync(themePath).isDirectory()) { - continue - } await checkAndMigrateTheme(theme, themePath) } } @@ -64,21 +61,14 @@ async function checkAndMigrateTheme( ) if (existingTheme) { const desTheme = join(janDataThemesFolder, existingTheme) - if (!existsSync(desTheme) ||
!lstatSync(desTheme).isDirectory()) return - - const desThemeData = JSON.parse( - readFileSync(join(desTheme, 'theme.json'), 'utf-8') - ) - const sourceThemeData = JSON.parse( - readFileSync(join(sourceThemePath, 'theme.json'), 'utf-8') - ) - if (desThemeData.version !== sourceThemeData.version) { - console.debug('Updating theme', existingTheme) - rmdirSync(desTheme, { recursive: true }) - cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), { - recursive: true, - }) + if (!lstatSync(desTheme).isDirectory()) { + return } + console.debug('Updating theme', existingTheme) + rmdirSync(desTheme, { recursive: true }) + cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), { + recursive: true, + }) } else { console.debug('Adding new theme', sourceThemeName) cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), { diff --git a/extensions/assistant-extension/src/index.ts b/extensions/assistant-extension/src/index.ts index 12441995e..6705483d6 100644 --- a/extensions/assistant-extension/src/index.ts +++ b/extensions/assistant-extension/src/index.ts @@ -63,39 +63,46 @@ export default class JanAssistantExtension extends AssistantExtension { } async getAssistants(): Promise { - // get all the assistant directories - // get all the assistant metadata json - const results: Assistant[] = [] - const allFileName: string[] = await fs.readdirSync( - JanAssistantExtension._homeDir - ) - for (const fileName of allFileName) { - const filePath = await joinPath([ - JanAssistantExtension._homeDir, - fileName, - ]) + try { + // get all the assistant directories + // get all the assistant metadata json + const results: Assistant[] = [] - if (!(await fs.fileStat(filePath))?.isDirectory) continue - const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter( - (file: string) => file === 'assistant.json' + const allFileName: string[] = await fs.readdirSync( + JanAssistantExtension._homeDir ) - if (jsonFiles.length !== 1) { - // has more than one assistant file -> ignore - continue + for (const fileName of allFileName) { + const filePath = await joinPath([ + JanAssistantExtension._homeDir, + fileName, + ]) + + if (!(await fs.fileStat(filePath))?.isDirectory) continue + const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter( + (file: string) => file === 'assistant.json' + ) + + if (jsonFiles.length !== 1) { + // has more than one assistant file -> ignore + continue + } + + const content = await fs.readFileSync( + await joinPath([filePath, jsonFiles[0]]), + 'utf-8' + ) + const assistant: Assistant = + typeof content === 'object' ? content : JSON.parse(content) + + results.push(assistant) } - const content = await fs.readFileSync( - await joinPath([filePath, jsonFiles[0]]), - 'utf-8' - ) - const assistant: Assistant = - typeof content === 'object' ? 
content : JSON.parse(content) - - results.push(assistant) + return results + } catch (err) { + console.debug(err) + return [this.defaultAssistant] } - - return results } async deleteAssistant(assistant: Assistant): Promise { @@ -112,39 +119,39 @@ export default class JanAssistantExtension extends AssistantExtension { } private async createJanAssistant(): Promise { - const janAssistant: Assistant = { - avatar: '', - thread_location: undefined, - id: 'jan', - object: 'assistant', - created_at: Date.now(), - name: 'Jan', - description: 'A default assistant that can use all downloaded models', - model: '*', - instructions: '', - tools: [ - { - type: 'retrieval', - enabled: false, - useTimeWeightedRetriever: false, - settings: { - top_k: 2, - chunk_size: 1024, - chunk_overlap: 64, - retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + await this.createAssistant(this.defaultAssistant) + } + + private defaultAssistant: Assistant = { + avatar: '', + thread_location: undefined, + id: 'jan', + object: 'assistant', + created_at: Date.now(), + name: 'Jan', + description: 'A default assistant that can use all downloaded models', + model: '*', + instructions: '', + tools: [ + { + type: 'retrieval', + enabled: false, + useTimeWeightedRetriever: false, + settings: { + top_k: 2, + chunk_size: 1024, + chunk_overlap: 64, + retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. ---------------- CONTEXT: {CONTEXT} ---------------- QUESTION: {QUESTION} ---------------- Helpful Answer:`, - }, }, - ], - file_ids: [], - metadata: undefined, - } - - await this.createAssistant(janAssistant) + }, + ], + file_ids: [], + metadata: undefined, } } diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts index 28d629aa8..3386029fa 100644 --- a/extensions/assistant-extension/src/node/retrieval.ts +++ b/extensions/assistant-extension/src/node/retrieval.ts @@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib' import { OpenAIEmbeddings } from 'langchain/embeddings/openai' import { readEmbeddingEngine } from './engine' -import path from 'path' - export class Retrieval { public chunkSize: number = 100 public chunkOverlap?: number = 0 diff --git a/extensions/assistant-extension/tsconfig.json b/extensions/assistant-extension/tsconfig.json index e425358c3..3838d1c42 100644 --- a/extensions/assistant-extension/tsconfig.json +++ b/extensions/assistant-extension/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "moduleResolution": "node", - "target": "es5", + "target": "ES2015", "module": "ES2020", "lib": ["es2015", "es2016", "es2017", "dom"], "strict": true, diff --git a/extensions/inference-nitro-extension/.gitignore b/extensions/inference-cortex-extension/.gitignore similarity index 100% rename from extensions/inference-nitro-extension/.gitignore rename to extensions/inference-cortex-extension/.gitignore diff --git a/extensions/inference-nitro-extension/README.md b/extensions/inference-cortex-extension/README.md similarity index 100% rename from extensions/inference-nitro-extension/README.md rename to extensions/inference-cortex-extension/README.md diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt new file mode 
100644 index 000000000..e6d5cb833 --- /dev/null +++ b/extensions/inference-cortex-extension/bin/version.txt @@ -0,0 +1 @@ +1.0.2 \ No newline at end of file diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat new file mode 100644 index 000000000..ecff683c3 --- /dev/null +++ b/extensions/inference-cortex-extension/download.bat @@ -0,0 +1,41 @@ +@echo off +set BIN_PATH=./bin +set SHARED_PATH=./../../electron/shared +set /p CORTEX_VERSION=<./bin/version.txt + +@REM Download cortex.llamacpp binaries +set VERSION=v0.1.35 +set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64 +set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION% +set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan + +call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp +call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% +call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% + +move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe +del %BIN_PATH%\cortex-beta.exe +del %BIN_PATH%\cortex.exe + +@REM Loop through each folder and move DLLs (excluding engine.dll) +for %%F in (%SUBFOLDERS%) do ( + echo Processing folder: %BIN_PATH%\%%F + + @REM Move all .dll files except engine.dll + for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do ( + if /I not "%%~nxD"=="engine.dll" ( + move "%%D" "%BIN_PATH%" + ) + ) +) + +echo DLL files moved successfully. 
\ No newline at end of file diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh new file mode 100755 index 000000000..902a31e51 --- /dev/null +++ b/extensions/inference-cortex-extension/download.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# Read CORTEX_VERSION +CORTEX_VERSION=$(cat ./bin/version.txt) +CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" +ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35" +CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35" +# Detect platform +OS_TYPE=$(uname) + +if [ "$OS_TYPE" == "Linux" ]; then + # Linux downloads + download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin" + mv ./bin/cortex-server-beta ./bin/cortex-server + rm -rf ./bin/cortex + rm -rf ./bin/cortex-beta + chmod +x "./bin/cortex-server" + + # Download engines for Linux + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 + +elif [ "$OS_TYPE" == "Darwin" ]; then + # macOS downloads + download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1 + mv ./bin/cortex-server-beta ./bin/cortex-server + rm -rf ./bin/cortex + rm -rf ./bin/cortex-beta + chmod +x "./bin/cortex-server" + + # Download engines for macOS + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp + +else + echo "Unsupported operating system: $OS_TYPE" + exit 1 +fi diff --git a/extensions/inference-nitro-extension/jest.config.js b/extensions/inference-cortex-extension/jest.config.js similarity index 100% rename from extensions/inference-nitro-extension/jest.config.js rename to extensions/inference-cortex-extension/jest.config.js diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-cortex-extension/package.json similarity index 80% rename from extensions/inference-nitro-extension/package.json rename to extensions/inference-cortex-extension/package.json index 15ceaf566..d262ad5ec 100644 --- 
a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.20", + "version": "1.0.21", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", @@ -10,12 +10,12 @@ "scripts": { "test": "jest", "build": "tsc --module commonjs && rollup -c rollup.config.ts", - "downloadnitro:linux:darwin": "./download.sh", - "downloadnitro:win32": "download.bat", - "downloadnitro": "run-script-os", - "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", - "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", - "build:publish": "yarn test && run-script-os" + "downloadcortex:linux:darwin": "./download.sh", + "downloadcortex:win32": "download.bat", + "downloadcortex": "run-script-os", + "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", + "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", + "build:publish": "run-script-os" }, "exports": { ".": "./dist/index.js", @@ -50,6 +50,8 @@ "cpu-instructions": "^0.0.13", "decompress": "^4.2.1", "fetch-retry": "^5.0.6", + "ky": "^1.7.2", + "p-queue": "^8.0.1", "rxjs": "^7.8.1", "tcp-port-used": "^1.0.2", "terminate": "2.6.1", diff --git a/extensions/inference-nitro-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json similarity index 100% rename from extensions/inference-nitro-extension/resources/default_settings.json rename to extensions/inference-cortex-extension/resources/default_settings.json diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json rename to extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json index 8c3029be0..f6e3d08e9 100644 --- a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json +++ b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Finetuned"], "size": 21556982144 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json rename to extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json index 163373014..463f7eec7 100644 --- a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json @@ -31,5 +31,5 @@ 
"tags": ["7B", "Finetuned"], "size": 5056982144 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/bakllava-1/model.json b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/bakllava-1/model.json rename to extensions/inference-cortex-extension/resources/models/bakllava-1/model.json index 93f87c7f4..391c93990 100644 --- a/extensions/inference-nitro-extension/resources/models/bakllava-1/model.json +++ b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json @@ -31,5 +31,5 @@ "tags": ["Vision"], "size": 5750000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json rename to extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json index fb2a5f346..7bd5bf3a4 100644 --- a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json @@ -30,5 +30,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/codestral-22b/model.json rename to extensions/inference-cortex-extension/resources/models/codestral-22b/model.json index f90f848dd..2cce063e6 100644 --- a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json +++ b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json @@ -31,6 +31,6 @@ "tags": ["22B", "Finetuned", "Featured"], "size": 13341237440 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/command-r-34b/model.json rename to extensions/inference-cortex-extension/resources/models/command-r-34b/model.json index 6b166eea5..13518604c 100644 --- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json @@ -31,6 +31,6 @@ "tags": ["34B", "Finetuned"], "size": 21500000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json rename to extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json index 4d825cfeb..6722d253d 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json +++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json @@ -31,5 +31,5 @@ "tags": ["Tiny"], "size": 
1430000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json rename to extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json index e87d6a643..8a2e271cd 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json @@ -31,5 +31,5 @@ "tags": ["33B"], "size": 19940000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json index 837b10ce3..3278c9a81 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json @@ -31,5 +31,5 @@ "tags": ["2B", "Finetuned", "Tiny"], "size": 1630000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json index b29043483..9a57f9b37 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 5330000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json index 4db74ac6f..66eaff7c2 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json @@ -37,5 +37,5 @@ ], "size": 16600000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json index d85759f9b..60be558b8 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json @@ -38,5 +38,5 @@ ], "size": 1710000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git 
a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json index 8f6af15d9..67acaad09 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json @@ -37,5 +37,5 @@ ], "size": 5760000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json rename to extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json index 0c770b189..c91a0a73b 100644 --- a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json @@ -31,5 +31,5 @@ "tags": ["70B", "Foundational Model"], "size": 43920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json rename to extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json index 9efd634b5..4a28f6004 100644 --- a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Foundational Model"], "size": 4080000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json index 4d84b9967..3456a185e 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["8B"], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json rename to extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json index a3601c8cd..718629fb0 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json @@ -34,5 +34,5 @@ ], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git 
a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json similarity index 98% rename from extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json index 1f4931e11..aec73719e 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json @@ -37,5 +37,5 @@ ], "size": 42500000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json similarity index 98% rename from extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json index 4b21534bc..ec9a0284b 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json @@ -37,5 +37,5 @@ ], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json index 5be08409d..0fe7d3316 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["1B", "Featured"], "size": 1320000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json index aacb3f0f8..299362fbf 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["3B", "Featured"], "size": 3420000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json rename to extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json index 94b62ec82..3230df5b0 100644 --- a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json @@ -34,5 +34,5 @@ ], "size": 1170000000 }, - "engine": "nitro" + 
"engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llava-13b/model.json rename to extensions/inference-cortex-extension/resources/models/llava-13b/model.json index 6d94fd272..791c98749 100644 --- a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json @@ -32,5 +32,5 @@ "tags": ["Vision"], "size": 7870000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llava-7b/model.json rename to extensions/inference-cortex-extension/resources/models/llava-7b/model.json index 1fdd75247..b22899c96 100644 --- a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json @@ -32,5 +32,5 @@ "tags": ["Vision"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json rename to extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json index 88f701466..9b568e468 100644 --- a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json +++ b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json @@ -32,5 +32,5 @@ "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png" }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json index 4413b415c..c711065ff 100644 --- a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json @@ -30,5 +30,5 @@ "tags": ["70B", "Foundational Model"], "size": 26440000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json rename to extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json index 10c17c310..1999035aa 100644 --- a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - 
"engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json rename to extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json index e743a74c9..05371b69e 100644 --- a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json @@ -31,5 +31,5 @@ "tags": ["Recommended", "7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json rename to extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json index 6459b049d..a2197dab2 100644 --- a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json @@ -34,5 +34,5 @@ ], "size": 2320000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phi3-medium/model.json rename to extensions/inference-cortex-extension/resources/models/phi3-medium/model.json index 7331b2fd8..f7131ee98 100644 --- a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json @@ -34,5 +34,5 @@ ], "size": 8366000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phind-34b/model.json rename to extensions/inference-cortex-extension/resources/models/phind-34b/model.json index 14099a635..f6e302173 100644 --- a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Finetuned"], "size": 20220000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen-7b/model.json rename to extensions/inference-cortex-extension/resources/models/qwen-7b/model.json index 85081a605..be37cac0d 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 4770000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json 
b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json index a7613982c..210848a43 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Finetuned"], "size": 4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json index 04913b874..96e4d214c 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["14B", "Featured"], "size": 8990000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json index 43ba30c56..20681dff4 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["32B"], "size": 19900000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json index 1852a0909..b741539eb 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["72B"], "size": 47400000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json index b47511f96..6741aef64 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Featured"], "size": 
4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json new file mode 100644 index 000000000..a445ee2db --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-14b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 14B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": ["<|endoftext|>", "<|im_end|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["14B", "Featured"], + "size": 8990000000 + }, + "engine": "llama-cpp" + } + \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json new file mode 100644 index 000000000..cffdf03df --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-32b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 32B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models.
Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": ["<|endoftext|>", "<|im_end|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["32B", "Featured"], + "size": 19900000000 + }, + "engine": "llama-cpp" + } + \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json index 2f1080b2c..9162c8a43 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Featured"], "size": 4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json rename to extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json index 938e03fb7..a6c84bd17 100644 --- a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json +++ b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json @@ -31,5 +31,5 @@ "tags": ["3B", "Finetuned", "Tiny"], "size": 2970000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json index c17d1c35e..ffb32922e 100644 --- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json @@ -30,5 +30,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json rename to extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json index a49e79073..b6aeea3e3 100644 --- a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json +++ b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json @@ -31,5 +31,5
@@ "tags": ["Tiny", "Foundation Model"], "size": 669000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json index 6c9aa2b89..fae5d0ca5 100644 --- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json @@ -31,5 +31,5 @@ "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png" }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/vistral-7b/model.json rename to extensions/inference-cortex-extension/resources/models/vistral-7b/model.json index b84f2c676..46b6999a6 100644 --- a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Finetuned"], "size": 4410000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json rename to extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json index 101eedfd1..cf39ad857 100644 --- a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json +++ b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json @@ -31,5 +31,5 @@ "tags": ["Recommended", "13B", "Finetuned"], "size": 7870000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/yi-34b/model.json rename to extensions/inference-cortex-extension/resources/models/yi-34b/model.json index db7df9f2d..4f56650d7 100644 --- a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Foundational Model"], "size": 20660000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts similarity index 93% rename from extensions/inference-nitro-extension/rollup.config.ts rename to extensions/inference-cortex-extension/rollup.config.ts index 1a8badb6f..34ad9295d 100644 --- a/extensions/inference-nitro-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -49,6 +49,8 @@ const llama321bJson = require('./resources/models/llama3.2-1b-instruct/model.jso const llama323bJson = 
require('./resources/models/llama3.2-3b-instruct/model.json') const qwen257bJson = require('./resources/models/qwen2.5-7b-instruct/model.json') const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json') +const qwen25coder14bJson = require('./resources/models/qwen2.5-coder-14b-instruct/model.json') +const qwen25coder32bJson = require('./resources/models/qwen2.5-coder-32b-instruct/model.json') const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json') const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json') const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json') @@ -108,25 +110,16 @@ export default [ llama323bJson, qwen257bJson, qwen25coder7bJson, + qwen25coder14bJson, + qwen25coder32bJson, qwen2514bJson, qwen2532bJson, qwen2572bJson, ]), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), - INFERENCE_URL: JSON.stringify( - process.env.INFERENCE_URL || - 'http://127.0.0.1:3928/inferences/server/chat_completion' - ), - TROUBLESHOOTING_URL: JSON.stringify( - 'https://jan.ai/guides/troubleshooting' - ), - JAN_SERVER_INFERENCE_URL: JSON.stringify( - 'http://localhost:1337/v1/chat/completions' - ), - CUDA_DOWNLOAD_URL: JSON.stringify( - 'https://catalog.jan.ai/dist/cuda-dependencies///cuda.tar.gz' - ), + CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), + CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), }), // Allow json resolution json(), diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts similarity index 70% rename from extensions/inference-nitro-extension/src/@types/global.d.ts rename to extensions/inference-cortex-extension/src/@types/global.d.ts index 85c9b939f..48dbcd780 100644 --- a/extensions/inference-nitro-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -1,7 +1,6 @@ declare const NODE: string -declare const INFERENCE_URL: string -declare const TROUBLESHOOTING_URL: string -declare const JAN_SERVER_INFERENCE_URL: string +declare const CORTEX_API_URL: string +declare const CORTEX_SOCKET_URL: string declare const DEFAULT_SETTINGS: Array declare const MODELS: Array diff --git a/extensions/inference-nitro-extension/src/babel.config.js b/extensions/inference-cortex-extension/src/babel.config.js similarity index 100% rename from extensions/inference-nitro-extension/src/babel.config.js rename to extensions/inference-cortex-extension/src/babel.config.js diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts new file mode 100644 index 000000000..44ec423da --- /dev/null +++ b/extensions/inference-cortex-extension/src/index.ts @@ -0,0 +1,236 @@ +/** + * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. 
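 + * Inference itself is served by a separate cortex.cpp server process: the class below only starts and stops models through the server's HTTP API and relays model download progress from its WebSocket, as the loadModel, unloadModel and subscribeToEvents methods further down show.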
+ * @version 1.0.0 + * @module inference-cortex-extension/src/index + */ + +import { + Model, + executeOnMain, + systemInformation, + joinPath, + LocalOAIEngine, + InferenceEngine, + getJanDataFolderPath, + extractModelLoadParams, + fs, + events, + ModelEvent +} from '@janhq/core' +import PQueue from 'p-queue' +import ky from 'ky' + +/** + * Event subscription types of Downloader + */ +enum DownloadTypes { + DownloadUpdated = 'onFileDownloadUpdate', + DownloadError = 'onFileDownloadError', + DownloadSuccess = 'onFileDownloadSuccess', + DownloadStopped = 'onFileDownloadStopped', + DownloadStarted = 'onFileDownloadStarted', +} + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class JanInferenceCortexExtension extends LocalOAIEngine { + nodeModule: string = 'node' + + queue = new PQueue({ concurrency: 1 }) + + provider: string = InferenceEngine.cortex + + /** + * The URL for making inference requests. + */ + inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions` + + /** + * Socket instance of events subscription + */ + socket?: WebSocket = undefined + + /** + * Subscribes to events emitted by the @janhq/core package. + */ + async onLoad() { + const models = MODELS as Model[] + + this.registerModels(models) + + super.onLoad() + + // Run the process watchdog + const systemInfo = await systemInformation() + await this.clean() + await executeOnMain(NODE, 'run', systemInfo) + + this.queue.add(() => this.healthz()) + + this.subscribeToEvents() + + window.addEventListener('beforeunload', () => { + this.clean() + }) + } + + onUnload(): void { + this.clean() + executeOnMain(NODE, 'dispose') + super.onUnload() + } + + override async loadModel( + model: Model & { file_path?: string } + ): Promise<void> { + if ( + model.engine === InferenceEngine.nitro && + model.settings.llama_model_path + ) { + // Legacy chat model support + model.settings = { + ...model.settings, + llama_model_path: await getModelFilePath( + model, + model.settings.llama_model_path + ), + } + } else { + const { llama_model_path, ...settings } = model.settings + model.settings = settings + } + + if (model.engine === InferenceEngine.nitro && model.settings.mmproj) { + // Legacy clip vision model support + model.settings = { + ...model.settings, + mmproj: await getModelFilePath(model, model.settings.mmproj), + } + } else { + const { mmproj, ...settings } = model.settings + model.settings = settings + } + + return await this.queue.add(() => + ky + .post(`${CORTEX_API_URL}/v1/models/start`, { + json: { + ...extractModelLoadParams(model.settings), + model: model.id, + engine: + model.engine === InferenceEngine.nitro // Legacy model cache + ? InferenceEngine.cortex_llamacpp + : model.engine, + }, + }) + .json() + .catch(async (e) => { + throw (await e.response?.json()) ??
e + }) + .then() + ) + } + + override async unloadModel(model: Model): Promise<void> { + return ky + .post(`${CORTEX_API_URL}/v1/models/stop`, { + json: { model: model.id }, + }) + .json() + .then() + } + + /** + * Do health check on cortex.cpp + * @returns + */ + healthz(): Promise<void> { + return ky + .get(`${CORTEX_API_URL}/healthz`, { + retry: { + limit: 10, + methods: ['get'], + }, + }) + .then(() => { }) + } + + /** + * Clean cortex processes + * @returns + */ + clean(): Promise<any> { + return ky + .delete(`${CORTEX_API_URL}/processmanager/destroy`, { + timeout: 2000, // maximum 2 seconds + }) + .catch(() => { + // Do nothing + }) + } + + /** + * Subscribe to cortex.cpp websocket events + */ + subscribeToEvents() { + this.queue.add( + () => + new Promise<void>((resolve) => { + this.socket = new WebSocket(`${CORTEX_SOCKET_URL}/events`) + + this.socket.addEventListener('message', (event) => { + const data = JSON.parse(event.data) + const transferred = data.task.items.reduce( + (acc: number, cur: any) => acc + cur.downloadedBytes, + 0 + ) + const total = data.task.items.reduce( + (acc: number, cur: any) => acc + cur.bytes, + 0 + ) + const percent = total > 0 ? transferred / total : 0 + + events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], { + modelId: data.task.id, + percent: percent, + size: { + transferred: transferred, + total: total, + }, + }) + // Update models list from Hub + if (data.type === DownloadTypes.DownloadSuccess) { + // Delay for the state update from cortex.cpp + // Just to be sure + setTimeout(() => { + events.emit(ModelEvent.OnModelsUpdate, {}) + }, 500) + } + }) + resolve() + }) + ) + } + +} + +/// Legacy +export const getModelFilePath = async ( + model: Model, + file: string +): Promise<string> => { + // Symlink to the model file + if ( + !model.sources[0]?.url.startsWith('http') && + (await fs.existsSync(model.sources[0].url)) + ) { + return model.sources[0]?.url + } + return joinPath([await getJanDataFolderPath(), 'models', model.id, file]) +} +/// diff --git a/extensions/inference-nitro-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts similarity index 67% rename from extensions/inference-nitro-extension/src/node/execute.test.ts rename to extensions/inference-cortex-extension/src/node/execute.test.ts index dfd8b35a9..622eb38af 100644 --- a/extensions/inference-nitro-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from '@jest/globals' -import { executableNitroFile } from './execute' +import { executableCortexFile } from './execute' import { GpuSetting } from '@janhq/core' import { cpuInfo } from 'cpu-instructions' @@ -27,10 +27,10 @@ jest.mock('cpu-instructions', () => ({ cpuInfo: jest.fn(), }, })) -let mock = cpuInfo.cpuInfo as jest.Mock -mock.mockReturnValue([]) +let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock +mockCpuInfo.mockReturnValue([]) -describe('test executable nitro file', () => { +describe('test executable cortex file', () => { afterAll(function () { Object.defineProperty(process, 'platform', { value: originalPlatform, }) @@ -44,10 +44,14 @@ Object.defineProperty(process, 'arch', { value: 'arm64', }) - expect(executableNitroFile(testSettings)).toEqual( + expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`mac-arm64`), - executablePath: originalPlatform === 'darwin' ?
expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(), + enginePath: expect.stringContaining(`arm64`), + binPath: expect.stringContaining(`bin`), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`cortex-server`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -55,10 +59,14 @@ describe('test executable nitro file', () => { Object.defineProperty(process, 'arch', { value: 'x64', }) - expect(executableNitroFile(testSettings)).toEqual( + expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`mac-x64`), - executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(), + enginePath: expect.stringContaining(`x64`), + binPath: expect.stringContaining(`bin`), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`cortex-server`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -73,10 +81,12 @@ describe('test executable nitro file', () => { ...testSettings, run_mode: 'cpu', } - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue(['avx']) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + enginePath: expect.stringContaining(`avx`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -107,10 +117,12 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue(['avx2']) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-cuda-11-7`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + enginePath: expect.stringContaining(`avx2-cuda-11-7`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -141,10 +153,12 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue(['noavx']) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + enginePath: expect.stringContaining(`noavx-cuda-12-0`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -159,10 +173,11 @@ describe('test executable nitro file', () => { ...testSettings, run_mode: 'cpu', } - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue(['noavx']) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux`), - executablePath: expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(`noavx`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -193,10 +208,11 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-cuda-11-7`), - executablePath: 
expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(`cuda-11-7`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -227,10 +243,11 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(`cuda-12-0`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -249,12 +266,13 @@ describe('test executable nitro file', () => { const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) + mockCpuInfo.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-${instruction}`), - executablePath: expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(instruction), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -273,11 +291,12 @@ describe('test executable nitro file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue([instruction]) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-${instruction}`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + enginePath: expect.stringContaining(instruction), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -312,11 +331,12 @@ describe('test executable nitro file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue([instruction]) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + enginePath: expect.stringContaining(`cuda-12-0`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -351,11 +371,12 @@ describe('test executable nitro file', () => { ], } cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue([instruction]) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(`cuda-12-0`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -391,11 +412,12 @@ 
describe('test executable nitro file', () => { ], } cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue([instruction]) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-vulkan`), - executablePath: expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(`vulkan`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -417,11 +439,15 @@ describe('test executable nitro file', () => { ...testSettings, run_mode: 'cpu', } - mock.mockReturnValue([]) - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue([]) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`mac-x64`), - executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(), + enginePath: expect.stringContaining(`x64`), + binPath: expect.stringContaining(`bin`), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`cortex-server`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts similarity index 72% rename from extensions/inference-nitro-extension/src/node/execute.ts rename to extensions/inference-cortex-extension/src/node/execute.ts index 595063ed4..74ffb48c6 100644 --- a/extensions/inference-nitro-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -2,8 +2,9 @@ import { GpuSetting } from '@janhq/core' import * as path from 'path' import { cpuInfo } from 'cpu-instructions' -export interface NitroExecutableOptions { +export interface CortexExecutableOptions { enginePath: string + binPath: string executablePath: string cudaVisibleDevices: string vkVisibleDevices: string @@ -36,8 +37,8 @@ const os = (): string => { ? 'win' : process.platform === 'darwin' ? process.arch === 'arm64' - ? 'mac-arm64' - : 'mac-x64' + ? 'arm64' + : 'x64' : 'linux' } @@ -66,7 +67,7 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'. * @returns */ -const cpuInstructions = () => { +const cpuInstructions = (): string => { if (process.platform === 'darwin') return '' return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512') ? 'avx512' @@ -81,29 +82,32 @@ const cpuInstructions = () => { * Find which executable file to run based on the current platform. * @returns The name of the executable file to run. */ -export const executableNitroFile = ( +export const executableCortexFile = ( gpuSetting?: GpuSetting -): NitroExecutableOptions => { - let engineFolder = [ - os(), - ...(gpuSetting?.vulkan - ? [] +): CortexExecutableOptions => { + const cpuInstruction = cpuInstructions() + let engineFolder = gpuSetting?.vulkan + ? 'vulkan' + : process.platform === 'darwin' + ? os() : [ - gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '', - gpuRunMode(gpuSetting), - cudaVersion(gpuSetting), - ]), - gpuSetting?.vulkan ? 'vulkan' : undefined, - ] - .filter((e) => !!e) - .join('-') + gpuRunMode(gpuSetting) !== 'cuda' || + cpuInstruction === 'avx2' || cpuInstruction === 'avx512' + ? 
cpuInstruction + : 'noavx', + gpuRunMode(gpuSetting), + cudaVersion(gpuSetting), + ] + .filter((e) => !!e) + .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' - let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}` - + let binaryName = `cortex-server${extension()}` + const binPath = path.join(__dirname, '..', 'bin') return { - enginePath: path.join(__dirname, '..', 'bin', engineFolder), - executablePath: path.join(__dirname, '..', 'bin', binaryName), + enginePath: path.join(binPath, engineFolder), + executablePath: path.join(binPath, binaryName), + binPath: binPath, cudaVisibleDevices, vkVisibleDevices, } diff --git a/extensions/inference-cortex-extension/src/node/index.test.ts b/extensions/inference-cortex-extension/src/node/index.test.ts new file mode 100644 index 000000000..ff9d7c2fc --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/index.test.ts @@ -0,0 +1,94 @@ +jest.mock('@janhq/core/node', () => ({ + ...jest.requireActual('@janhq/core/node'), + getJanDataFolderPath: () => '', + getSystemResourceInfo: () => { + return { + cpu: { + cores: 1, + logicalCores: 1, + threads: 1, + model: 'model', + speed: 1, + }, + memory: { + total: 1, + free: 1, + }, + gpu: { + model: 'model', + memory: 1, + cuda: { + version: 'version', + devices: 'devices', + }, + vulkan: { + version: 'version', + devices: 'devices', + }, + }, + } + }, +})) + +jest.mock('fs', () => ({ + default: { + readdirSync: () => [], + }, +})) + +jest.mock('child_process', () => ({ + exec: () => { + return { + stdout: { on: jest.fn() }, + stderr: { on: jest.fn() }, + on: jest.fn(), + } + }, + spawn: () => { + return { + stdout: { on: jest.fn() }, + stderr: { on: jest.fn() }, + on: jest.fn(), + pid: '111', + } + }, +})) + +jest.mock('./execute', () => ({ + executableCortexFile: () => { + return { + enginePath: 'enginePath', + executablePath: 'executablePath', + cudaVisibleDevices: 'cudaVisibleDevices', + vkVisibleDevices: 'vkVisibleDevices', + } + }, +})) + +import index from './index' + +describe('dispose', () => { + it('should dispose a model successfully on Mac', async () => { + Object.defineProperty(process, 'platform', { + value: 'darwin', + }) + + // Call the dispose function + const result = await index.dispose() + + // Assert that the result is as expected + expect(result).toBeUndefined() + }) + + it('should kill the subprocess successfully on Windows', async () => { + Object.defineProperty(process, 'platform', { + value: 'win32', + }) + + // Call the dispose function + const result = await index.dispose() + + // Assert that the result is as expected + expect(result).toBeUndefined() + }) +}) diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts new file mode 100644 index 000000000..3816605d2 --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -0,0 +1,108 @@ +import path from 'path' +import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node' +import { executableCortexFile } from './execute' +import { ProcessWatchdog } from './watchdog' +import { appResourcePath } from '@janhq/core/node' + +// The local port used by the cortex subprocess +const LOCAL_PORT = '39291' +let watchdog: ProcessWatchdog | undefined = undefined + +/** + * Spawns a cortex subprocess. + * @returns A promise that resolves when the cortex subprocess is started.
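 + * The subprocess is not spawned bare: it runs under the ProcessWatchdog defined in watchdog.ts, which restarts the server after an unexpected exit (up to its configured maximum), so a cortex.cpp crash does not permanently take local inference down.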
+ */ +function run(systemInfo?: SystemInformation): Promise<void> { + log(`[CORTEX]:: Spawning cortex subprocess...`) + + return new Promise<void>(async (resolve, reject) => { + let executableOptions = executableCortexFile( + // If ngl is not set or equal to 0, run on CPU with correct instructions + systemInfo?.gpuSetting + ? { + ...systemInfo.gpuSetting, + run_mode: systemInfo.gpuSetting.run_mode, + } + : undefined + ) + + // Execute the binary + log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`) + log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`) + + addEnvPaths(path.join(appResourcePath(), 'shared')) + addEnvPaths(executableOptions.binPath) + addEnvPaths(executableOptions.enginePath) + // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH + // This is required for the cortex engine to run for now since dlls are not moved to the root + addEnvPaths( + path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp') + ) + + const dataFolderPath = getJanDataFolderPath() + watchdog = new ProcessWatchdog( + executableOptions.executablePath, + [ + '--start-server', + '--port', + LOCAL_PORT.toString(), + '--config_file_path', + `${path.join(dataFolderPath, '.janrc')}`, + '--data_folder_path', + dataFolderPath, + ], + { + cwd: executableOptions.enginePath, + env: { + ...process.env, + ENGINE_PATH: executableOptions.enginePath, + CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, + // Vulkan - Support 1 device at a time for now + ...(executableOptions.vkVisibleDevices?.length > 0 && { + GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], + }), + }, + } + ) + watchdog.start() + resolve() + }) +} + +/** + * Every module should have a dispose function + * This will be called when the extension is unloaded and should clean up any resources + * Also called when app is closed + */ +function dispose() { + watchdog?.terminate() +} + +function addEnvPaths(dest: string) { + // Add engine path to the PATH and LD_LIBRARY_PATH + if (process.platform === 'win32') { + process.env.PATH = (process.env.PATH || '').concat( + path.delimiter, + dest, + ) + log(`[CORTEX] PATH: ${process.env.PATH}`) + } else { + process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( + path.delimiter, + dest, + ) + log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`) + } +} + +/** + * Cortex process info + */ +export interface CortexProcessInfo { + isRunning: boolean +} + +export default { + run, + dispose, +} diff --git a/extensions/inference-cortex-extension/src/node/watchdog.ts b/extensions/inference-cortex-extension/src/node/watchdog.ts new file mode 100644 index 000000000..3e2b81d70 --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/watchdog.ts @@ -0,0 +1,84 @@ +import { log } from '@janhq/core/node' +import { spawn, ChildProcess } from 'child_process' +import { EventEmitter } from 'events' + +interface WatchdogOptions { + cwd?: string + restartDelay?: number + maxRestarts?: number + env?: NodeJS.ProcessEnv +} + +export class ProcessWatchdog extends EventEmitter { + private command: string + private args: string[] + private options: WatchdogOptions + private process: ChildProcess | null + private restartDelay: number + private maxRestarts: number + private restartCount: number + private isTerminating: boolean + + constructor(command: string, args: string[], options: WatchdogOptions = {}) { + super() + this.command = command + this.args = args + this.options = options + this.process = null + this.restartDelay =
options.restartDelay || 5000 + this.maxRestarts = options.maxRestarts || 5 + this.restartCount = 0 + this.isTerminating = false + } + + start(): void { + this.spawnProcess() + } + + private spawnProcess(): void { + if (this.isTerminating) return + + log(`Starting process: ${this.command} ${this.args.join(' ')}`) + this.process = spawn(this.command, this.args, this.options) + + this.process.stdout?.on('data', (data: Buffer) => { + log(`Process output: ${data}`) + this.emit('output', data.toString()) + }) + + this.process.stderr?.on('data', (data: Buffer) => { + log(`Process error: ${data}`) + this.emit('error', data.toString()) + }) + + this.process.on('close', (code: number | null) => { + log(`Process exited with code ${code}`) + this.emit('close', code) + if (!this.isTerminating) { + this.restartProcess() + } + }) + } + + private restartProcess(): void { + if (this.restartCount < this.maxRestarts) { + this.restartCount++ + log( + `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})` + ) + setTimeout(() => this.spawnProcess(), this.restartDelay) + } else { + log('Max restart attempts reached. Exiting watchdog.') + this.emit('maxRestartsReached') + } + } + + terminate(): void { + this.isTerminating = true + if (this.process) { + log('Terminating watched process...') + this.process.kill() + } + this.emit('terminated') + } +} diff --git a/extensions/inference-nitro-extension/tsconfig.json b/extensions/inference-cortex-extension/tsconfig.json similarity index 81% rename from extensions/inference-nitro-extension/tsconfig.json rename to extensions/inference-cortex-extension/tsconfig.json index 19d8572b5..af00a035a 100644 --- a/extensions/inference-nitro-extension/tsconfig.json +++ b/extensions/inference-cortex-extension/tsconfig.json @@ -1,9 +1,8 @@ { "compilerOptions": { "moduleResolution": "node", - "target": "es5", - "module": "ES2020", - "lib": ["es2015", "es2016", "es2017", "dom"], + "target": "es2016", + "module": "esnext", "strict": true, "sourceMap": true, "declaration": true, diff --git a/extensions/inference-nitro-extension/bin/version.txt b/extensions/inference-nitro-extension/bin/version.txt deleted file mode 100644 index 8f0916f76..000000000 --- a/extensions/inference-nitro-extension/bin/version.txt +++ /dev/null @@ -1 +0,0 @@ -0.5.0 diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat deleted file mode 100644 index 7acd385d5..000000000 --- a/extensions/inference-nitro-extension/download.bat +++ /dev/null @@ -1,31 +0,0 @@ -@echo off -set BIN_PATH=./bin -set /p CORTEX_VERSION=<./bin/version.txt - -@REM Download cortex.llamacpp binaries -set VERSION=v0.1.25 -set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.25-windows-amd64 -set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan - -call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp -call .\node_modules\.bin\download 
%DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx2/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx512/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/win-vulkan/engines/cortex.llamacpp - -@REM Loop through each folder and move DLLs (excluding engine.dll) -for %%F in (%SUBFOLDERS%) do ( - echo Processing folder: %BIN_PATH%\%%F - - @REM Move all .dll files except engine.dll - for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do ( - if /I not "%%~nxD"=="engine.dll" ( - move "%%D" "%BIN_PATH%" - ) - ) -) - -echo DLL files moved successfully. \ No newline at end of file diff --git a/extensions/inference-nitro-extension/download.sh b/extensions/inference-nitro-extension/download.sh deleted file mode 100755 index 98ed8504a..000000000 --- a/extensions/inference-nitro-extension/download.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Read CORTEX_VERSION -CORTEX_VERSION=$(cat ./bin/version.txt) -CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" - -# Detect platform -OS_TYPE=$(uname) - -if [ "$OS_TYPE" == "Linux" ]; then - # Linux downloads - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin" - chmod +x "./bin/cortex-cpp" - - ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64" - - # Download engines for Linux - download "${ENGINE_DOWNLOAD_URL}-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1 - -elif [ "$OS_TYPE" == "Darwin" ]; then - # macOS downloads - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/mac-arm64" 1 - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/mac-x64" 1 - chmod +x "./bin/mac-arm64/cortex-cpp" - chmod +x "./bin/mac-x64/cortex-cpp" - - ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac" - # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp - download "${ENGINE_DOWNLOAD_URL}-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp - -else - echo "Unsupported operating system: $OS_TYPE" - exit 1 -fi diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts deleted file mode 100644 index 6e825e8fd..000000000 --- a/extensions/inference-nitro-extension/src/index.ts +++ /dev/null @@ -1,193 
+0,0 @@ -/** - * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - * @version 1.0.0 - * @module inference-extension/src/index - */ - -import { - events, - executeOnMain, - Model, - ModelEvent, - LocalOAIEngine, - InstallationState, - systemInformation, - fs, - getJanDataFolderPath, - joinPath, - DownloadRequest, - baseName, - downloadFile, - DownloadState, - DownloadEvent, - ModelFile, -} from '@janhq/core' - -declare const CUDA_DOWNLOAD_URL: string -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class JanInferenceNitroExtension extends LocalOAIEngine { - nodeModule: string = NODE - provider: string = 'nitro' - - /** - * Checking the health for Nitro's process each 5 secs. - */ - private static readonly _intervalHealthCheck = 5 * 1000 - - /** - * The interval id for the health check. Used to stop the health check. - */ - private getNitroProcessHealthIntervalId: NodeJS.Timeout | undefined = undefined - - /** - * Tracking the current state of nitro process. - */ - private nitroProcessInfo: any = undefined - - /** - * The URL for making inference requests. - */ - inferenceUrl = '' - - /** - * Subscribes to events emitted by the @janhq/core package. - */ - async onLoad() { - this.inferenceUrl = INFERENCE_URL - - // If the extension is running in the browser, use the base API URL from the core package. - if (!('electronAPI' in window)) { - this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions` - } - - this.getNitroProcessHealthIntervalId = setInterval( - () => this.periodicallyGetNitroHealth(), - JanInferenceNitroExtension._intervalHealthCheck - ) - const models = MODELS as unknown as Model[] - this.registerModels(models) - super.onLoad() - - // Add additional dependencies PATH to the env - executeOnMain(NODE, 'addAdditionalDependencies', { - name: this.name, - version: this.version, - }) - } - - /** - * Periodically check for nitro process's health. - */ - private async periodicallyGetNitroHealth(): Promise { - const health = await executeOnMain(NODE, 'getCurrentNitroProcessInfo') - - const isRunning = this.nitroProcessInfo?.isRunning ?? 
false - if (isRunning && health.isRunning === false) { - console.debug('Nitro process is stopped') - events.emit(ModelEvent.OnModelStopped, {}) - } - this.nitroProcessInfo = health - } - - override loadModel(model: ModelFile): Promise<void> { - if (model.engine !== this.provider) return Promise.resolve() - this.getNitroProcessHealthIntervalId = setInterval( - () => this.periodicallyGetNitroHealth(), - JanInferenceNitroExtension._intervalHealthCheck - ) - return super.loadModel(model) - } - - override async unloadModel(model?: Model): Promise<void> { - if (model?.engine && model.engine !== this.provider) return - - // stop the periodic health check - if (this.getNitroProcessHealthIntervalId) { - clearInterval(this.getNitroProcessHealthIntervalId) - this.getNitroProcessHealthIntervalId = undefined - } - return super.unloadModel(model) - } - - override async install(): Promise<void> { - const info = await systemInformation() - - const platform = info.osInfo?.platform === 'win32' ? 'windows' : 'linux' - const downloadUrl = CUDA_DOWNLOAD_URL - - const url = downloadUrl - .replace('<version>', info.gpuSetting?.cuda?.version ?? '12.4') - .replace('<platform>', platform) - - console.debug('Downloading Cuda Toolkit Dependency: ', url) - - const janDataFolderPath = await getJanDataFolderPath() - - const executableFolderPath = await joinPath([ - janDataFolderPath, - 'engines', - this.name ?? 'cortex-cpp', - this.version ?? '1.0.0', - ]) - - if (!(await fs.existsSync(executableFolderPath))) { - await fs.mkdir(executableFolderPath) - } - - const tarball = await baseName(url) - const tarballFullPath = await joinPath([executableFolderPath, tarball]) - - const downloadRequest: DownloadRequest = { - url, - localPath: tarballFullPath, - extensionId: this.name, - downloadType: 'extension', - } - downloadFile(downloadRequest) - - const onFileDownloadSuccess = async (state: DownloadState) => { - console.log(state) - // if other download, ignore - if (state.fileName !== tarball) return - events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) - await executeOnMain( - NODE, - 'decompressRunner', - tarballFullPath, - executableFolderPath - ) - events.emit(DownloadEvent.onFileUnzipSuccess, state) - } - events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) - } - - override async installationState(): Promise<InstallationState> { - const info = await systemInformation() - if ( - info.gpuSetting?.run_mode === 'gpu' && - !info.gpuSetting?.vulkan && - info.osInfo && - info.osInfo.platform !== 'darwin' && - !info.gpuSetting?.cuda?.exist - ) { - const janDataFolderPath = await getJanDataFolderPath() - - const executableFolderPath = await joinPath([ - janDataFolderPath, - 'engines', - this.name ?? 'cortex-cpp', - this.version ?? 
'1.0.0', - ]) - - if (!(await fs.existsSync(executableFolderPath))) return 'NotInstalled' - return 'Installed' - } - return 'NotRequired' - } -} diff --git a/extensions/inference-nitro-extension/src/node/index.test.ts b/extensions/inference-nitro-extension/src/node/index.test.ts deleted file mode 100644 index 6e64b4a06..000000000 --- a/extensions/inference-nitro-extension/src/node/index.test.ts +++ /dev/null @@ -1,465 +0,0 @@ -jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: true, - status: 200, - json: () => - Promise.resolve({ - model_loaded: true, - }), - text: () => Promise.resolve(''), - }) - }, -})) - -jest.mock('path', () => ({ - default: { - isAbsolute: jest.fn(), - join: jest.fn(), - parse: () => { - return { dir: 'dir' } - }, - delimiter: { concat: () => '' }, - }, -})) - -jest.mock('decompress', () => ({ - default: () => { - return Promise.resolve() - }, -})) - -jest.mock('@janhq/core/node', () => ({ - ...jest.requireActual('@janhq/core/node'), - getJanDataFolderPath: () => '', - getSystemResourceInfo: () => { - return { - cpu: { - cores: 1, - logicalCores: 1, - threads: 1, - model: 'model', - speed: 1, - }, - memory: { - total: 1, - free: 1, - }, - gpu: { - model: 'model', - memory: 1, - cuda: { - version: 'version', - devices: 'devices', - }, - vulkan: { - version: 'version', - devices: 'devices', - }, - }, - } - }, -})) - -jest.mock('fs', () => ({ - default: { - readdirSync: () => [], - }, -})) - -jest.mock('child_process', () => ({ - exec: () => { - return { - stdout: { on: jest.fn() }, - stderr: { on: jest.fn() }, - on: jest.fn(), - } - }, - spawn: () => { - return { - stdout: { on: jest.fn() }, - stderr: { on: jest.fn() }, - on: jest.fn(), - pid: '111', - } - }, -})) - -jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(true), - waitUntilUsed: () => Promise.resolve(true), - }, -})) - -jest.mock('./execute', () => ({ - executableNitroFile: () => { - return { - enginePath: 'enginePath', - executablePath: 'executablePath', - cudaVisibleDevices: 'cudaVisibleDevices', - vkVisibleDevices: 'vkVisibleDevices', - } - }, -})) - -jest.mock('terminate', () => ({ - default: (id: String, func: Function) => { - console.log(id) - func() - }, -})) - -import * as execute from './execute' -import index from './index' - -let executeMock = execute - -const modelInitOptions: any = { - modelFolder: '/path/to/model', - model: { - id: 'test', - name: 'test', - engine: 'nitro', - version: '0.0', - format: 'GGUF', - object: 'model', - sources: [], - created: 0, - description: 'test', - parameters: {}, - metadata: { - author: '', - tags: [], - size: 0, - }, - settings: { - prompt_template: '{prompt}', - llama_model_path: 'model.gguf', - }, - }, -} - -describe('loadModel', () => { - it('should load a model successfully', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - - // Call the loadModel function - const result = await index.loadModel(modelInitOptions, systemInfo) - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should reject with an error message if the model is not a nitro model', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - modelInitOptions.model.engine = 'not-nitro' - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { 
- // Assert that the error message is as expected - expect(error).toBe('Not a cortex model') - } - modelInitOptions.model.engine = 'nitro' - }) - - it('should reject if model load failed with an error message', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - // Mock the fetch-retry module to return a failed response - jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: false, - status: 500, - json: () => - Promise.resolve({ - model_loaded: false, - }), - text: () => Promise.resolve('Failed to load model'), - }) - }, - })) - - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Failed to load model') - } - }) - - it('should reject if port not available', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - - // Mock the tcp-port-used module to return false - jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(false), - waitUntilUsed: () => Promise.resolve(false), - }, - })) - - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Port not available') - } - }) - - it('should run on GPU model if ngl is set', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: 40, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'gpu', - }) - }) - - it('should run on correct CPU instructions if ngl is not set', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: undefined, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'cpu', - }) - }) - - it('should run on correct CPU instructions if ngl is 0', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: 0, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'cpu', - 
}) - }) -}) - -describe('unloadModel', () => { - it('should unload a model successfully', async () => { - // Call the unloadModel function - const result = await index.unloadModel() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should reject with an error message if the model is not a nitro model', async () => { - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Not a cortex model') - } - }) - - it('should reject if model unload failed with an error message', async () => { - // Mock the fetch-retry module to return a failed response - jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: false, - status: 500, - json: () => - Promise.resolve({ - model_unloaded: false, - }), - text: () => Promise.resolve('Failed to unload model'), - }) - }, - })) - - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Failed to unload model') - } - }) - - it('should reject if port not available', async () => { - // Mock the tcp-port-used module to return false - jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(false), - waitUntilUsed: () => Promise.resolve(false), - }, - })) - - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Port not available') - } - }) -}) -describe('dispose', () => { - it('should dispose a model successfully on Mac', async () => { - Object.defineProperty(process, 'platform', { - value: 'darwin', - }) - - // Call the dispose function - const result = await index.dispose() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should kill the subprocess successfully on Windows', async () => { - Object.defineProperty(process, 'platform', { - value: 'win32', - }) - - // Call the killSubprocess function - const result = await index.dispose() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) -}) - -describe('getCurrentNitroProcessInfo', () => { - it('should return the current nitro process info', async () => { - // Call the getCurrentNitroProcessInfo function - const result = await index.getCurrentNitroProcessInfo() - - // Assert that the result is as expected - expect(result).toEqual({ - isRunning: true, - }) - }) -}) - -describe('decompressRunner', () => { - it('should decompress the runner successfully', async () => { - jest.mock('decompress', () => ({ - default: () => { - return Promise.resolve() - }, - })) - // Call the decompressRunner function - const result = await index.decompressRunner('', '') - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - it('should not reject if decompression failed', async () => { - jest.mock('decompress', () => ({ - default: () => { - return Promise.reject('Failed to decompress') - }, - })) - // Call the decompressRunner function - const result = await index.decompressRunner('', '') - expect(result).toBeUndefined() - }) -}) - -describe('addAdditionalDependencies', () => { - it('should add additional dependencies successfully', async () => { - // Call the addAdditionalDependencies function - const result = await index.addAdditionalDependencies({ - name: 'name', - version: 'version', - }) - - // Assert that the 
result is as expected - expect(result).toBeUndefined() - }) -}) diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts deleted file mode 100644 index 98ca4572f..000000000 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ /dev/null @@ -1,501 +0,0 @@ -import fs from 'fs' -import path from 'path' -import { ChildProcessWithoutNullStreams, spawn } from 'child_process' -import tcpPortUsed from 'tcp-port-used' -import fetchRT from 'fetch-retry' -import { - log, - getSystemResourceInfo, - InferenceEngine, - ModelSettingParams, - PromptTemplate, - SystemInformation, - getJanDataFolderPath, - ModelFile, -} from '@janhq/core/node' -import { executableNitroFile } from './execute' -import terminate from 'terminate' -import decompress from 'decompress' - -// Polyfill fetch with retry -const fetchRetry = fetchRT(fetch) - -/** - * The response object for model init operation. - */ -interface ModelInitOptions { - modelFolder: string - model: ModelFile -} -// The PORT to use for the Nitro subprocess -const PORT = 3928 -// The HOST address to use for the Nitro subprocess -const LOCAL_HOST = '127.0.0.1' -// The URL for the Nitro subprocess -const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}` -// The URL for the Nitro subprocess to load a model -const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel` -// The URL for the Nitro subprocess to validate a model -const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus` -// The URL for the Nitro subprocess to kill itself -const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` - -const NITRO_PORT_FREE_CHECK_INTERVAL = 100 - -// The supported model format -// TODO: Should be an array to support more models -const SUPPORTED_MODEL_FORMAT = '.gguf' - -// The subprocess instance for Nitro -let subprocess: ChildProcessWithoutNullStreams | undefined = undefined - -// The current model settings -let currentSettings: (ModelSettingParams & { model?: string }) | undefined = - undefined - -/** - * Stops a Nitro subprocess. - * @param wrapper - The model wrapper. - * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. - */ -function unloadModel(): Promise { - return killSubprocess() -} - -/** - * Initializes a Nitro subprocess to load a machine learning model. - * @param wrapper - The model wrapper. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. 
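For reference, a minimal sketch of how a caller would drive the loadModel function documented here and defined just below (the folder and file names are hypothetical; the option shape follows the ModelInitOptions interface declared earlier in this file):

// Hedged usage sketch, not part of the diff. Values are illustrative.
import index from './index'

async function demo() {
  await index.loadModel({
    modelFolder: '/home/user/jan/models/llama-2', // hypothetical path
    model: {
      id: 'llama-2-7b',
      engine: 'nitro',
      settings: {
        prompt_template: '{system_message}\n### Instruction: {prompt}\n### Response:',
        llama_model_path: 'llama-2-7b.Q4_K_M.gguf', // GGUF file inside modelFolder
        ngl: 40, // offload 40 layers to the GPU; 0 or undefined forces CPU mode
      },
    } as any, // full ModelFile metadata omitted for brevity
  })
}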
- * TODO: Should pass the absolute path of the model file instead of just the name - So we can modularize module.ts into an npm package - */ -async function loadModel( - params: ModelInitOptions, - systemInfo?: SystemInformation -): Promise { - if (params.model.engine !== InferenceEngine.nitro) { - // Not a nitro model - return Promise.resolve() - } - - const nitroResourceProbe = await getSystemResourceInfo() - // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt - if (params.model.settings.prompt_template) { - const promptTemplate = params.model.settings.prompt_template - const prompt = promptTemplateConverter(promptTemplate) - if (prompt?.error) { - return Promise.reject(prompt.error) - } - params.model.settings.system_prompt = prompt.system_prompt - params.model.settings.user_prompt = prompt.user_prompt - params.model.settings.ai_prompt = prompt.ai_prompt - } - - // modelFolder is the absolute path to the running model folder - // e.g. ~/jan/models/llama-2 - let modelFolder = params.modelFolder - - let llama_model_path = params.model.settings.llama_model_path - - // Absolute model path support - if ( - params.model?.sources.length && - params.model.sources.every((e) => fs.existsSync(e.url)) - ) { - llama_model_path = - params.model.sources.length === 1 - ? params.model.sources[0].url - : params.model.sources.find((e) => - e.url.includes(llama_model_path ?? params.model.id) - )?.url - } - - if (!llama_model_path || !path.isAbsolute(llama_model_path)) { - // Look for GGUF model file - const modelFiles: string[] = fs.readdirSync(modelFolder) - const ggufBinFile = modelFiles.find( - (file) => - // 1. Prioritize llama_model_path (predefined) - (llama_model_path && file === llama_model_path) || - // 2. Prioritize GGUF File (manual import) - file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) || - // 3. Fallback Model ID (for backward compatibility) - file === params.model.id - ) - if (ggufBinFile) llama_model_path = path.join(modelFolder, ggufBinFile) - } - - // Look for absolute source path for single model - - if (!llama_model_path) return Promise.reject('No GGUF model file found') - - currentSettings = { - cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore), - // model.settings can override the default settings - ...params.model.settings, - llama_model_path, - model: params.model.id, - // This is critical and requires real CPU physical core count (or performance core) - ...(params.model.settings.mmproj && { - mmproj: path.isAbsolute(params.model.settings.mmproj) - ? params.model.settings.mmproj - : path.join(modelFolder, params.model.settings.mmproj), - }), - } - return runNitroAndLoadModel(params.model.id, systemInfo) -} - -/** - * 1. Spawn Nitro process - * 2. Load model into Nitro subprocess - * 3. 
Validate model status - * @returns - */ -async function runNitroAndLoadModel( - modelId: string, - systemInfo?: SystemInformation -) { - // Gather system information for CPU physical cores and memory - return killSubprocess() - .then(() => - tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - ) - .then(() => spawnNitroProcess(systemInfo)) - .then(() => loadLLMModel(currentSettings)) - .then(() => validateModelStatus(modelId)) - .catch((err) => { - // TODO: Broadcast error so app could display proper error message - log(`[CORTEX]::Error: ${err}`) - return { error: err } - }) -} - -/** - * Parse prompt template into args settings - * @param promptTemplate Template as string - * @returns - */ -function promptTemplateConverter(promptTemplate: string): PromptTemplate { - // Split the string using the markers - const systemMarker = '{system_message}' - const promptMarker = '{prompt}' - - if ( - promptTemplate.includes(systemMarker) && - promptTemplate.includes(promptMarker) - ) { - // Find the indices of the markers - const systemIndex = promptTemplate.indexOf(systemMarker) - const promptIndex = promptTemplate.indexOf(promptMarker) - - // Extract the parts of the string - const system_prompt = promptTemplate.substring(0, systemIndex) - const user_prompt = promptTemplate.substring( - systemIndex + systemMarker.length, - promptIndex - ) - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length - ) - - // Return the split parts - return { system_prompt, user_prompt, ai_prompt } - } else if (promptTemplate.includes(promptMarker)) { - // Extract the parts of the string for the case where only promptMarker is present - const promptIndex = promptTemplate.indexOf(promptMarker) - const user_prompt = promptTemplate.substring(0, promptIndex) - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length - ) - - // Return the split parts - return { user_prompt, ai_prompt } - } - - // Return an error if none of the conditions are met - return { error: 'Cannot split prompt template' } -} - -/** - * Loads an LLM model into the Nitro subprocess by sending an HTTP POST request. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. - */ -function loadLLMModel(settings: any): Promise<Response> { - if (!settings?.ngl) { - settings.ngl = 100 - } - log(`[CORTEX]:: Loading model with params ${JSON.stringify(settings)}`) - return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify(settings), - retries: 3, - retryDelay: 300, - }) - .then((res) => { - log( - `[CORTEX]:: Load model success with response ${JSON.stringify( - res - )}` - ) - return Promise.resolve(res) - }) - .catch((err) => { - log(`[CORTEX]::Error: Load model failed with error ${err}`) - return Promise.reject(err) - }) -} - -/** - * Validates the status of a model. - * @returns {Promise<void>} A promise that resolves if the model is loaded successfully, - * or rejects with an error message if it is not. - */ -async function validateModelStatus(modelId: string): Promise<void> { - // Send a POST request to the validation URL. - // Retry the request up to 5 times if it fails, with a delay of 300 milliseconds between retries. 
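Stepping back to promptTemplateConverter above, a worked example makes the split concrete (the ChatML-style template string here is illustrative, not from the diff):

// Worked example, not part of the diff.
const template =
  '<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
// promptTemplateConverter(template) splits on the two markers and yields:
// system_prompt: '<|im_start|>system\n'
// user_prompt:   '<|im_end|>\n<|im_start|>user\n'
// ai_prompt:     '<|im_end|>\n<|im_start|>assistant\n'
// A template containing only {prompt} yields just user_prompt and ai_prompt;
// a template with neither marker returns { error: 'Cannot split prompt template' }.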
- log(`[CORTEX]:: Validating model ${modelId}`) - return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { - method: 'POST', - body: JSON.stringify({ - model: modelId, - // TODO: force to use cortex llamacpp by default - engine: 'cortex.llamacpp', - }), - headers: { - 'Content-Type': 'application/json', - }, - retries: 5, - retryDelay: 300, - }).then(async (res: Response) => { - log( - `[CORTEX]:: Validate model state with response ${JSON.stringify( - res.status - )}` - ) - // If the response is OK, check model_loaded status. - if (res.ok) { - const body = await res.json() - // If the model is loaded, return an empty object. - // Otherwise, return an object with an error message. - if (body.model_loaded) { - log( - `[CORTEX]:: Validate model state success with response ${JSON.stringify( - body - )}` - ) - return Promise.resolve() - } - } - const errorBody = await res.text() - log( - `[CORTEX]:: Validate model state failed with response ${errorBody} and status is ${JSON.stringify( - res.statusText - )}` - ) - return Promise.reject('Validate model status failed') - }) -} - -/** - * Terminates the Nitro subprocess. - * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. - */ -async function killSubprocess(): Promise { - const controller = new AbortController() - setTimeout(() => controller.abort(), 5000) - log(`[CORTEX]:: Request to kill cortex`) - - const killRequest = () => { - return fetch(NITRO_HTTP_KILL_URL, { - method: 'DELETE', - signal: controller.signal, - }) - .catch(() => {}) // Do nothing with this attempt - .then(() => - tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - ) - .then(() => log(`[CORTEX]:: cortex process is terminated`)) - .catch((err) => { - log( - `[CORTEX]:: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}` - ) - throw 'PORT_NOT_AVAILABLE' - }) - } - - if (subprocess?.pid && process.platform !== 'darwin') { - log(`[CORTEX]:: Killing PID ${subprocess.pid}`) - const pid = subprocess.pid - return new Promise((resolve, reject) => { - terminate(pid, function (err) { - if (err) { - log('[CORTEX]::Failed to kill PID - sending request to kill') - killRequest().then(resolve).catch(reject) - } else { - tcpPortUsed - .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - .then(() => log(`[CORTEX]:: cortex process is terminated`)) - .then(() => resolve()) - .catch(() => { - log( - '[CORTEX]::Failed to kill PID (Port check timeout) - sending request to kill' - ) - killRequest().then(resolve).catch(reject) - }) - } - }) - }) - } else { - return killRequest() - } -} - -/** - * Spawns a Nitro subprocess. - * @returns A promise that resolves when the Nitro subprocess is started. - */ -function spawnNitroProcess(systemInfo?: SystemInformation): Promise { - log(`[CORTEX]:: Spawning cortex subprocess...`) - - return new Promise(async (resolve, reject) => { - let executableOptions = executableNitroFile( - // If ngl is not set or equal to 0, run on CPU with correct instructions - systemInfo?.gpuSetting - ? { - ...systemInfo.gpuSetting, - run_mode: - currentSettings?.ngl === undefined || currentSettings.ngl === 0 - ? 
'cpu' - : systemInfo.gpuSetting.run_mode, - } - : undefined - ) - - const args: string[] = ['1', LOCAL_HOST, PORT.toString()] - // Execute the binary - log( - `[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}` - ) - log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`) - - // Add engine path to the PATH and LD_LIBRARY_PATH - process.env.PATH = (process.env.PATH || '').concat( - path.delimiter, - executableOptions.enginePath - ) - log(`[CORTEX] PATH: ${process.env.PATH}`) - process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( - path.delimiter, - executableOptions.enginePath - ) - - subprocess = spawn( - executableOptions.executablePath, - ['1', LOCAL_HOST, PORT.toString()], - { - cwd: path.join(path.parse(executableOptions.executablePath).dir), - env: { - ...process.env, - ENGINE_PATH: executableOptions.enginePath, - CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, - // Vulkan - Support 1 device at a time for now - ...(executableOptions.vkVisibleDevices?.length > 0 && { - GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], - }), - }, - } - ) - - // Handle subprocess output - subprocess.stdout.on('data', (data: any) => { - log(`[CORTEX]:: ${data}`) - }) - - subprocess.stderr.on('data', (data: any) => { - log(`[CORTEX]::Error: ${data}`) - }) - - subprocess.on('close', (code: any) => { - log(`[CORTEX]:: cortex exited with code: ${code}`) - subprocess = undefined - reject(`child process exited with code ${code}`) - }) - - tcpPortUsed - .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000) - .then(() => { - log(`[CORTEX]:: cortex is ready`) - resolve() - }) - }) -} - -/** - * Every module should have a dispose function - * This will be called when the extension is unloaded and should clean up any resources - * Also called when app is closed - */ -function dispose() { - // clean other registered resources here - killSubprocess() -} - -/** - * Nitro process info - */ -export interface NitroProcessInfo { - isRunning: boolean -} - -/** - * Retrieve current nitro process - */ -const getCurrentNitroProcessInfo = (): NitroProcessInfo => { - return { - isRunning: subprocess != null, - } -} - -const addAdditionalDependencies = (data: { name: string; version: string }) => { - log( - `[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}` - ) - const additionalPath = path.delimiter.concat( - path.join(getJanDataFolderPath(), 'engines', data.name, data.version) - ) - // Set the updated PATH - process.env.PATH = (process.env.PATH || '').concat( - path.delimiter, - additionalPath - ) - process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( - path.delimiter, - additionalPath - ) -} - -const decompressRunner = async (zipPath: string, output: string) => { - console.debug(`Decompressing ${zipPath} to ${output}...`) - try { - const files = await decompress(zipPath, output) - console.debug('Decompress finished!', files) - } catch (err) { - console.error(`Decompress ${zipPath} failed: ${err}`) - } -} - -export default { - loadModel, - unloadModel, - dispose, - getCurrentNitroProcessInfo, - addAdditionalDependencies, - decompressRunner, -} diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json index cd776257c..50fe12349 100644 --- a/extensions/inference-openai-extension/package.json +++ b/extensions/inference-openai-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-openai-extension", 
"productName": "OpenAI Inference Engine", - "version": "1.0.2", + "version": "1.0.3", "description": "This extension enables OpenAI chat completion API calls", "main": "dist/index.js", "module": "dist/module.js", diff --git a/extensions/inference-openai-extension/resources/models.json b/extensions/inference-openai-extension/resources/models.json index 72517d540..124e123b9 100644 --- a/extensions/inference-openai-extension/resources/models.json +++ b/extensions/inference-openai-extension/resources/models.json @@ -23,40 +23,7 @@ }, "metadata": { "author": "OpenAI", - "tags": [ - "General" - ] - }, - "engine": "openai" - }, - { - "sources": [ - { - "url": "https://openai.com" - } - ], - "id": "gpt-4-vision-preview", - "object": "model", - "name": "OpenAI GPT 4 with Vision (Preview)", - "version": "1.1", - "description": "OpenAI GPT-4 Vision model features vision understanding capabilities", - "format": "api", - "settings": { - "vision_model": true, - "textModel": false - }, - "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "top_p": 0.95, - "stream": true - }, - "metadata": { - "author": "OpenAI", - "tags": [ - "General", - "Vision" - ] + "tags": ["General"] }, "engine": "openai" }, @@ -84,9 +51,7 @@ }, "metadata": { "author": "OpenAI", - "tags": [ - "General" - ] + "tags": ["General"] }, "engine": "openai" }, @@ -114,9 +79,7 @@ }, "metadata": { "author": "OpenAI", - "tags": [ - "General" - ] + "tags": ["General"] }, "engine": "openai" }, @@ -144,9 +107,7 @@ }, "metadata": { "author": "OpenAI", - "tags": [ - "General" - ] + "tags": ["General"] }, "engine": "openai" }, @@ -174,9 +135,7 @@ }, "metadata": { "author": "OpenAI", - "tags": [ - "General" - ] + "tags": ["General"] }, "engine": "openai" } diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 44c243adf..64880b678 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -70,16 +70,17 @@ export default class JanInferenceOpenAIExtension extends RemoteOAIEngine { * Tranform the payload before sending it to the inference endpoint. * The new preview models such as o1-mini and o1-preview replaced max_tokens by max_completion_tokens parameter. * Others do not. 
- * @param payload - * @returns + * @param payload + * @returns */ transformPayload = (payload: OpenAIPayloadType): OpenAIPayloadType => { // Transform the payload for preview models if (this.previewModels.includes(payload.model)) { - const { max_tokens, ...params } = payload + const { max_tokens, temperature, top_p, stop, ...params } = payload return { ...params, max_completion_tokens: max_tokens, + stream: false // o1 only supports stream = false } } // Pass through for non-preview models diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json index 3a694e5a0..bd834454a 100644 --- a/extensions/model-extension/package.json +++ b/extensions/model-extension/package.json @@ -4,7 +4,6 @@ "version": "1.0.34", "description": "Model Management Extension provides model exploration and seamless downloads", "main": "dist/index.js", - "node": "dist/node/index.cjs.js", "author": "Jan ", "license": "AGPL-3.0", "scripts": { @@ -36,15 +35,9 @@ "README.md" ], "dependencies": { - "@huggingface/gguf": "^0.0.11", - "@huggingface/jinja": "^0.3.0", "@janhq/core": "file:../../core", - "hyllama": "^0.2.2", - "python-shell": "^5.0.0" + "ky": "^1.7.2", + "p-queue": "^8.0.1" }, - "bundleDependencies": [ - "hyllama", - "@huggingface/gguf", - "@huggingface/jinja" - ] + "bundleDependencies": [] } diff --git a/extensions/model-extension/resources/default-model.json b/extensions/model-extension/resources/default-model.json deleted file mode 100644 index c02008cd6..000000000 --- a/extensions/model-extension/resources/default-model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "object": "model", - "version": "1.0", - "format": "gguf", - "sources": [ - { - "url": "N/A", - "filename": "N/A" - } - ], - "id": "N/A", - "name": "N/A", - "created": 0, - "description": "User self-imported model", - "settings": { - "ctx_len": 2048, - "embedding": false, - "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:", - "llama_model_path": "N/A" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 2048, - "stop": ["<|END_OF_TURN_TOKEN|>", "</s>", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "User", - "tags": [], - "size": 0 - }, - "engine": "nitro" -} diff --git a/extensions/model-extension/rollup.config.ts b/extensions/model-extension/rollup.config.ts index d36d8ffac..64e62480f 100644 --- a/extensions/model-extension/rollup.config.ts +++ b/extensions/model-extension/rollup.config.ts @@ -6,7 +6,6 @@ import replace from '@rollup/plugin-replace' import commonjs from '@rollup/plugin-commonjs' const settingJson = require('./resources/settings.json') const packageJson = require('./package.json') -const defaultModelJson = require('./resources/default-model.json') export default [ { @@ -20,17 +19,20 @@ export default [ plugins: [ replace({ preventAssignment: true, - DEFAULT_MODEL: JSON.stringify(defaultModelJson), SETTINGS: JSON.stringify(settingJson), - NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), + API_URL: JSON.stringify('http://127.0.0.1:39291'), + SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), }), // Allow json resolution json(), // Compile TypeScript files - typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }), + typescript({ + useTsconfigDeclarationDir: true, + exclude: ['**/__tests__', '**/*.test.ts'], + }), // Compile TypeScript files // Allow bundling cjs modules (unlike 
webpack, rollup doesn't understand cjs) - // commonjs(), + commonjs(), // Allow node_modules resolution, so you can use 'external' to control // which external modules to include in the bundle // https://github.com/rollup/rollup-plugin-node-resolve#usage @@ -39,39 +41,6 @@ export default [ browser: true, }), - // Resolve source maps to the original source - sourceMaps(), - ], - }, - { - input: `src/node/index.ts`, - output: [ - { - file: 'dist/node/index.cjs.js', - format: 'cjs', - sourcemap: true, - inlineDynamicImports: true, - }, - ], - // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') - external: ['@janhq/core/node'], - watch: { - include: 'src/node/**', - }, - plugins: [ - // Allow json resolution - json(), - // Compile TypeScript files - typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }), - // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) - commonjs(), - // Allow node_modules resolution, so you can use 'external' to control - // which external modules to include in the bundle - // https://github.com/rollup/rollup-plugin-node-resolve#usage - resolve({ - extensions: ['.ts', '.js', '.json'], - }), - // Resolve source maps to the original source sourceMaps(), ], diff --git a/extensions/model-extension/src/@types/InvalidHostError.ts b/extensions/model-extension/src/@types/InvalidHostError.ts deleted file mode 100644 index 47262206e..000000000 --- a/extensions/model-extension/src/@types/InvalidHostError.ts +++ /dev/null @@ -1,6 +0,0 @@ -export class InvalidHostError extends Error { - constructor(message: string) { - super(message) - this.name = 'InvalidHostError' - } -} diff --git a/extensions/model-extension/src/@types/NotSupportModelError.ts b/extensions/model-extension/src/@types/NotSupportModelError.ts deleted file mode 100644 index 0a1946176..000000000 --- a/extensions/model-extension/src/@types/NotSupportModelError.ts +++ /dev/null @@ -1,6 +0,0 @@ -export class NotSupportedModelError extends Error { - constructor(message: string) { - super(message) - this.name = 'NotSupportedModelError' - } -} diff --git a/extensions/model-extension/src/@types/global.d.ts b/extensions/model-extension/src/@types/global.d.ts index 3878d4bf2..bff3811e3 100644 --- a/extensions/model-extension/src/@types/global.d.ts +++ b/extensions/model-extension/src/@types/global.d.ts @@ -1,7 +1,8 @@ export {} declare global { - declare const DEFAULT_MODEL: object declare const NODE: string + declare const API_URL: string + declare const SOCKET_URL: string interface Core { api: APIFunctions diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts new file mode 100644 index 000000000..7a65e8e3f --- /dev/null +++ b/extensions/model-extension/src/cortex.ts @@ -0,0 +1,185 @@ +import PQueue from 'p-queue' +import ky from 'ky' +import { extractModelLoadParams, Model } from '@janhq/core' +import { extractInferenceParams } from '@janhq/core' +/** + * cortex.cpp Model APIs interface + */ +interface ICortexAPI { + getModel(model: string): Promise + getModels(): Promise + pullModel(model: string, id?: string, name?: string): Promise + importModel( + path: string, + modelPath: string, + name?: string, + option?: string + ): Promise + deleteModel(model: string): Promise + updateModel(model: object): Promise + cancelModelPull(model: string): Promise +} + +type ModelList = { + data: any[] +} + +export class CortexAPI implements ICortexAPI { + queue = new PQueue({ concurrency: 1 }) + + 
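The queue declared above is what serializes access to cortex.cpp: it has concurrency 1 and is primed in the constructor that follows with a healthz() probe, so no request reaches the server before it answers its health check. A minimal sketch of the same pattern (the endpoint base comes from the API_URL constant injected by the rollup config; the snippet itself is illustrative):

// Hedged sketch of the serialization pattern, not part of the diff.
import PQueue from 'p-queue'
import ky from 'ky'

const API_URL = 'http://127.0.0.1:39291' // value injected at build time above

async function demo() {
  const queue = new PQueue({ concurrency: 1 })
  // Primes the queue: retries /healthz until the server is reachable.
  queue.add(() =>
    ky.get(`${API_URL}/healthz`, { retry: { limit: 10, methods: ['get'] } })
  )
  // Enqueued behind the probe, so it only runs against a live server.
  const models = await queue.add(() => ky.get(`${API_URL}/models`).json())
  return models
}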
constructor() { + this.queue.add(() => this.healthz()) + } + + /** + * Fetches a model detail from cortex.cpp + * @param model + * @returns + */ + getModel(model: string): Promise<Model> { + return this.queue.add(() => + ky + .get(`${API_URL}/v1/models/${model}`) + .json() + .then((e) => this.transformModel(e)) + ) + } + + /** + * Fetches models list from cortex.cpp + * @param model + * @returns + */ + getModels(): Promise<Model[]> { + return this.queue + .add(() => ky.get(`${API_URL}/models`).json<ModelList>()) + .then((e) => + typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : [] + ) + } + + /** + * Pulls a model from HuggingFace via cortex.cpp + * @param model + * @returns + */ + pullModel(model: string, id?: string, name?: string): Promise<void> { + return this.queue.add(() => + ky + .post(`${API_URL}/v1/models/pull`, { json: { model, id, name } }) + .json() + .catch(async (e) => { + throw (await e.response?.json()) ?? e + }) + .then() + ) + } + + /** + * Imports a model from a local path via cortex.cpp + * @param model + * @returns + */ + importModel( + model: string, + modelPath: string, + name?: string, + option?: string + ): Promise<void> { + return this.queue.add(() => + ky + .post(`${API_URL}/v1/models/import`, { + json: { model, modelPath, name, option }, + }) + .json() + .catch((e) => console.debug(e)) // Ignore error + .then() + ) + } + + /** + * Deletes a model from cortex.cpp + * @param model + * @returns + */ + deleteModel(model: string): Promise<void> { + return this.queue.add(() => + ky.delete(`${API_URL}/models/${model}`).json().then() + ) + } + + /** + * Update a model in cortex.cpp + * @param model + * @returns + */ + updateModel(model: Partial<Model>): Promise<Model> { + return this.queue.add(() => + ky + .patch(`${API_URL}/v1/models/${model.id}`, { json: { ...model } }) + .json() + .then() + ) + } + + /** + * Cancel model pull in cortex.cpp + * @param model + * @returns + */ + cancelModelPull(model: string): Promise<void> { + return this.queue.add(() => + ky + .delete(`${API_URL}/models/pull`, { json: { taskId: model } }) + .json() + .then() + ) + } + + /** + * Check model status + * @param model + */ + async getModelStatus(model: string): Promise<boolean> { + return this.queue + .add(() => ky.get(`${API_URL}/models/status/${model}`)) + .then((e) => true) + .catch(() => false) + } + + /** + * Do health check on cortex.cpp + * @returns + */ + healthz(): Promise<void> { + return ky + .get(`${API_URL}/healthz`, { + retry: { + limit: 10, + methods: ['get'], + }, + }) + .then(() => {}) + } + + /** + * Transform model to the expected format (e.g. parameters, settings, metadata) + * @param model + * @returns + */ + private transformModel(model: any) { + model.parameters = { + ...extractInferenceParams(model), + ...model.parameters, + } + model.settings = { + ...extractModelLoadParams(model), + ...model.settings, + } + model.metadata = model.metadata ?? { + tags: [], + size: model.size ?? model.metadata?.size ?? 
0, + } + return model as Model + } +} diff --git a/extensions/model-extension/src/helpers/path.test.ts b/extensions/model-extension/src/helpers/path.test.ts deleted file mode 100644 index 64ca65d8a..000000000 --- a/extensions/model-extension/src/helpers/path.test.ts +++ /dev/null @@ -1,87 +0,0 @@ -import { extractFileName } from './path'; - -describe('extractFileName Function', () => { - it('should correctly extract the file name with the provided file extension', () => { - const url = 'http://example.com/some/path/to/file.ext'; - const fileExtension = '.ext'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('file.ext'); - }); - - it('should correctly append the file extension if it does not already exist in the file name', () => { - const url = 'http://example.com/some/path/to/file'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('file.txt'); - }); - - it('should handle cases where the URL does not have a file extension correctly', () => { - const url = 'http://example.com/some/path/to/file'; - const fileExtension = '.jpg'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('file.jpg'); - }); - - it('should correctly handle URLs without a trailing slash', () => { - const url = 'http://example.com/some/path/tofile'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('tofile.txt'); - }); - - it('should correctly handle URLs with multiple file extensions', () => { - const url = 'http://example.com/some/path/tofile.tar.gz'; - const fileExtension = '.gz'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('tofile.tar.gz'); - }); - - it('should correctly handle URLs with special characters', () => { - const url = 'http://example.com/some/path/tófílë.extë'; - const fileExtension = '.extë'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('tófílë.extë'); - }); - - it('should correctly handle URLs that are just a file with no path', () => { - const url = 'http://example.com/file.txt'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('file.txt'); - }); - - it('should correctly handle URLs that have special query parameters', () => { - const url = 'http://example.com/some/path/tofile.ext?query=1'; - const fileExtension = '.ext'; - const fileName = extractFileName(url.split('?')[0], fileExtension); - expect(fileName).toBe('tofile.ext'); - }); - - it('should correctly handle URLs that have uppercase characters', () => { - const url = 'http://EXAMPLE.COM/PATH/TO/FILE.EXT'; - const fileExtension = '.ext'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('FILE.EXT'); - }); - - it('should correctly handle invalid URLs', () => { - const url = 'invalid-url'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('invalid-url.txt'); - }); - - it('should correctly handle empty URLs', () => { - const url = ''; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('.txt'); - }); - - it('should correctly handle undefined URLs', () => { - const url = undefined; - const fileExtension = '.txt'; - const fileName = extractFileName(url as any, fileExtension); - expect(fileName).toBe('.txt'); - }); -}); diff --git a/extensions/model-extension/src/helpers/path.ts 
b/extensions/model-extension/src/helpers/path.ts deleted file mode 100644 index 6091005b8..000000000 --- a/extensions/model-extension/src/helpers/path.ts +++ /dev/null @@ -1,13 +0,0 @@ -/** - * try to retrieve the download file name from the source url - */ - -export function extractFileName(url: string, fileExtension: string): string { - if(!url) return fileExtension - - const extractedFileName = url.split('/').pop() - const fileName = extractedFileName.toLowerCase().endsWith(fileExtension) - ? extractedFileName - : extractedFileName + fileExtension - return fileName -} diff --git a/extensions/model-extension/src/index.test.ts b/extensions/model-extension/src/index.test.ts index 3f804b6d6..e514f8ce3 100644 --- a/extensions/model-extension/src/index.test.ts +++ b/extensions/model-extension/src/index.test.ts @@ -1,846 +1,89 @@ -/** - * @jest-environment jsdom - */ -const readDirSyncMock = jest.fn() -const existMock = jest.fn() -const readFileSyncMock = jest.fn() -const downloadMock = jest.fn() -const mkdirMock = jest.fn() -const writeFileSyncMock = jest.fn() -const copyFileMock = jest.fn() -const dirNameMock = jest.fn() -const executeMock = jest.fn() +import JanModelExtension from './index' + +let SETTINGS = [] +// @ts-ignore +global.SETTINGS = SETTINGS jest.mock('@janhq/core', () => ({ ...jest.requireActual('@janhq/core/node'), events: { emit: jest.fn(), }, - fs: { - existsSync: existMock, - readdirSync: readDirSyncMock, - readFileSync: readFileSyncMock, - writeFileSync: writeFileSyncMock, - mkdir: mkdirMock, - copyFile: copyFileMock, - fileStat: () => ({ - isDirectory: false, - }), - }, - dirName: dirNameMock, joinPath: (paths) => paths.join('/'), - ModelExtension: jest.fn(), - downloadFile: downloadMock, - executeOnMain: executeMock, + ModelExtension: jest.fn().mockImplementation(function () { + // @ts-ignore + this.registerSettings = () => { + return Promise.resolve() + } + // @ts-ignore + return this + }), })) -jest.mock('@huggingface/gguf') - -global.fetch = jest.fn(() => - Promise.resolve({ - json: () => Promise.resolve({ test: 100 }), - arrayBuffer: jest.fn(), - }) -) as jest.Mock - -import JanModelExtension from '.' 
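The rewritten test file in the hunks above no longer mocks the filesystem; it stubs the ModelExtension base class so the subclass can be constructed outside the app, then swaps the extension's cortexAPI for a fake and asserts only on delegation, as the specs that follow do. In miniature (the stub values here are illustrative):

// Hedged sketch of the new test pattern, not part of the diff.
const extension = new JanModelExtension()
// @ts-ignore - replace the HTTP client with a resolved stub
extension.cortexAPI = { deleteModel: jest.fn().mockResolvedValue(undefined) }

await extension.deleteModel('test-model')
expect(extension.cortexAPI.deleteModel).toHaveBeenCalledWith('test-model')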
-import { fs, dirName } from '@janhq/core' -import { gguf } from '@huggingface/gguf' - describe('JanModelExtension', () => { - let sut: JanModelExtension - - beforeAll(() => { - // @ts-ignore - sut = new JanModelExtension() - }) + let extension: JanModelExtension + let mockCortexAPI: any beforeEach(() => { - jest.clearAllMocks() - }) - - describe('getConfiguredModels', () => { - describe("when there's no models are pre-populated", () => { - it('should return empty array', async () => { - // Mock configured models data - const configuredModels = [] - existMock.mockReturnValue(true) - readDirSyncMock.mockReturnValue([]) - - const result = await sut.getConfiguredModels() - expect(result).toEqual([]) - }) - }) - - describe("when there's are pre-populated models - all flattened", () => { - it('returns configured models data - flatten folder - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getConfiguredModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model.json', - id: '2', - }), - ]) - ) - }) - }) - - describe("when there's are pre-populated models - there are nested folders", () => { - it('returns configured models data - flatten folder - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else if (path.includes('model2/model2-1')) - return JSON.stringify(configuredModels[1]) - }) - - const 
result = await sut.getConfiguredModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('getDownloadedModels', () => { - describe('no models downloaded', () => { - it('should return empty array', async () => { - // Mock downloaded models data - existMock.mockReturnValue(true) - readDirSyncMock.mockReturnValue([]) - - const result = await sut.getDownloadedModels() - expect(result).toEqual([]) - }) - }) - describe('only one model is downloaded', () => { - describe('flatten folder', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2'] - else if (path === 'file://models/model1') - return ['model.json', 'test.gguf'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - ]) - ) - }) - }) - }) - - describe('all models are downloaded', () => { - describe('nested folders', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else return ['model.json', 'test.gguf'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 
'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('all models are downloaded with uppercased GGUF files', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else if (path === 'file://models/model1') - return ['model.json', 'test.GGUF'] - else return ['model.json', 'test.gguf'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - - describe('all models are downloaded - GGUF & Tensort RT', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else if (path === 'file://models/model1') - return ['model.json', 'test.gguf'] - else return ['model.json', 'test.engine'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('deleteModel', () => { - describe('model is a GGUF model', () => { - it('should delete the GGUF file', 
async () => { - fs.unlinkSync = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({})) - - readDirSyncMock.mockImplementation((path) => { - return ['model.json', 'test.gguf'] - }) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.unlinkSync).toHaveBeenCalledWith( - 'file://models/model1/test.gguf' - ) - }) - - it('no gguf file presented', async () => { - fs.unlinkSync = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({})) - - readDirSyncMock.mockReturnValue(['model.json']) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.unlinkSync).toHaveBeenCalledTimes(0) - }) - - it('delete an imported model', async () => { - fs.rm = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - readDirSyncMock.mockReturnValue(['model.json', 'test.gguf']) - - // MARK: This is a tricky logic implement? - // I will just add test for now but will align on the legacy implementation - fs.readFileSync = jest.fn().mockReturnValue( - JSON.stringify({ - metadata: { - author: 'user', - }, - }) - ) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.rm).toHaveBeenCalledWith('file://models/model1') - }) - - it('delete tensorrt-models', async () => { - fs.rm = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - readDirSyncMock.mockReturnValue(['model.json', 'test.engine']) - - fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({})) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.unlinkSync).toHaveBeenCalledWith( - 'file://models/model1/test.engine' - ) - }) - }) - }) - - describe('downloadModel', () => { - const model: any = { - id: 'model-id', - name: 'Test Model', - sources: [ - { url: 'http://example.com/model.gguf', filename: 'model.gguf' }, - ], - engine: 'test-engine', + mockCortexAPI = { + getModels: jest.fn().mockResolvedValue([]), + pullModel: jest.fn().mockResolvedValue(undefined), + importModel: jest.fn().mockResolvedValue(undefined), + deleteModel: jest.fn().mockResolvedValue(undefined), + updateModel: jest.fn().mockResolvedValue({}), + cancelModelPull: jest.fn().mockResolvedValue(undefined), } - const network = { - ignoreSSL: true, - proxy: 'http://proxy.example.com', - } + // @ts-ignore + extension = new JanModelExtension() + extension.cortexAPI = mockCortexAPI + }) - const gpuSettings: any = { - gpus: [{ name: 'nvidia-rtx-3080', arch: 'ampere' }], - } + it('should register settings on load', async () => { + // @ts-ignore + const registerSettingsSpy = jest.spyOn(extension, 'registerSettings') + await extension.onLoad() + expect(registerSettingsSpy).toHaveBeenCalledWith(SETTINGS) + }) - it('should reject with invalid gguf metadata', async () => { - existMock.mockImplementation(() => false) + it('should pull a model', async () => { + const model = 'test-model' + await extension.pullModel(model) + expect(mockCortexAPI.pullModel).toHaveBeenCalledWith(model) + }) - expect( - sut.downloadModel(model, gpuSettings, network) - 
).rejects.toBeTruthy() - }) + it('should cancel model download', async () => { + const model = 'test-model' + await extension.cancelModelPull(model) + expect(mockCortexAPI.cancelModelPull).toHaveBeenCalledWith(model) + }) - it('should download corresponding ID', async () => { - existMock.mockImplementation(() => true) - dirNameMock.mockImplementation(() => 'file://models/model1') - downloadMock.mockImplementation(() => { - return Promise.resolve({}) - }) + it('should delete a model', async () => { + const model = 'test-model' + await extension.deleteModel(model) + expect(mockCortexAPI.deleteModel).toHaveBeenCalledWith(model) + }) - expect( - await sut.downloadModel( - { ...model, file_path: 'file://models/model1/model.json' }, - gpuSettings, - network - ) - ).toBeUndefined() + it('should get all models', async () => { + const models = await extension.getModels() + expect(models).toEqual([]) + expect(mockCortexAPI.getModels).toHaveBeenCalled() + }) - expect(downloadMock).toHaveBeenCalledWith( - { - localPath: 'file://models/model1/model.gguf', - modelId: 'model-id', - url: 'http://example.com/model.gguf', - }, - { ignoreSSL: true, proxy: 'http://proxy.example.com' } - ) - }) + it('should update a model', async () => { + const model = { id: 'test-model' } + const updatedModel = await extension.updateModel(model) + expect(updatedModel).toEqual({}) + expect(mockCortexAPI.updateModel).toHaveBeenCalledWith(model) + }) - it('should handle invalid model file', async () => { - executeMock.mockResolvedValue({}) - - fs.readFileSync = jest.fn(() => { - return JSON.stringify({ metadata: { author: 'user' } }) - }) - - expect( - sut.downloadModel( - { ...model, file_path: 'file://models/model1/model.json' }, - gpuSettings, - network - ) - ).resolves.not.toThrow() - - expect(downloadMock).not.toHaveBeenCalled() - }) - it('should handle model file with no sources', async () => { - executeMock.mockResolvedValue({}) - const modelWithoutSources = { ...model, sources: [] } - - expect( - sut.downloadModel( - { - ...modelWithoutSources, - file_path: 'file://models/model1/model.json', - }, - gpuSettings, - network - ) - ).resolves.toBe(undefined) - - expect(downloadMock).not.toHaveBeenCalled() - }) - - it('should handle model file with multiple sources', async () => { - const modelWithMultipleSources = { - ...model, - sources: [ - { url: 'http://example.com/model1.gguf', filename: 'model1.gguf' }, - { url: 'http://example.com/model2.gguf', filename: 'model2.gguf' }, - ], - } - - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - downloadMock.mockImplementation(() => { - return Promise.resolve({}) - }) - - expect( - await sut.downloadModel( - { - ...modelWithMultipleSources, - file_path: 'file://models/model1/model.json', - }, - gpuSettings, - network - ) - ).toBeUndefined() - - expect(downloadMock).toHaveBeenCalledWith( - { - localPath: 'file://models/model1/model1.gguf', - modelId: 'model-id', - url: 'http://example.com/model1.gguf', - }, - { ignoreSSL: true, proxy: 'http://proxy.example.com' } - ) - - expect(downloadMock).toHaveBeenCalledWith( - { - localPath: 'file://models/model1/model2.gguf', - modelId: 'model-id', - url: 'http://example.com/model2.gguf', - }, - { ignoreSSL: true, proxy: 'http://proxy.example.com' } - ) - }) - - it('should handle model file with 
no file_path', async () => { - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - const modelWithoutFilepath = { ...model, file_path: undefined } - - await sut.downloadModel(modelWithoutFilepath, gpuSettings, network) - - expect(downloadMock).toHaveBeenCalledWith( - expect.objectContaining({ - localPath: 'file://models/model-id/model.gguf', - }), - expect.anything() - ) - }) - - it('should handle model file with invalid file_path', async () => { - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - const modelWithInvalidFilepath = { - ...model, - file_path: 'file://models/invalid-model.json', - } - - await sut.downloadModel(modelWithInvalidFilepath, gpuSettings, network) - - expect(downloadMock).toHaveBeenCalledWith( - expect.objectContaining({ - localPath: 'file://models/model1/model.gguf', - }), - expect.anything() - ) - }) - - it('should handle model with valid chat_template', async () => { - executeMock.mockResolvedValue('{prompt}') - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: {}, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - settings: { - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - } - - const result = await sut.retrieveGGUFMetadata({}) - - expect(result).toEqual({ - parameters: { - stop: [], - }, - settings: { - ctx_len: 4096, - ngl: 33, - prompt_template: '{prompt}', - }, - }) - }) - - it('should handle model without chat_template', async () => { - executeMock.mockRejectedValue({}) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: {}, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - settings: { - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - } - - const result = await sut.retrieveGGUFMetadata({}) - - expect(result).toEqual({ - parameters: { - stop: [], - }, - settings: { - ctx_len: 4096, - ngl: 33, - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - }) - }) + it('should import a model', async () => { + const model: any = { path: 'test-path' } + const optionType: any = 'test-option' + await extension.importModel(model, optionType) + expect(mockCortexAPI.importModel).toHaveBeenCalledWith( + model.path, + optionType + ) }) }) diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index 7e7c12469..b3ad2a012 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -1,65 +1,51 @@ import { - fs, - downloadFile, - abortDownload, - InferenceEngine, - joinPath, ModelExtension, Model, - getJanDataFolderPath, + InferenceEngine, + joinPath, + dirName, + fs, + ModelManager, + abortDownload, + DownloadState, events, DownloadEvent, - DownloadRoute, - DownloadState, OptionType, - ImportingModel, - LocalImportModelEvent, - baseName, - GpuSetting, - DownloadRequest, - executeOnMain, - HuggingFaceRepoData, - getFileSize, - AllQuantizations, - ModelEvent, - ModelFile, - dirName, } from '@janhq/core' - -import { extractFileName } from 
'./helpers/path' -import { GGUFMetadata, gguf } from '@huggingface/gguf' -import { NotSupportedModelError } from './@types/NotSupportModelError' -import { InvalidHostError } from './@types/InvalidHostError' +import { CortexAPI } from './cortex' +import { scanModelsFolder } from './legacy/model-json' +import { downloadModel } from './legacy/download' +import { systemInformation } from '@janhq/core' +import { deleteModelFiles } from './legacy/delete' declare const SETTINGS: Array -enum Settings { - huggingFaceAccessToken = 'hugging-face-access-token', + +/** + * Extension enum + */ +enum ExtensionEnum { + downloadedModels = 'downloadedModels', } /** * A extension for models */ export default class JanModelExtension extends ModelExtension { - private static readonly _homeDir = 'file://models' - private static readonly _modelMetadataFileName = 'model.json' - private static readonly _supportedModelFormat = '.gguf' - private static readonly _incompletedModelFileName = '.download' - private static readonly _offlineInferenceEngine = [ - InferenceEngine.nitro, - InferenceEngine.nitro_tensorrt_llm, - ] - private static readonly _tensorRtEngineFormat = '.engine' - private static readonly _supportedGpuArch = ['ampere', 'ada'] - - interrupted = false + cortexAPI: CortexAPI = new CortexAPI() /** * Called when the extension is loaded. * @override */ async onLoad() { - // Handle Desktop Events this.registerSettings(SETTINGS) + + // Try get models from cortex.cpp + this.getModels().then((models) => { + this.registerModels(models) + }) + + // Listen to app download events this.handleDesktopEvents() } @@ -72,574 +58,203 @@ export default class JanModelExtension extends ModelExtension { /** * Downloads a machine learning model. * @param model - The model to download. - * @param network - Optional object to specify proxy/whether to ignore SSL certificates. * @returns A Promise that resolves when the model is downloaded. */ - async downloadModel( - model: ModelFile, - gpuSettings?: GpuSetting, - network?: { ignoreSSL?: boolean; proxy?: string } - ): Promise { - // Create corresponding directory - const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id]) - if (!(await fs.existsSync(modelDirPath))) await fs.mkdir(modelDirPath) - const modelJsonPath = - model.file_path ?? (await joinPath([modelDirPath, 'model.json'])) - - // Download HF model - model.json not exist - if (!(await fs.existsSync(modelJsonPath))) { - // It supports only one source for HF download - const metadata = await this.fetchModelMetadata(model.sources[0].url) - const updatedModel = await this.retrieveGGUFMetadata(metadata) - if (updatedModel) { - // Update model settings - model.settings = { - ...model.settings, - ...updatedModel.settings, - } - model.parameters = { - ...model.parameters, - ...updatedModel.parameters, - } - } - await fs.writeFileSync(modelJsonPath, JSON.stringify(model, null, 2)) - events.emit(ModelEvent.OnModelsUpdate, {}) - } - if (model.engine === InferenceEngine.nitro_tensorrt_llm) { - if (!gpuSettings || gpuSettings.gpus.length === 0) { - console.error('No GPU found. Please check your GPU setting.') - return - } - const firstGpu = gpuSettings.gpus[0] - if (!firstGpu.name.toLowerCase().includes('nvidia')) { - console.error('No Nvidia GPU found. Please check your GPU setting.') - return - } - const gpuArch = firstGpu.arch - if (gpuArch === undefined) { - console.error( - 'No GPU architecture found. Please check your GPU setting.' 
- ) - return - } - - if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) { - console.debug( - `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.` - ) - return - } - - const os = 'windows' // TODO: remove this hard coded value - - const newSources = model.sources.map((source) => { - const newSource = { ...source } - newSource.url = newSource.url - .replace(//g, os) - .replace(//g, gpuArch) - return newSource - }) - model.sources = newSources - } - - console.debug(`Download sources: ${JSON.stringify(model.sources)}`) - - if (model.sources.length > 1) { - // path to model binaries - for (const source of model.sources) { - let path = extractFileName( - source.url, - JanModelExtension._supportedModelFormat - ) - if (source.filename) { - path = model.file_path - ? await joinPath([await dirName(model.file_path), source.filename]) - : await joinPath([modelDirPath, source.filename]) - } - - const downloadRequest: DownloadRequest = { - url: source.url, - localPath: path, - modelId: model.id, - } - downloadFile(downloadRequest, network) - } - // TODO: handle multiple binaries for web later - } else { - const fileName = extractFileName( - model.sources[0]?.url, - JanModelExtension._supportedModelFormat - ) - const path = model.file_path - ? await joinPath([await dirName(model.file_path), fileName]) - : await joinPath([modelDirPath, fileName]) - const downloadRequest: DownloadRequest = { - url: model.sources[0]?.url, - localPath: path, - modelId: model.id, - } - downloadFile(downloadRequest, network) - - if (window && window.core?.api && window.core.api.baseApiUrl) { - this.startPollingDownloadProgress(model.id) + async pullModel(model: string, id?: string, name?: string): Promise { + if (id) { + const model: Model = ModelManager.instance().get(id) + // Clip vision model - should not be handled by cortex.cpp + // TensorRT model - should not be handled by cortex.cpp + if ( + model && + (model.engine === InferenceEngine.nitro_tensorrt_llm || + model.settings.vision_model) + ) { + return downloadModel(model, (await systemInformation()).gpuSetting) } } - } - - private toHuggingFaceUrl(repoId: string): string { - try { - const url = new URL(repoId) - if (url.host !== 'huggingface.co') { - throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`) - } - - const paths = url.pathname.split('/').filter((e) => e.trim().length > 0) - if (paths.length < 2) { - throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`) - } - - return `${url.origin}/api/models/${paths[0]}/${paths[1]}` - } catch (err) { - if (err instanceof InvalidHostError) { - throw err - } - - if (repoId.startsWith('https')) { - throw new Error(`Cannot parse url: ${repoId}`) - } - - return `https://huggingface.co/api/models/${repoId}` - } - } - - async fetchHuggingFaceRepoData(repoId: string): Promise { - const sanitizedUrl = this.toHuggingFaceUrl(repoId) - console.debug('sanitizedUrl', sanitizedUrl) - - const huggingFaceAccessToken = ( - await this.getSetting(Settings.huggingFaceAccessToken, '') - ).trim() - - const headers = { - Accept: 'application/json', - } - - if (huggingFaceAccessToken.length > 0) { - headers['Authorization'] = `Bearer ${huggingFaceAccessToken}` - } - - const res = await fetch(sanitizedUrl, { - headers: headers, - }) - const response = await res.json() - if (response['error'] != null) { - throw new Error(response['error']) - } - - const data = response as HuggingFaceRepoData - - if (data.tags.indexOf('gguf') === -1) { - throw new 
NotSupportedModelError( - `${repoId} is not supported. Only GGUF models are supported.` - ) - } - - const promises: Promise[] = [] - - // fetching file sizes - const url = new URL(sanitizedUrl) - const paths = url.pathname.split('/').filter((e) => e.trim().length > 0) - - for (const sibling of data.siblings) { - const downloadUrl = `https://huggingface.co/${paths[2]}/${paths[3]}/resolve/main/${sibling.rfilename}` - sibling.downloadUrl = downloadUrl - promises.push(getFileSize(downloadUrl)) - } - - const result = await Promise.all(promises) - for (let i = 0; i < data.siblings.length; i++) { - data.siblings[i].fileSize = result[i] - } - - AllQuantizations.forEach((quantization) => { - data.siblings.forEach((sibling) => { - if (!sibling.quantization && sibling.rfilename.includes(quantization)) { - sibling.quantization = quantization - } - }) - }) - - data.modelUrl = `https://huggingface.co/${paths[2]}/${paths[3]}` - return data - } - - async fetchModelMetadata(url: string): Promise { - const { metadata } = await gguf(url) - return metadata - } - - /** - * Specifically for Jan server. - */ - private async startPollingDownloadProgress(modelId: string): Promise { - // wait for some seconds before polling - await new Promise((resolve) => setTimeout(resolve, 3000)) - - return new Promise((resolve) => { - const interval = setInterval(async () => { - fetch( - `${window.core.api.baseApiUrl}/v1/download/${DownloadRoute.getDownloadProgress}/${modelId}`, - { - method: 'GET', - headers: { contentType: 'application/json' }, - } - ).then(async (res) => { - const state: DownloadState = await res.json() - if (state.downloadState === 'end') { - events.emit(DownloadEvent.onFileDownloadSuccess, state) - clearInterval(interval) - resolve() - return - } - - if (state.downloadState === 'error') { - events.emit(DownloadEvent.onFileDownloadError, state) - clearInterval(interval) - resolve() - return - } - - events.emit(DownloadEvent.onFileDownloadUpdate, state) - }) - }, 1000) - }) + /** + * Sending POST to /models/pull/{id} endpoint to pull the model + */ + return this.cortexAPI.pullModel(model, id, name) } /** * Cancels the download of a specific machine learning model. * - * @param {string} modelId - The ID of the model whose download is to be cancelled. + * @param {string} model - The ID of the model whose download is to be cancelled. * @returns {Promise} A promise that resolves when the download has been cancelled. */ - async cancelModelDownload(modelId: string): Promise { - const path = await joinPath([JanModelExtension._homeDir, modelId, modelId]) - try { - await abortDownload(path) - await fs.unlinkSync(path) - } catch (e) { - console.error(e) + async cancelModelPull(model: string): Promise { + if (model) { + const modelDto: Model = ModelManager.instance().get(model) + // Clip vision model - should not be handled by cortex.cpp + // TensorRT model - should not be handled by cortex.cpp + if ( + modelDto && + (modelDto.engine === InferenceEngine.nitro_tensorrt_llm || + modelDto.settings.vision_model) + ) { + for (const source of modelDto.sources) { + const path = await joinPath(['models', modelDto.id, source.filename]) + await abortDownload(path) + } + } } + /** + * Sending DELETE to /models/pull/{id} endpoint to cancel a model pull + */ + return this.cortexAPI.cancelModelPull(model) } /** - * Deletes a machine learning model. - * @param filePath - The path to the model file to delete. + * Deletes a pulled model + * @param model - The model to delete * @returns A Promise that resolves when the model is deleted. 
*/ - async deleteModel(model: ModelFile): Promise<void> { - try { - const dirPath = await dirName(model.file_path) - const jsonFilePath = await joinPath([ - dirPath, - JanModelExtension._modelMetadataFileName, - ]) - const modelInfo = JSON.parse( - await this.readModelMetadata(jsonFilePath) - ) as Model - - // TODO: This is so tricky? - // Should depend on sources? - const isUserImportModel = - modelInfo.metadata?.author?.toLowerCase() === 'user' - if (isUserImportModel) { - // just delete the folder - return fs.rm(dirPath) - } - - // remove all files under dirPath except model.json - const files = await fs.readdirSync(dirPath) - const deletePromises = files.map(async (fileName: string) => { - if (fileName !== JanModelExtension._modelMetadataFileName) { - return fs.unlinkSync(await joinPath([dirPath, fileName])) - } + async deleteModel(model: string): Promise<void> { + return this.cortexAPI + .deleteModel(model) + .catch((e) => console.debug(e)) + .finally(async () => { + // Delete legacy model files + await deleteModelFiles(model).catch((e) => console.debug(e)) }) - await Promise.allSettled(deletePromises) - } catch (err) { - console.error(err) - } } /** - * Gets all downloaded models. + * Gets all pulled models * @returns A Promise that resolves with an array of all models. */ - async getDownloadedModels(): Promise<ModelFile[]> { - return await this.getModelsMetadata( - async (modelDir: string, model: Model) => { - if (!JanModelExtension._offlineInferenceEngine.includes(model.engine)) - return true + async getModels(): Promise<Model[]> { + /** + * In this action, returning an empty array right away would reset the + * app cache and the app would not function properly; compare and try to + * import the legacy models instead. + */ + let currentModels: Model[] = [] - // model binaries (sources) are absolute path & exist - const existFiles = await Promise.all( - model.sources.map( - (source) => - // Supposed to be a local file url - !source.url.startsWith(`http://`) && - !source.url.startsWith(`https://`) - ) - ) - if (existFiles.every((exist) => exist)) return true + /** + * Legacy models should be supported + */ + let legacyModels = await scanModelsFolder() - const result = await fs - .readdirSync(await joinPath([JanModelExtension._homeDir, modelDir])) - .then((files: string[]) => { - // Model binary exists in the directory - // Model binary name can match model ID or be a .gguf file and not be an incomplete model file - return ( - files.includes(modelDir) || - files.filter((file) => { - if ( - file.endsWith(JanModelExtension._incompletedModelFileName) - ) { - return false - } - return ( - file - .toLowerCase() - .includes(JanModelExtension._supportedModelFormat) || - file - .toLowerCase() - .includes(JanModelExtension._tensorRtEngineFormat) - ) - // Check if the number of matched files equals the number of sources - })?.length >= model.sources.length - ) + try { + if (!localStorage.getItem(ExtensionEnum.downloadedModels)) { + // Updated from an older version than 0.5.5 + // Scan through the models folder and import them (Legacy flow) + // Return models immediately + currentModels = legacyModels + } else { + currentModels = JSON.parse( + localStorage.getItem(ExtensionEnum.downloadedModels) + ) as Model[] + } + } catch (e) { + currentModels = [] + console.error(e) + } + + /** + * Keep only models using the llama.cpp (nitro) engine; these are the + * candidates to import into cortex.cpp + */ + let toImportModels = currentModels.filter( + (e) => e.engine === InferenceEngine.nitro + ) + + await this.cortexAPI.getModels().then((models) => { + const existingIds = models.map((e) => e.id) + toImportModels = toImportModels.filter( + (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model + ) + }) + + console.log('To import models:', toImportModels.length) + /** + * There are models to import + * do not return models from cortex.cpp yet + * otherwise it will reset the app cache + */ + if (toImportModels.length > 0) { + // Import models + await Promise.all( + toImportModels.map(async (model: Model & { file_path: string }) => { + return this.importModel( + model.id, + model.sources[0].url.startsWith('http') || + !(await fs.existsSync(model.sources[0].url)) + ? await joinPath([ + await dirName(model.file_path), + model.sources[0]?.filename ?? + model.settings?.llama_model_path ?? + model.sources[0]?.url.split('/').pop() ?? + model.id, + ]) // Copied models + : model.sources[0].url, // Symlinked models + model.name + ).then((e) => { + this.updateModel({ + id: model.id, + ...model.settings, + ...model.parameters, + } as Partial<Model>) }) + }) + ) - return result - } - ) - } - - private async getModelJsonPath( - folderFullPath: string - ): Promise<string | undefined> { - // try to find model.json recursively inside each folder - if (!(await fs.existsSync(folderFullPath))) return undefined - - const files: string[] = await fs.readdirSync(folderFullPath) - if (files.length === 0) return undefined - - if (files.includes(JanModelExtension._modelMetadataFileName)) { - return joinPath([ - folderFullPath, - JanModelExtension._modelMetadataFileName, - ]) + return currentModels } - // continue recursively - for (const file of files) { - const path = await joinPath([folderFullPath, file]) - const fileStats = await fs.fileStat(path) - if (fileStats.isDirectory) { - const result = await this.getModelJsonPath(path) - if (result) return result - } - } - } - private async getModelsMetadata( - selector?: (path: string, model: Model) => Promise<boolean> - ): Promise<ModelFile[]> { - try { - if (!(await fs.existsSync(JanModelExtension._homeDir))) { - console.debug('Model folder not found') - return [] - } - - const files: string[] = await fs.readdirSync(JanModelExtension._homeDir) - - const allDirectories: string[] = [] - for (const file of files) { - if (file === '.DS_Store') continue - if (file === 'config') continue - allDirectories.push(file) - } - - const readJsonPromises = allDirectories.map(async (dirName) => { - // filter out directories that don't match the selector - // read model.json - const folderFullPath = await joinPath([ - JanModelExtension._homeDir, - dirName, - ]) - - const jsonPath = await this.getModelJsonPath(folderFullPath) - - if (await fs.existsSync(jsonPath)) { - // if we have the model.json file, read it - let model = await this.readModelMetadata(jsonPath) - - model = typeof model === 'object' ? model : JSON.parse(model) - - // This is to ensure backward compatibility with `model.json` with `source_url` - if (model['source_url'] != null) { - model['sources'] = [ - { - filename: model.id, - url: model['source_url'], - }, - ] - } - model.file_path = jsonPath - model.file_name = JanModelExtension._modelMetadataFileName - - if (selector && !(await selector?.(dirName, model))) { - return - } - return model - } else { - // otherwise, we generate our own model file - // TODO: we might have more than one binary file here.
This will be addressed with new version of Model file - // which is the PR from Hiro on branch Jan can see - return this.generateModelMetadata(dirName) - } + /** + * Models are imported successfully before + * Now return models from cortex.cpp and merge with legacy models which are not imported + */ + return await this.cortexAPI + .getModels() + .then((models) => { + return models.concat( + legacyModels.filter((e) => !models.some((x) => x.id === e.id)) + ) }) - const results = await Promise.allSettled(readJsonPromises) - const modelData = results.map((result) => { - if (result.status === 'fulfilled' && result.value) { - try { - const model = - typeof result.value === 'object' - ? result.value - : JSON.parse(result.value) - return model as ModelFile - } catch { - console.debug(`Unable to parse model metadata: ${result.value}`) - } - } - return undefined - }) - - return modelData.filter((e) => !!e) - } catch (err) { - console.error(err) - return [] - } - } - - private readModelMetadata(path: string) { - return fs.readFileSync(path, 'utf-8') + .catch(() => Promise.resolve(legacyModels)) } /** - * Handle the case where we have the model directory but we don't have the corresponding - * model.json file associated with it. - * - * This function will create a model.json file for the model. - * It works only with single binary file model. - * - * @param dirName the director which reside in ~/jan/models but does not have model.json file. + * Update a pulled model metadata + * @param model - The metadata of the model */ - private async generateModelMetadata(dirName: string): Promise { - const files: string[] = await fs.readdirSync( - await joinPath([JanModelExtension._homeDir, dirName]) - ) - - // sort files by name - files.sort() - - // find the first file which is not a directory - let binaryFileName: string | undefined = undefined - let binaryFileSize: number | undefined = undefined - - for (const file of files) { - if (file.endsWith(JanModelExtension._supportedModelFormat)) { - const path = await joinPath([JanModelExtension._homeDir, dirName, file]) - const fileStats = await fs.fileStat(path) - if (fileStats.isDirectory) continue - binaryFileSize = fileStats.size - binaryFileName = file - break - } - } - - if (!binaryFileName) { - console.warn(`Unable to find binary file for model ${dirName}`) - return - } - - const defaultModel = (await this.getDefaultModel()) as Model - const metadata = await executeOnMain( - NODE, - 'retrieveGGUFMetadata', - await joinPath([ - await getJanDataFolderPath(), - 'models', - dirName, - binaryFileName, - ]) - ).catch(() => undefined) - - const updatedModel = await this.retrieveGGUFMetadata(metadata) - - if (!defaultModel) { - console.error('Unable to find default model') - return - } - - const model: Model = { - ...defaultModel, - // Overwrite default N/A fields - id: dirName, - name: dirName, - sources: [ - { - url: binaryFileName, - filename: binaryFileName, - }, - ], - parameters: { - ...defaultModel.parameters, - ...updatedModel.parameters, - }, - settings: { - ...defaultModel.settings, - ...updatedModel.settings, - llama_model_path: binaryFileName, - }, - created: Date.now(), - description: '', - metadata: { - size: binaryFileSize, - author: 'User', - tags: [], - }, - } - - const modelFilePath = await joinPath([ - JanModelExtension._homeDir, - dirName, - JanModelExtension._modelMetadataFileName, - ]) - - await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2)) - - return model - } - - override async getDefaultModel(): Promise { - const 
defaultModel = DEFAULT_MODEL as Model - return defaultModel + async updateModel(model: Partial<Model>): Promise<Model> { + return this.cortexAPI + ?.updateModel(model) + .then(() => this.cortexAPI!.getModel(model.id)) } /** - * Gets all available models. - * @returns A Promise that resolves with an array of all models. + * Import an existing model file + * @param model + * @param modelPath + * @param name + * @param option */ - async getConfiguredModels(): Promise<ModelFile[]> { - return this.getModelsMetadata() + async importModel( + model: string, + modelPath: string, + name?: string, + option?: OptionType + ): Promise<void> { + return this.cortexAPI.importModel(model, modelPath, name, option) } + /** + * Check model status + * @param model + */ + async isModelLoaded(model: string): Promise<boolean> { + return this.cortexAPI.getModelStatus(model) + } + + /** + * Handle download state from main app + */ handleDesktopEvents() { if (window && window.electronAPI) { window.electronAPI.onFileDownloadUpdate( @@ -663,248 +278,4 @@ export default class JanModelExtension extends ModelExtension { ) } } - - private async importModelSymlink( - modelBinaryPath: string, - modelFolderName: string, - modelFolderPath: string - ): Promise<ModelFile> { - const fileStats = await fs.fileStat(modelBinaryPath, true) - const binaryFileSize = fileStats.size - - // Just need to generate model.json there - const defaultModel = (await this.getDefaultModel()) as Model - if (!defaultModel) { - console.error('Unable to find default model') - return - } - - const metadata = await executeOnMain( - NODE, - 'retrieveGGUFMetadata', - modelBinaryPath - ) - - const binaryFileName = await baseName(modelBinaryPath) - const updatedModel = await this.retrieveGGUFMetadata(metadata) - - const model: Model = { - ...defaultModel, - id: modelFolderName, - name: modelFolderName, - sources: [ - { - url: modelBinaryPath, - filename: binaryFileName, - }, - ], - parameters: { - ...defaultModel.parameters, - ...updatedModel.parameters, - }, - - settings: { - ...defaultModel.settings, - ...updatedModel.settings, - llama_model_path: binaryFileName, - }, - created: Date.now(), - description: '', - metadata: { - size: binaryFileSize, - author: 'User', - tags: [], - }, - } - - const modelFilePath = await joinPath([ - modelFolderPath, - JanModelExtension._modelMetadataFileName, - ]) - - await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2)) - - return { - ...model, - file_path: modelFilePath, - file_name: JanModelExtension._modelMetadataFileName, - } - } - - async updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile> { - if (modelInfo.id == null) throw new Error('Model ID is required') - - const model = JSON.parse( - await this.readModelMetadata(modelInfo.file_path) - ) as ModelFile - - const updatedModel: ModelFile = { - ...model, - ...modelInfo, - parameters: { - ...model.parameters, - ...modelInfo.parameters, - }, - settings: { - ...model.settings, - ...modelInfo.settings, - }, - metadata: { - ...model.metadata, - ...modelInfo.metadata, - }, - // Should not persist file_path & file_name - file_path: undefined, - file_name: undefined, - } - - await fs.writeFileSync( - modelInfo.file_path, - JSON.stringify(updatedModel, null, 2) - ) - return updatedModel - } - - private async importModel( - model: ImportingModel, - optionType: OptionType - ): Promise<Model> { - const binaryName = (await baseName(model.path)).replace(/\s/g, '') - - let modelFolderName = binaryName - if (binaryName.endsWith(JanModelExtension._supportedModelFormat)) { - modelFolderName = binaryName.replace( - JanModelExtension._supportedModelFormat, - '' - ) - } - - 
const modelFolderPath = await this.getModelFolderName(modelFolderName) - await fs.mkdir(modelFolderPath) - - const uniqueFolderName = await baseName(modelFolderPath) - const modelBinaryFile = binaryName.endsWith( - JanModelExtension._supportedModelFormat - ) - ? binaryName - : `${binaryName}${JanModelExtension._supportedModelFormat}` - - const binaryPath = await joinPath([modelFolderPath, modelBinaryFile]) - - if (optionType === 'SYMLINK') { - return this.importModelSymlink( - model.path, - uniqueFolderName, - modelFolderPath - ) - } - - const srcStat = await fs.fileStat(model.path, true) - - // interval getting the file size to calculate the percentage - const interval = setInterval(async () => { - const destStats = await fs.fileStat(binaryPath, true) - const percentage = destStats.size / srcStat.size - events.emit(LocalImportModelEvent.onLocalImportModelUpdate, { - ...model, - percentage, - }) - }, 1000) - - await fs.copyFile(model.path, binaryPath) - - clearInterval(interval) - - // generate model json - return this.generateModelMetadata(uniqueFolderName) - } - - private async getModelFolderName( - modelFolderName: string, - count?: number - ): Promise { - const newModelFolderName = count - ? `${modelFolderName}-${count}` - : modelFolderName - - const janDataFolderPath = await getJanDataFolderPath() - const modelFolderPath = await joinPath([ - janDataFolderPath, - 'models', - newModelFolderName, - ]) - - const isFolderExist = await fs.existsSync(modelFolderPath) - if (!isFolderExist) { - return modelFolderPath - } else { - const newCount = (count ?? 0) + 1 - return this.getModelFolderName(modelFolderName, newCount) - } - } - - async importModels( - models: ImportingModel[], - optionType: OptionType - ): Promise { - const importedModels: Model[] = [] - - for (const model of models) { - events.emit(LocalImportModelEvent.onLocalImportModelUpdate, model) - try { - const importedModel = await this.importModel(model, optionType) - events.emit(LocalImportModelEvent.onLocalImportModelSuccess, { - ...model, - modelId: importedModel.id, - }) - importedModels.push(importedModel) - } catch (err) { - events.emit(LocalImportModelEvent.onLocalImportModelFailed, { - ...model, - error: err, - }) - } - } - - events.emit( - LocalImportModelEvent.onLocalImportModelFinished, - importedModels - ) - } - - /** - * Retrieve Model Settings from GGUF Metadata - * @param metadata - * @returns - */ - async retrieveGGUFMetadata(metadata: any): Promise> { - const defaultModel = DEFAULT_MODEL as Model - var template = await executeOnMain( - NODE, - 'renderJinjaTemplate', - metadata - ).catch(() => undefined) - - const eos_id = metadata['tokenizer.ggml.eos_token_id'] - const architecture = metadata['general.architecture'] - - return { - settings: { - prompt_template: template ?? defaultModel.settings.prompt_template, - ctx_len: - metadata[`${architecture}.context_length`] ?? - metadata['llama.context_length'] ?? - 4096, - ngl: - (metadata[`${architecture}.block_count`] ?? - metadata['llama.block_count'] ?? - 32) + 1, - }, - parameters: { - stop: eos_id - ? [metadata?.['tokenizer.ggml.tokens'][eos_id] ?? 
''] - : defaultModel.parameters.stop, - }, - } - } } diff --git a/extensions/model-extension/src/legacy/delete.ts b/extensions/model-extension/src/legacy/delete.ts new file mode 100644 index 000000000..5288e30ee --- /dev/null +++ b/extensions/model-extension/src/legacy/delete.ts @@ -0,0 +1,11 @@ +import { fs, joinPath } from '@janhq/core' + +export const deleteModelFiles = async (id: string) => { + try { + const dirPath = await joinPath(['file://models', id]) + // remove model folder directory + await fs.rm(dirPath) + } catch (err) { + console.error(err) + } +} diff --git a/extensions/model-extension/src/legacy/download.ts b/extensions/model-extension/src/legacy/download.ts new file mode 100644 index 000000000..d4d6c62d9 --- /dev/null +++ b/extensions/model-extension/src/legacy/download.ts @@ -0,0 +1,105 @@ +import { + downloadFile, + DownloadRequest, + fs, + GpuSetting, + InferenceEngine, + joinPath, + Model, +} from '@janhq/core' + +export const downloadModel = async ( + model: Model, + gpuSettings?: GpuSetting, + network?: { ignoreSSL?: boolean; proxy?: string } +): Promise<void> => { + const homedir = 'file://models' + const supportedGpuArch = ['ampere', 'ada'] + // Create corresponding directory + const modelDirPath = await joinPath([homedir, model.id]) + if (!(await fs.existsSync(modelDirPath))) await fs.mkdir(modelDirPath) + + const jsonFilePath = await joinPath([modelDirPath, 'model.json']) + // Write model.json on download + if (!(await fs.existsSync(jsonFilePath))) + await fs.writeFileSync( + jsonFilePath, + JSON.stringify(model, null, 2) + ) + + if (model.engine === InferenceEngine.nitro_tensorrt_llm) { + if (!gpuSettings || gpuSettings.gpus.length === 0) { + console.error('No GPU found. Please check your GPU setting.') + return + } + const firstGpu = gpuSettings.gpus[0] + if (!firstGpu.name.toLowerCase().includes('nvidia')) { + console.error('No Nvidia GPU found. Please check your GPU setting.') + return + } + const gpuArch = firstGpu.arch + if (gpuArch === undefined) { + console.error('No GPU architecture found. Please check your GPU setting.') + return + } + + if (!supportedGpuArch.includes(gpuArch)) { + console.debug( + `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.` + ) + return + } + + const os = 'windows' // TODO: remove this hard-coded value + + const newSources = model.sources.map((source) => { + const newSource = { ...source } + newSource.url = newSource.url + .replace(/<os>/g, os) + .replace(/<gpuarch>/g, gpuArch) + return newSource + }) + model.sources = newSources + } + + console.debug(`Download sources: ${JSON.stringify(model.sources)}`) + + if (model.sources.length > 1) { + // path to model binaries + for (const source of model.sources) { + let path = extractFileName(source.url, '.gguf') + if (source.filename) { + path = await joinPath([modelDirPath, source.filename]) + } + + const downloadRequest: DownloadRequest = { + url: source.url, + localPath: path, + modelId: model.id, + } + downloadFile(downloadRequest, network) + } + } else { + const fileName = extractFileName(model.sources[0]?.url, '.gguf') + const path = await joinPath([modelDirPath, fileName]) + const downloadRequest: DownloadRequest = { + url: model.sources[0]?.url, + localPath: path, + modelId: model.id, + } + downloadFile(downloadRequest, network) + } +} + +/** + * Try to retrieve the download file name from the source url + */ +function extractFileName(url: string, fileExtension: string): string { + if (!url) return fileExtension + + const extractedFileName = url.split('/').pop() + const fileName = extractedFileName.toLowerCase().endsWith(fileExtension) + ? extractedFileName + : extractedFileName + fileExtension + return fileName +} diff --git a/extensions/model-extension/src/legacy/model-json.test.ts b/extensions/model-extension/src/legacy/model-json.test.ts new file mode 100644 index 000000000..a4ea5bc0b --- /dev/null +++ b/extensions/model-extension/src/legacy/model-json.test.ts @@ -0,0 +1,80 @@ +import { scanModelsFolder, getModelJsonPath } from './model-json' + +// Mock the @janhq/core module +jest.mock('@janhq/core', () => ({ + fs: { + existsSync: jest.fn(), + readdirSync: jest.fn(), + fileStat: jest.fn(), + readFileSync: jest.fn(), + }, + joinPath: jest.fn((paths) => paths.join('/')), +})) + +// Import the mocked fs and joinPath after the mock is set up +const { fs } = jest.requireMock('@janhq/core') + +describe('model-json', () => { + beforeEach(() => { + jest.clearAllMocks() + }) + + describe('scanModelsFolder', () => { + it('should return an empty array when models folder does not exist', async () => { + fs.existsSync.mockReturnValue(false) + + const result = await scanModelsFolder() + expect(result).toEqual([]) + }) + + it('should return an array of models when valid model folders exist', async () => { + const mockModelJson = { + id: 'test-model', + sources: [ + { + filename: 'test-model', + url: 'file://models/test-model/test-model.gguf', + }, + ], + } + + fs.existsSync.mockReturnValue(true) + fs.readdirSync.mockReturnValueOnce(['test-model']) + fs.fileStat.mockResolvedValue({ isDirectory: () => true }) + fs.readFileSync.mockReturnValue(JSON.stringify(mockModelJson)) + fs.readdirSync.mockReturnValueOnce(['test-model.gguf', 'model.json']) + + const result = await scanModelsFolder() + expect(result).toHaveLength(1) + expect(result[0]).toMatchObject(mockModelJson) + }) + }) + + describe('getModelJsonPath', () => { + it('should return undefined when folder does not exist', async () => { + fs.existsSync.mockReturnValue(false) + + const result = await getModelJsonPath('non-existent-folder') + expect(result).toBeUndefined() + }) + + it('should return the path when model.json exists in the root folder', async () => { 
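+ // getModelJsonPath looks for model.json in the folder root before recursing + // into subfolders, so a single readdirSync mock is enough for this case.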
fs.existsSync.mockReturnValue(true) + fs.readdirSync.mockReturnValue(['model.json']) + + const result = await getModelJsonPath('test-folder') + expect(result).toBe('test-folder/model.json') + }) + + it('should return the path when model.json exists in a subfolder', async () => { + fs.existsSync.mockReturnValue(true) + fs.readdirSync + .mockReturnValueOnce(['subfolder']) + .mockReturnValueOnce(['model.json']) + fs.fileStat.mockResolvedValue({ isDirectory: () => true }) + + const result = await getModelJsonPath('test-folder') + expect(result).toBe('test-folder/subfolder/model.json') + }) + }) +}) diff --git a/extensions/model-extension/src/legacy/model-json.ts b/extensions/model-extension/src/legacy/model-json.ts new file mode 100644 index 000000000..3cad6014b --- /dev/null +++ b/extensions/model-extension/src/legacy/model-json.ts @@ -0,0 +1,144 @@ +import { InferenceEngine, Model, fs, joinPath } from '@janhq/core' +//// LEGACY MODEL FOLDER //// +const LocalEngines = [ + InferenceEngine.cortex, + InferenceEngine.cortex_llamacpp, + InferenceEngine.cortex_tensorrtllm, + InferenceEngine.cortex_onnx, + InferenceEngine.nitro_tensorrt_llm, + InferenceEngine.nitro, +] +/** + * Scan through models folder and return downloaded models + * @returns + */ +export const scanModelsFolder = async (): Promise<Model[]> => { + const _homeDir = 'file://models' + try { + if (!(await fs.existsSync(_homeDir))) { + console.debug('Model folder not found') + return [] + } + + const files: string[] = await fs.readdirSync(_homeDir) + + const allDirectories: string[] = [] + + for (const modelFolder of files) { + const fullModelFolderPath = await joinPath([_homeDir, modelFolder]) + if (!(await fs.fileStat(fullModelFolderPath)).isDirectory) continue + allDirectories.push(modelFolder) + } + + const readJsonPromises = allDirectories.map(async (dirName) => { + // filter out directories that don't match the selector + // read model.json + const folderFullPath = await joinPath([_homeDir, dirName]) + + const jsonPath = await getModelJsonPath(folderFullPath) + + if (await fs.existsSync(jsonPath)) { + // if we have the model.json file, read it + let model = await fs.readFileSync(jsonPath, 'utf-8') + + model = typeof model === 'object' ? model : JSON.parse(model) + + // This is to ensure backward compatibility with `model.json` with `source_url` + if (model['source_url'] != null) { + model['sources'] = [ + { + filename: model.id, + url: model['source_url'], + }, + ] + } + model.file_path = jsonPath + model.file_name = 'model.json' + + // Check model files exist + // model binaries (sources) are absolute path & exist (symlinked) + const existFiles = await Promise.all( + model.sources.map( + (source) => + // Supposed to be a local file url + !source.url.startsWith(`http://`) && + !source.url.startsWith(`https://`) + ) + ) + if ( + !LocalEngines.includes(model.engine) || + existFiles.every((exist) => exist) + ) + return model + + const result = await fs + .readdirSync(await joinPath([_homeDir, dirName])) + .then((files: string[]) => { + // Model binary exists in the directory + // Model binary name can match model ID or be a .gguf file and not be an incomplete model file + return ( + files.includes(dirName) || // Legacy model GGUF without extension + files.filter((file) => { + return ( + file.toLowerCase().endsWith('.gguf') || // GGUF + file.toLowerCase().endsWith('.engine') // TensorRT-LLM + ) + })?.length >= (model.engine === InferenceEngine.nitro_tensorrt_llm ? 1 : (model.sources?.length ?? 1)) + ) + }) + + if (result) return model + else return undefined + } + }) + const results = await Promise.allSettled(readJsonPromises) + const modelData = results + .map((result) => { + if (result.status === 'fulfilled' && result.value) { + try { + const model = + typeof result.value === 'object' + ? result.value + : JSON.parse(result.value) + return model as Model + } catch { + console.debug(`Unable to parse model metadata: ${result.value}`) + } + } + return undefined + }) + .filter((e) => !!e) + + return modelData + } catch (err) { + console.error(err) + return [] + } +} + +/** + * Retrieve the model.json path from a folder + * @param folderFullPath + * @returns + */ +export const getModelJsonPath = async ( + folderFullPath: string +): Promise<string | undefined> => { + // try to find model.json recursively inside each folder + if (!(await fs.existsSync(folderFullPath))) return undefined + const files: string[] = await fs.readdirSync(folderFullPath) + if (files.length === 0) return undefined + if (files.includes('model.json')) { + return joinPath([folderFullPath, 'model.json']) + } + // continue recursively + for (const file of files) { + const path = await joinPath([folderFullPath, file]) + const fileStats = await fs.fileStat(path) + if (fileStats.isDirectory) { + const result = await getModelJsonPath(path) + if (result) return result + } + } +} +//// END LEGACY MODEL FOLDER //// diff --git a/extensions/model-extension/src/migration.test.ts b/extensions/model-extension/src/migration.test.ts new file mode 100644 index 000000000..a3ddfa87c --- /dev/null +++ b/extensions/model-extension/src/migration.test.ts @@ -0,0 +1,167 @@ +import { Model, InferenceEngine } from '@janhq/core' +import JanModelExtension from './index' + +// Mock the @janhq/core module +jest.mock('@janhq/core', () => ({ + ModelExtension: class {}, + InferenceEngine: { + nitro: 'nitro', + }, + joinPath: jest.fn(), + dirName: jest.fn(), +})) + +// Mock the CortexAPI +jest.mock('./cortex', () => ({ + CortexAPI: jest.fn().mockImplementation(() => ({ + getModels: jest.fn(), + importModel: jest.fn(), + })), +})) + +// Mock the model-json module +jest.mock('./model-json', () => ({ + scanModelsFolder: jest.fn(), +})) + +// Import the mocked scanModelsFolder after the mock is set up +const { scanModelsFolder } = jest.requireMock('./model-json') + +describe('JanModelExtension', () => { + let extension: JanModelExtension + let mockLocalStorage: { [key: string]: string } + let mockCortexAPI: jest.Mock + + beforeEach(() => { + // @ts-ignore + extension = new JanModelExtension() + mockLocalStorage = {} + mockCortexAPI = extension.cortexAPI as any + + // Mock localStorage + Object.defineProperty(global, 'localStorage', { + value: { + getItem: jest.fn((key) => mockLocalStorage[key]), + setItem: jest.fn((key, value) => { + mockLocalStorage[key] = value + }), + }, + writable: true, + }) + }) + + describe('getModels', () => { + it('should scan models folder when localStorage is empty', async () => { + const mockModels: Model[] = [ + { + id: 'model1', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + sources: [ + { filename: 'model1.gguf', url: 'file://models/model1.gguf' }, + ], + file_path: '/path/to/model1', + }, + { + id: 'model2', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + sources: [ + { filename: 'model2.gguf', url: 'file://models/model2.gguf' }, + ], + file_path: '/path/to/model2', + }, + ] as any + scanModelsFolder.mockResolvedValue(mockModels) 
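+ // With no 'downloadedModels' key in localStorage, getModels() falls back to + // the legacy flow: scanModelsFolder() results are returned as current models.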
extension.cortexAPI.importModel = jest + .fn() + .mockResolvedValueOnce(mockModels[0]) + extension.cortexAPI.getModels = jest + .fn() + .mockResolvedValue([mockModels[0]]) + extension.cortexAPI.importModel = jest + .fn() + .mockResolvedValueOnce(mockModels[1]) + extension.cortexAPI.getModels = jest + .fn() + .mockResolvedValue([mockModels[0], mockModels[1]]) + + const result = await extension.getModels() + expect(scanModelsFolder).toHaveBeenCalled() + expect(result).toEqual(mockModels) + }) + + it('should import models when there are models to import', async () => { + const mockModels: Model[] = [ + { + id: 'model1', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + file_path: '/path/to/model1', + sources: [ + { filename: 'model1.gguf', url: 'file://models/model1.gguf' }, + ], + }, + { + id: 'model2', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + file_path: '/path/to/model2', + sources: [ + { filename: 'model2.gguf', url: 'file://models/model2.gguf' }, + ], + }, + ] as any + mockLocalStorage['downloadedModels'] = JSON.stringify(mockModels) + + extension.cortexAPI.getModels = jest.fn().mockResolvedValue([]) + extension.importModel = jest.fn().mockResolvedValue(undefined) + + const result = await extension.getModels() + + expect(extension.importModel).toHaveBeenCalledTimes(2) + expect(result).toEqual(mockModels) + }) + + it('should return models from cortexAPI when all models are already imported', async () => { + const mockModels: Model[] = [ + { + id: 'model1', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + sources: [ + { filename: 'model1.gguf', url: 'file://models/model1.gguf' }, + ], + }, + { + id: 'model2', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + sources: [ + { filename: 'model2.gguf', url: 'file://models/model2.gguf' }, + ], + }, + ] as any + mockLocalStorage['downloadedModels'] = JSON.stringify(mockModels) + + extension.cortexAPI.getModels = jest.fn().mockResolvedValue(mockModels) + + const result = await extension.getModels() + + expect(extension.cortexAPI.getModels).toHaveBeenCalled() + expect(result).toEqual(mockModels) + }) + }) +}) diff --git a/extensions/model-extension/src/node/index.ts b/extensions/model-extension/src/node/index.ts deleted file mode 100644 index 2acf6ec4a..000000000 --- a/extensions/model-extension/src/node/index.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { closeSync, openSync, readSync } from 'fs' -import { Template } from '@huggingface/jinja' -/** - * This is to retrieve the metadata from a GGUF file - * It uses hyllama and jinja from @huggingface module - */ -export const retrieveGGUFMetadata = async (ggufPath: string) => { - try { - const { ggufMetadata } = await import('hyllama') - // Read first 10mb of gguf file - const fd = openSync(ggufPath, 'r') - const buffer = new Uint8Array(10_000_000) - readSync(fd, buffer, 0, 10_000_000, 0) - closeSync(fd) - - // Parse metadata and tensor info - const { metadata } = ggufMetadata(buffer.buffer) - - return metadata - } catch (e) { - console.log('[MODEL_EXT]', e) - } -} - -/** - * Convert metadata to jinja template - * @param metadata - */ -export const renderJinjaTemplate = (metadata: any): string => { - const template = new Template(metadata['tokenizer.chat_template']) - const eos_id = metadata['tokenizer.ggml.eos_token_id'] - const bos_id = metadata['tokenizer.ggml.bos_token_id'] - if (eos_id === undefined || bos_id === undefined) { - return '' - 
} - const eos_token = metadata['tokenizer.ggml.tokens'][eos_id] - const bos_token = metadata['tokenizer.ggml.tokens'][bos_id] - // Parse jinja template - return template.render({ - add_generation_prompt: true, - eos_token, - bos_token, - messages: [ - { - role: 'system', - content: '{system_message}', - }, - { - role: 'user', - content: '{prompt}', - }, - ], - }) -} diff --git a/extensions/model-extension/src/node/node.test.ts b/extensions/model-extension/src/node/node.test.ts deleted file mode 100644 index afd2b8470..000000000 --- a/extensions/model-extension/src/node/node.test.ts +++ /dev/null @@ -1,53 +0,0 @@ -import { renderJinjaTemplate } from './index' -import { Template } from '@huggingface/jinja' - -jest.mock('@huggingface/jinja', () => ({ - Template: jest.fn((template: string) => ({ - render: jest.fn(() => `${template}_rendered`), - })), -})) - -describe('renderJinjaTemplate', () => { - beforeEach(() => { - jest.clearAllMocks() // Clear mocks between tests - }) - - it('should render the template with correct parameters', () => { - const metadata = { - 'tokenizer.chat_template': 'Hello, {{ messages }}!', - 'tokenizer.ggml.eos_token_id': 0, - 'tokenizer.ggml.bos_token_id': 1, - 'tokenizer.ggml.tokens': ['EOS', 'BOS'], - } - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!') - - expect(renderedTemplate).toBe('Hello, {{ messages }}!_rendered') - }) - - it('should handle missing token IDs gracefully', () => { - const metadata = { - 'tokenizer.chat_template': 'Hello, {{ messages }}!', - 'tokenizer.ggml.eos_token_id': 0, - 'tokenizer.ggml.tokens': ['EOS'], - } - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!') - - expect(renderedTemplate).toBe('') - }) - - it('should handle empty template gracefully', () => { - const metadata = {} - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith(undefined) - - expect(renderedTemplate).toBe("") - }) -}) diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts index 11c86a9a7..d9c89242f 100644 --- a/extensions/tensorrt-llm-extension/src/index.ts +++ b/extensions/tensorrt-llm-extension/src/index.ts @@ -7,9 +7,7 @@ import { DownloadEvent, DownloadRequest, DownloadState, - GpuSetting, InstallationState, - Model, baseName, downloadFile, events, @@ -23,7 +21,7 @@ import { ModelEvent, getJanDataFolderPath, SystemInformation, - ModelFile, + Model, } from '@janhq/core' /** @@ -137,7 +135,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { events.emit(ModelEvent.OnModelsUpdate, {}) } - override async loadModel(model: ModelFile): Promise { + override async loadModel(model: Model): Promise { if ((await this.installationState()) === 'Installed') return super.loadModel(model) @@ -177,7 +175,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { override async inference(data: MessageRequest) { if (!this.loadedModel) return // TensorRT LLM Extension supports streaming only - if (data.model) data.model.parameters.stream = true + if (data.model && data.model.parameters) data.model.parameters.stream = true super.inference(data) } diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts index 77003389f..d02427170 100644 --- a/extensions/tensorrt-llm-extension/src/node/index.ts +++ 
b/extensions/tensorrt-llm-extension/src/node/index.ts @@ -41,7 +41,7 @@ async function loadModel( // e.g. ~/jan/models/llama-2 let modelFolder = params.modelFolder - if (params.model.settings.prompt_template) { + if (params.model.settings?.prompt_template) { const promptTemplate = params.model.settings.prompt_template const prompt = promptTemplateConverter(promptTemplate) if (prompt?.error) { diff --git a/extensions/tensorrt-llm-extension/tsconfig.json b/extensions/tensorrt-llm-extension/tsconfig.json index be07e716c..94465ebb6 100644 --- a/extensions/tensorrt-llm-extension/tsconfig.json +++ b/extensions/tensorrt-llm-extension/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "moduleResolution": "node", - "target": "es5", + "target": "ES2015", "module": "ES2020", "lib": ["es2015", "es2016", "es2017", "dom"], "strict": true, diff --git a/joi/rollup.config.mjs b/joi/rollup.config.mjs index 333a61c5c..8f20635a5 100644 --- a/joi/rollup.config.mjs +++ b/joi/rollup.config.mjs @@ -38,7 +38,11 @@ export default [ postcss({ plugins: [autoprefixer(), tailwindcss(tailwindConfig)], sourceMap: true, - use: ['sass'], + use: { + sass: { + silenceDeprecations: ['legacy-js-api'], + }, + }, minimize: true, extract: 'main.css', }), diff --git a/joi/src/core/Modal/styles.scss b/joi/src/core/Modal/styles.scss index 11af9418a..717ce2ac7 100644 --- a/joi/src/core/Modal/styles.scss +++ b/joi/src/core/Modal/styles.scss @@ -3,7 +3,6 @@ button, fieldset, .modal { &__overlay { - @apply backdrop-blur-lg; background-color: hsla(var(--modal-overlay)); z-index: 200; position: fixed; diff --git a/themes/dark-dimmed/theme.json b/themes/dark-dimmed/theme.json index c96085092..062469072 100644 --- a/themes/dark-dimmed/theme.json +++ b/themes/dark-dimmed/theme.json @@ -114,7 +114,7 @@ }, "modal": { - "overlay": "0, 0%, 0%, 0.5", + "overlay": "0, 0%, 0%, 0.7", "bg": "215, 25%, 9%, 1", "fg": "0, 0%, 100%, 11" }, diff --git a/themes/joi-dark/theme.json b/themes/joi-dark/theme.json index d389d853c..c299e3b1e 100644 --- a/themes/joi-dark/theme.json +++ b/themes/joi-dark/theme.json @@ -52,7 +52,7 @@ }, "left-panel": { - "bg": "0, 0%, 13%, 0", + "bg": "0, 0%, 13%, 1", "menu": "0, 0%, 95%, 1", "menu-hover": "0, 0%, 28%, 0.2", "menu-active": "0, 0%, 100%, 1", @@ -64,7 +64,7 @@ }, "right-panel": { - "bg": "0, 0%, 13%, 0" + "bg": "0, 0%, 13%, 1" }, "tooltip": { diff --git a/themes/joi-light/theme.json b/themes/joi-light/theme.json index 36b7a0a50..7468ff976 100644 --- a/themes/joi-light/theme.json +++ b/themes/joi-light/theme.json @@ -114,7 +114,7 @@ }, "modal": { - "overlay": "0, 0%, 0%, 0.5", + "overlay": "0, 0%, 0%, 0.7", "bg": "0, 0%, 100%, 1", "fg": "0, 0%, 0%, 1" }, diff --git a/themes/night-blue/theme.json b/themes/night-blue/theme.json index c09442b43..96d2e242c 100644 --- a/themes/night-blue/theme.json +++ b/themes/night-blue/theme.json @@ -114,7 +114,7 @@ }, "modal": { - "overlay": "0, 0%, 0%, 0.5", + "overlay": "0, 0%, 0%, 0.7", "bg": "222, 96%, 16%, 1", "fg": "0, 0%, 100%, 11" }, diff --git a/web/containers/ErrorMessage/index.test.tsx b/web/containers/ErrorMessage/index.test.tsx index 99dad5415..d2ae5aa81 100644 --- a/web/containers/ErrorMessage/index.test.tsx +++ b/web/containers/ErrorMessage/index.test.tsx @@ -1,54 +1,43 @@ // ErrorMessage.test.tsx -import React from 'react'; -import { render, screen, fireEvent } from '@testing-library/react'; -import '@testing-library/jest-dom'; -import ErrorMessage from './index'; -import { ThreadMessage, MessageStatus, ErrorCode } from '@janhq/core'; -import { useAtomValue, 
useSetAtom } from 'jotai'; -import useSendChatMessage from '@/hooks/useSendChatMessage'; +import React from 'react' +import { render, screen, fireEvent } from '@testing-library/react' +import '@testing-library/jest-dom' +import ErrorMessage from './index' +import { ThreadMessage, MessageStatus, ErrorCode } from '@janhq/core' +import { useAtomValue, useSetAtom } from 'jotai' +import useSendChatMessage from '@/hooks/useSendChatMessage' // Mock the dependencies jest.mock('jotai', () => { - const originalModule = jest.requireActual('jotai') - return { - ...originalModule, - useAtomValue: jest.fn(), - useSetAtom: jest.fn(), - } - }) + const originalModule = jest.requireActual('jotai') + return { + ...originalModule, + useAtomValue: jest.fn(), + useSetAtom: jest.fn(), + } +}) jest.mock('@/hooks/useSendChatMessage', () => ({ __esModule: true, default: jest.fn(), -})); +})) describe('ErrorMessage Component', () => { - const mockSetMainState = jest.fn(); - const mockSetSelectedSettingScreen = jest.fn(); - const mockSetModalTroubleShooting = jest.fn(); - const mockResendChatMessage = jest.fn(); + const mockSetMainState = jest.fn() + const mockSetSelectedSettingScreen = jest.fn() + const mockSetModalTroubleShooting = jest.fn() + const mockResendChatMessage = jest.fn() beforeEach(() => { - jest.clearAllMocks(); - (useAtomValue as jest.Mock).mockReturnValue([]); - (useSetAtom as jest.Mock).mockReturnValue(mockSetMainState); - (useSetAtom as jest.Mock).mockReturnValue(mockSetSelectedSettingScreen); - (useSetAtom as jest.Mock).mockReturnValue(mockSetModalTroubleShooting); - (useSendChatMessage as jest.Mock).mockReturnValue({ resendChatMessage: mockResendChatMessage }); - }); - - it('renders stopped message correctly', () => { - const message: ThreadMessage = { - id: '1', - status: MessageStatus.Stopped, - content: [{ text: { value: 'Test message' } }], - } as ThreadMessage; - - render(); - - expect(screen.getByText("Oops! The generation was interrupted. 
Let's give it another go!")).toBeInTheDocument(); - expect(screen.getByText('Regenerate')).toBeInTheDocument(); - }); + jest.clearAllMocks() + ;(useAtomValue as jest.Mock).mockReturnValue([]) + ;(useSetAtom as jest.Mock).mockReturnValue(mockSetMainState) + ;(useSetAtom as jest.Mock).mockReturnValue(mockSetSelectedSettingScreen) + ;(useSetAtom as jest.Mock).mockReturnValue(mockSetModalTroubleShooting) + ;(useSendChatMessage as jest.Mock).mockReturnValue({ + resendChatMessage: mockResendChatMessage, + }) + }) it('renders error message with InvalidApiKey correctly', () => { const message: ThreadMessage = { @@ -56,13 +45,13 @@ describe('ErrorMessage Component', () => { status: MessageStatus.Error, error_code: ErrorCode.InvalidApiKey, content: [{ text: { value: 'Invalid API Key' } }], - } as ThreadMessage; + } as ThreadMessage - render(); - - expect(screen.getByTestId('invalid-API-key-error')).toBeInTheDocument(); - expect(screen.getByText('Settings')).toBeInTheDocument(); - }); + render() + + expect(screen.getByTestId('invalid-API-key-error')).toBeInTheDocument() + expect(screen.getByText('Settings')).toBeInTheDocument() + }) it('renders general error message correctly', () => { const message: ThreadMessage = { @@ -70,26 +59,15 @@ describe('ErrorMessage Component', () => { status: MessageStatus.Error, error_code: ErrorCode.Unknown, content: [{ text: { value: 'Unknown error occurred' } }], - } as ThreadMessage; + } as ThreadMessage - render(); - - expect(screen.getByText("Apologies, something’s amiss!")).toBeInTheDocument(); - expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument(); - }); + render() - it('calls regenerateMessage when Regenerate button is clicked', () => { - const message: ThreadMessage = { - id: '1', - status: MessageStatus.Stopped, - content: [{ text: { value: 'Test message' } }], - } as ThreadMessage; - - render(); - - fireEvent.click(screen.getByText('Regenerate')); - expect(mockResendChatMessage).toHaveBeenCalled(); - }); + expect( + screen.getByText('Apologies, something’s amiss!') + ).toBeInTheDocument() + expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument() + }) it('opens troubleshooting modal when link is clicked', () => { const message: ThreadMessage = { @@ -97,11 +75,11 @@ describe('ErrorMessage Component', () => { status: MessageStatus.Error, error_code: ErrorCode.Unknown, content: [{ text: { value: 'Unknown error occurred' } }], - } as ThreadMessage; + } as ThreadMessage - render(); - - fireEvent.click(screen.getByText('troubleshooting assistance')); - expect(mockSetModalTroubleShooting).toHaveBeenCalledWith(true); - }); -}); + render() + + fireEvent.click(screen.getByText('troubleshooting assistance')) + expect(mockSetModalTroubleShooting).toHaveBeenCalledWith(true) + }) +}) diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx index bcd056b93..18558c1d8 100644 --- a/web/containers/ErrorMessage/index.tsx +++ b/web/containers/ErrorMessage/index.tsx @@ -4,9 +4,8 @@ import { MessageStatus, ThreadMessage, } from '@janhq/core' -import { Button } from '@janhq/joi' + import { useAtomValue, useSetAtom } from 'jotai' -import { RefreshCcw } from 'lucide-react' import AutoLink from '@/containers/AutoLink' import ModalTroubleShooting, { @@ -15,27 +14,17 @@ import ModalTroubleShooting, { import { MainViewState } from '@/constants/screens' -import useSendChatMessage from '@/hooks/useSendChatMessage' - import { mainViewStateAtom } from '@/helpers/atoms/App.atom' -import { getCurrentChatMessagesAtom } 
from '@/helpers/atoms/ChatMessage.atom' + import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' const ErrorMessage = ({ message }: { message: ThreadMessage }) => { - const messages = useAtomValue(getCurrentChatMessagesAtom) - const { resendChatMessage } = useSendChatMessage() const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom) const setMainState = useSetAtom(mainViewStateAtom) const setSelectedSettingScreen = useSetAtom(selectedSettingAtom) const activeThread = useAtomValue(activeThreadAtom) - const regenerateMessage = async () => { - const lastMessageIndex = messages.length - 1 - const message = messages[lastMessageIndex] - resendChatMessage(message) - } - const getErrorTitle = () => { switch (message.error_code) { case ErrorCode.Unknown: @@ -77,23 +66,6 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => { return (
-        {message.status === MessageStatus.Stopped && (
-          <div key={message.id}>
-            <span>
-              Oops! The generation was interrupted. Let&apos;s give it another go!
-            </span>
-            <Button onClick={regenerateMessage}>
-              <RefreshCcw size={14} />
-              Regenerate
-            </Button>
-          </div>
-        )}
         {message.status === MessageStatus.Error && (
{ > {getErrorTitle()}

- Jan’s in beta. Access  + {`Something's wrong.`} Access  setModalTroubleShooting(true)} diff --git a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx index ddc2eab91..dc9ffca89 100644 --- a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx +++ b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx @@ -2,18 +2,19 @@ import { Fragment } from 'react' import { Progress, Modal, Button } from '@janhq/joi' -import { useAtomValue } from 'jotai' +import { useAtomValue, useSetAtom } from 'jotai' import useDownloadModel from '@/hooks/useDownloadModel' -import { modelDownloadStateAtom } from '@/hooks/useDownloadState' +import { + modelDownloadStateAtom, + removeDownloadStateAtom, +} from '@/hooks/useDownloadState' import { formatDownloadPercentage } from '@/utils/converter' -import { getDownloadingModelAtom } from '@/helpers/atoms/Model.atom' - export default function DownloadingState() { const downloadStates = useAtomValue(modelDownloadStateAtom) - const downloadingModels = useAtomValue(getDownloadingModelAtom) + const removeDownloadState = useSetAtom(removeDownloadStateAtom) const { abortModelDownload } = useDownloadModel() const totalCurrentProgress = Object.values(downloadStates) @@ -76,10 +77,8 @@ export default function DownloadingState() { theme="destructive" onClick={() => { if (item?.modelId) { - const model = downloadingModels.find( - (model) => model.id === item.modelId - ) - if (model) abortModelDownload(model) + removeDownloadState(item?.modelId) + abortModelDownload(item?.modelId) } }} > diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx index 5e8549c7f..8ad16eeba 100644 --- a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx +++ b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx @@ -10,8 +10,6 @@ import { isLocalEngine } from '@/utils/modelEngine' import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' -const Column = ['Model', 'Size', ''] - const TableActiveModel = () => { const { activeModel, stateModel, stopModel } = useActiveModel() @@ -21,37 +19,23 @@ const TableActiveModel = () => {

- - - {Column.map((col, i) => { - return ( - - ) - })} - - {activeModel && isLocalEngine(activeModel.engine) ? ( -
- {col} -

{activeModel.name}

- {activeModel.metadata.size - ? toGibibytes(activeModel.metadata.size) + {activeModel.metadata?.size + ? toGibibytes(activeModel.metadata?.size) : '-'} + { const [showSystemMonitorPanel, setShowSystemMonitorPanel] = useAtom( showSystemMonitorPanelAtom ) - const [control, setControl] = useState(null) - const [elementExpand, setElementExpand] = useState( - null - ) + const reduceTransparent = useAtomValue(reduceTransparentAtom) const { watch, stopWatching } = useGetSystemResources() - useClickOutside( - () => { - toggleShowSystemMonitorPanel(false) - setShowFullScreen(false) - }, - null, - [control, elementExpand] - ) - const toggleShowSystemMonitorPanel = useCallback( (isShow: boolean) => { setShowSystemMonitorPanel(isShow) @@ -76,7 +63,6 @@ const SystemMonitor = () => { return (
{
{showSystemMonitorPanel && (
{ {ramUtilitized}%
- {gpus.length > 0 && (
{gpus.map((gpu, index) => { diff --git a/web/containers/Layout/RibbonPanel/index.tsx b/web/containers/Layout/RibbonPanel/index.tsx index 2eb1bad70..13116dc16 100644 --- a/web/containers/Layout/RibbonPanel/index.tsx +++ b/web/containers/Layout/RibbonPanel/index.tsx @@ -1,5 +1,4 @@ import { Tooltip, useMediaQuery } from '@janhq/joi' -import { motion as m } from 'framer-motion' import { useAtom, useAtomValue, useSetAtom } from 'jotai' import { MessageCircleIcon, @@ -95,10 +94,11 @@ export default function RibbonPanel() { return (
onMenuClick(menu.state)} > onMenuClick(menu.state)} > {menu.icon}
- {isActive && ( - - )}
} content={ diff --git a/web/containers/Loader/ModelReload.tsx b/web/containers/Loader/ModelReload.tsx index fbe673788..29709c0da 100644 --- a/web/containers/Loader/ModelReload.tsx +++ b/web/containers/Loader/ModelReload.tsx @@ -44,6 +44,11 @@ export default function ModelReload() { Reloading model {stateModel.model?.id} +
+      <span>
+        Model is reloading to apply new changes.
+      </span>
+
) } diff --git a/web/containers/ModalCancelDownload/index.tsx b/web/containers/ModalCancelDownload/index.tsx index e8d3842a8..1826c78a7 100644 --- a/web/containers/ModalCancelDownload/index.tsx +++ b/web/containers/ModalCancelDownload/index.tsx @@ -4,16 +4,17 @@ import { Model } from '@janhq/core' import { Modal, Button, Progress, ModalClose } from '@janhq/joi' -import { useAtomValue } from 'jotai' +import { useAtomValue, useSetAtom } from 'jotai' import useDownloadModel from '@/hooks/useDownloadModel' -import { modelDownloadStateAtom } from '@/hooks/useDownloadState' +import { + modelDownloadStateAtom, + removeDownloadStateAtom, +} from '@/hooks/useDownloadState' import { formatDownloadPercentage } from '@/utils/converter' -import { getDownloadingModelAtom } from '@/helpers/atoms/Model.atom' - type Props = { model: Model isFromList?: boolean @@ -21,20 +22,16 @@ type Props = { const ModalCancelDownload = ({ model, isFromList }: Props) => { const { abortModelDownload } = useDownloadModel() - const downloadingModels = useAtomValue(getDownloadingModelAtom) + const removeDownloadState = useSetAtom(removeDownloadStateAtom) const allDownloadStates = useAtomValue(modelDownloadStateAtom) const downloadState = allDownloadStates[model.id] - const cancelText = `Cancel ${formatDownloadPercentage(downloadState.percent)}` + const cancelText = `Cancel ${formatDownloadPercentage(downloadState?.percent ?? 0)}` const onAbortDownloadClick = useCallback(() => { - if (downloadState?.modelId) { - const model = downloadingModels.find( - (model) => model.id === downloadState.modelId - ) - if (model) abortModelDownload(model) - } - }, [downloadState, downloadingModels, abortModelDownload]) + removeDownloadState(model.id) + abortModelDownload(downloadState?.modelId ?? model.id) + }, [downloadState, abortModelDownload, removeDownloadState, model]) return ( { - {formatDownloadPercentage(downloadState.percent)} + {formatDownloadPercentage(downloadState?.percent ?? 0)} diff --git a/web/containers/ModalTroubleShoot/CortexLogs.tsx b/web/containers/ModalTroubleShoot/CortexLogs.tsx new file mode 100644 index 000000000..3323a1694 --- /dev/null +++ b/web/containers/ModalTroubleShoot/CortexLogs.tsx @@ -0,0 +1,226 @@ +import React, { useEffect, useState, memo } from 'react' + +import { Button } from '@janhq/joi' + +import { CopyIcon, CheckIcon, FolderIcon } from 'lucide-react' + +import { twMerge } from 'tailwind-merge' + +import { useClipboard } from '@/hooks/useClipboard' +import { useLogs } from '@/hooks/useLogs' +import { usePath } from '@/hooks/usePath' + +const CortexLogs = () => { + const { getLogs } = useLogs() + const [logs, setLogs] = useState([]) + const { onRevealInFinder } = usePath() + + useEffect(() => { + getLogs('cortex').then((log) => { + if (typeof log?.split === 'function') { + if (log.length > 0) { + setLogs(log.split(/\r?\n|\r|\n/g)) + } + } + }) + + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []) + + const clipboard = useClipboard({ timeout: 1000 }) + + return ( +
+    <div>
+      {/* Header controls: reveal-in-finder and copy-to-clipboard buttons
+          built from FolderIcon, CopyIcon and CheckIcon; the original JSX
+          markup for this header was stripped from the diff. */}
+      {logs.length > 0 ? (
+        <code>
+          {logs.slice(-100).map((log, i) => {
+            return <p key={i}>{log}</p>
+          })}
+        </code>
+      ) : (
+        <div>
+          {/* "Empty logs" SVG illustration (markup stripped from the diff) */}
+          Empty logs
+        </div>
+      )}
+    </div>
+ ) +} + +export default memo(CortexLogs) diff --git a/web/containers/ModalTroubleShoot/index.tsx b/web/containers/ModalTroubleShoot/index.tsx index 67ccbe22f..77ee51034 100644 --- a/web/containers/ModalTroubleShoot/index.tsx +++ b/web/containers/ModalTroubleShoot/index.tsx @@ -8,10 +8,11 @@ import { twMerge } from 'tailwind-merge' import ServerLogs from '@/containers/ServerLogs' import AppLogs from './AppLogs' +import CortexLogs from './CortexLogs' import DeviceSpecs from './DeviceSpecs' export const modalTroubleShootingAtom = atom(false) -const logOption = ['App Logs', 'Server Logs', 'Device Specs'] +const logOption = ['App Logs', 'Cortex Logs', 'Server Logs', 'Device Specs'] const ModalTroubleShooting = () => { const [modalTroubleShooting, setModalTroubleShooting] = useAtom( @@ -144,10 +145,15 @@ const ModalTroubleShooting = () => {
- +
+ +
+
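For context, the new Cortex Logs tab wired in above reuses the log-panel pattern that AppLogs and the CortexLogs component follow. A minimal sketch of that pattern, assuming the useLogs hook exposes getLogs(type: string): Promise<string> as used in the diff; the LogPanelSketch name is hypothetical:

import React, { useEffect, useState, memo } from 'react'

import { useLogs } from '@/hooks/useLogs'

// Sketch of the shared log-panel pattern: fetch the raw log text once on
// mount, split it into lines, and render only the most recent entries.
const LogPanelSketch = ({ type }: { type: string }) => {
  const { getLogs } = useLogs()
  const [lines, setLines] = useState<string[]>([])

  useEffect(() => {
    getLogs(type).then((log) => {
      // Guard against non-string results before splitting into lines
      if (typeof log?.split === 'function' && log.length > 0) {
        setLines(log.split(/\r?\n|\r|\n/g))
      }
    })
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  // Show only the last 100 lines, matching CortexLogs' slice(-100)
  return (
    <div>
      {lines.length > 0 ? (
        lines.slice(-100).map((line, i) => <p key={i}>{line}</p>)
      ) : (
        <p>Empty logs</p>
      )}
    </div>
  )
}

export default memo(LogPanelSketch)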
diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx index 59f19586a..66a20a854 100644 --- a/web/containers/ModelDropdown/index.tsx +++ b/web/containers/ModelDropdown/index.tsx @@ -88,7 +88,7 @@ const ModelDropdown = ({ const searchInputRef = useRef(null) const configuredModels = useAtomValue(configuredModelsAtom) const featuredModel = configuredModels.filter((x) => - x.metadata.tags.includes('Featured') + x.metadata?.tags?.includes('Featured') ) const { updateThreadMetadata } = useCreateNewThread() @@ -108,6 +108,11 @@ const ModelDropdown = ({ const filteredDownloadedModels = useMemo( () => configuredModels + .concat( + downloadedModels.filter( + (e) => !configuredModels.some((x) => x.id === e.id) + ) + ) .filter((e) => e.name.toLowerCase().includes(searchText.toLowerCase().trim()) ) @@ -200,7 +205,6 @@ const ModelDropdown = ({ if (model) updateModelParameter(activeThread, { params: modelParams, - modelPath: model.file_path, modelId: model.id, engine: model.engine, }) @@ -262,8 +266,13 @@ const ModelDropdown = ({ }, []) const findByEngine = filteredDownloadedModels - .filter((x) => !inActiveEngineProvider.includes(x.engine)) - .map((x) => x.engine) + .map((x) => { + // Legacy engine support - they will be grouped under Cortex LlamaCPP + if (x.engine === InferenceEngine.nitro) + return InferenceEngine.cortex_llamacpp + return x.engine + }) + .filter((x) => !inActiveEngineProvider.includes(x)) const groupByEngine = findByEngine .filter(function (item, index) { @@ -444,7 +453,7 @@ const ModelDropdown = ({
    {featuredModel.map((model) => { const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) return (
  • - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} {!isDownloading ? ( downloadModel(model)} + onClick={() => + downloadModel( + model.sources[0].url, + model.id + ) + } /> ) : ( Object.values(downloadStates) @@ -500,7 +514,12 @@ const ModelDropdown = ({
      {filteredDownloadedModels - .filter((x) => x.engine === engine) + .filter( + (x) => + x.engine === engine || + (x.engine === InferenceEngine.nitro && + engine === InferenceEngine.cortex_llamacpp) + ) .filter((y) => { if (isLocalEngine(y.engine) && !searchText.length) { return downloadedModels.find((c) => c.id === y.id) @@ -511,7 +530,7 @@ const ModelDropdown = ({ .map((model) => { if (!showModel) return null const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) const isDownloaded = downloadedModels.some( (c) => c.id === model.id @@ -549,14 +568,19 @@ const ModelDropdown = ({
      {!isDownloaded && ( - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} )} {!isDownloading && !isDownloaded ? ( downloadModel(model)} + onClick={() => + downloadModel( + model.sources[0].url, + model.id + ) + } /> ) : ( Object.values(downloadStates) diff --git a/web/containers/ModelLabel/index.tsx b/web/containers/ModelLabel/index.tsx index b0a3da96f..a6237ada6 100644 --- a/web/containers/ModelLabel/index.tsx +++ b/web/containers/ModelLabel/index.tsx @@ -42,7 +42,7 @@ const ModelLabel = ({ metadata, compact }: Props) => { const availableRam = settings?.run_mode === 'gpu' ? availableVram * 1000000 // MB to bytes - : totalRam - usedRam + (activeModel?.metadata.size ?? 0) + : totalRam - usedRam + (activeModel?.metadata?.size ?? 0) if (minimumRamModel > totalRam) { return ( { return null } - return metadata.tags.includes('Coming Soon') ? ( + return metadata?.tags?.includes('Coming Soon') ? ( ) : ( - getLabel(metadata.size ?? 0) + getLabel(metadata?.size ?? 0) ) } diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx index 5cc92219c..6cad910f7 100644 --- a/web/containers/Providers/EventHandler.tsx +++ b/web/containers/Providers/EventHandler.tsx @@ -15,6 +15,8 @@ import { Thread, EngineManager, InferenceEngine, + extractInferenceParams, + ModelExtension, } from '@janhq/core' import { useAtomValue, useSetAtom } from 'jotai' import { ulid } from 'ulidx' @@ -22,7 +24,6 @@ import { ulid } from 'ulidx' import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel' import { isLocalEngine } from '@/utils/modelEngine' -import { extractInferenceParams } from '@/utils/modelParam' import { extensionManager } from '@/extension' import { @@ -179,6 +180,17 @@ export default function EventHandler({ children }: { children: ReactNode }) { setIsGeneratingResponse(false) } return + } else if (message.status === MessageStatus.Error) { + ;(async () => { + if ( + !(await extensionManager + .get(ExtensionTypeEnum.Model) + ?.isModelLoaded(activeModelRef.current?.id as string)) + ) { + setActiveModel(undefined) + setStateModel({ state: 'start', loading: false, model: undefined }) + } + })() } // Mark the thread as not waiting for response updateThreadWaiting(message.thread_id, false) diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx index b35ab2e43..5cb0debab 100644 --- a/web/containers/Providers/EventListener.tsx +++ b/web/containers/Providers/EventListener.tsx @@ -2,7 +2,17 @@ import { PropsWithChildren, useCallback, useEffect } from 'react' import React from 'react' -import { DownloadEvent, events, DownloadState, ModelEvent } from '@janhq/core' +import { + DownloadEvent, + events, + DownloadState, + ModelEvent, + ExtensionTypeEnum, + ModelExtension, + ModelManager, + Model, +} from '@janhq/core' + import { useSetAtom } from 'jotai' import { setDownloadStateAtom } from '@/hooks/useDownloadState' @@ -18,16 +28,23 @@ import EventHandler from './EventHandler' import ModelImportListener from './ModelImportListener' import QuickAskListener from './QuickAskListener' +import { extensionManager } from '@/extension' import { InstallingExtensionState, removeInstallingExtensionAtom, setInstallingExtensionAtom, } from '@/helpers/atoms/Extension.atom' +import { + addDownloadingModelAtom, + removeDownloadingModelAtom, +} from '@/helpers/atoms/Model.atom' const EventListenerWrapper = ({ children }: PropsWithChildren) => { const setDownloadState = useSetAtom(setDownloadStateAtom) const 
setInstallingExtension = useSetAtom(setInstallingExtensionAtom) const removeInstallingExtension = useSetAtom(removeInstallingExtensionAtom) + const addDownloadingModel = useSetAtom(addDownloadingModelAtom) + const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom) const onFileDownloadUpdate = useCallback( async (state: DownloadState) => { @@ -40,10 +57,11 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { } setInstallingExtension(state.extensionId!, installingExtensionState) } else { + addDownloadingModel(state.modelId) setDownloadState(state) } }, - [setDownloadState, setInstallingExtension] + [addDownloadingModel, setDownloadState, setInstallingExtension] ) const onFileDownloadError = useCallback( @@ -52,21 +70,52 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { if (state.downloadType === 'extension') { removeInstallingExtension(state.extensionId!) } else { + state.downloadState = 'error' setDownloadState(state) + removeDownloadingModel(state.modelId) } }, - [setDownloadState, removeInstallingExtension] + [removeInstallingExtension, setDownloadState, removeDownloadingModel] + ) + + const onFileDownloadStopped = useCallback( + (state: DownloadState) => { + console.debug('onFileDownloadError', state) + if (state.downloadType === 'extension') { + removeInstallingExtension(state.extensionId!) + } else { + state.downloadState = 'error' + state.error = 'aborted' + setDownloadState(state) + removeDownloadingModel(state.modelId) + } + }, + [removeInstallingExtension, setDownloadState, removeDownloadingModel] ) const onFileDownloadSuccess = useCallback( - (state: DownloadState) => { + async (state: DownloadState) => { console.debug('onFileDownloadSuccess', state) if (state.downloadType !== 'extension') { + // Update model metadata accordingly + const model = ModelManager.instance().models.get(state.modelId) + if (model) { + await extensionManager + .get(ExtensionTypeEnum.Model) + ?.updateModel({ + id: model.id, + ...model.settings, + ...model.parameters, + } as Partial) + .catch((e) => console.debug(e)) + } + state.downloadState = 'end' setDownloadState(state) + removeDownloadingModel(state.modelId) } events.emit(ModelEvent.OnModelsUpdate, {}) }, - [setDownloadState] + [removeDownloadingModel, setDownloadState] ) const onFileUnzipSuccess = useCallback( @@ -87,6 +136,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate) events.on(DownloadEvent.onFileDownloadError, onFileDownloadError) events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) + events.on(DownloadEvent.onFileDownloadStopped, onFileDownloadStopped) events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess) return () => { @@ -94,6 +144,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate) events.off(DownloadEvent.onFileDownloadError, onFileDownloadError) events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) + events.off(DownloadEvent.onFileDownloadStopped, onFileDownloadStopped) events.off(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess) } }, [ @@ -101,6 +152,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { onFileDownloadError, onFileDownloadSuccess, onFileUnzipSuccess, + onFileDownloadStopped, ]) return ( diff --git a/web/extension/ExtensionManager.ts b/web/extension/ExtensionManager.ts index aa1a7674b..811126f85 100644 --- 
a/web/extension/ExtensionManager.ts +++ b/web/extension/ExtensionManager.ts @@ -8,6 +8,7 @@ import Extension from './Extension' * Manages the registration and retrieval of extensions. */ export class ExtensionManager { + date = new Date().toISOString() // Registered extensions private extensions = new Map() diff --git a/web/helpers/atoms/Model.atom.test.ts b/web/helpers/atoms/Model.atom.test.ts index 57827efec..923f24df4 100644 --- a/web/helpers/atoms/Model.atom.test.ts +++ b/web/helpers/atoms/Model.atom.test.ts @@ -32,13 +32,22 @@ describe('Model.atom.ts', () => { }) describe('showEngineListModelAtom', () => { - it('should initialize as an empty array', () => { - expect(ModelAtoms.showEngineListModelAtom.init).toEqual(['nitro']) + it('should initialize with local engines', () => { + expect(ModelAtoms.showEngineListModelAtom.init).toEqual([ + 'nitro', + 'cortex', + 'llama-cpp', + 'onnxruntime', + 'tensorrt-llm', + ]) }) }) describe('addDownloadingModelAtom', () => { it('should add downloading model', async () => { + const { result: reset } = renderHook(() => + useSetAtom(ModelAtoms.downloadingModelsAtom) + ) const { result: setAtom } = renderHook(() => useSetAtom(ModelAtoms.addDownloadingModelAtom) ) @@ -49,11 +58,16 @@ describe('Model.atom.ts', () => { setAtom.current({ id: '1' } as any) }) expect(getAtom.current).toEqual([{ id: '1' }]) + reset.current([]) }) }) describe('removeDownloadingModelAtom', () => { it('should remove downloading model', async () => { + const { result: reset } = renderHook(() => + useSetAtom(ModelAtoms.downloadingModelsAtom) + ) + const { result: setAtom } = renderHook(() => useSetAtom(ModelAtoms.addDownloadingModelAtom) ) @@ -63,16 +77,21 @@ describe('Model.atom.ts', () => { const { result: getAtom } = renderHook(() => useAtomValue(ModelAtoms.getDownloadingModelAtom) ) + expect(getAtom.current).toEqual([]) act(() => { - setAtom.current({ id: '1' } as any) + setAtom.current('1') removeAtom.current('1') }) expect(getAtom.current).toEqual([]) + reset.current([]) }) }) describe('removeDownloadedModelAtom', () => { it('should remove downloaded model', async () => { + const { result: reset } = renderHook(() => + useSetAtom(ModelAtoms.downloadingModelsAtom) + ) const { result: setAtom } = renderHook(() => useSetAtom(ModelAtoms.downloadedModelsAtom) ) @@ -94,6 +113,7 @@ describe('Model.atom.ts', () => { removeAtom.current('1') }) expect(getAtom.current).toEqual([]) + reset.current([]) }) }) @@ -284,10 +304,4 @@ describe('Model.atom.ts', () => { expect(importAtom.current[0]).toEqual([]) }) }) - - describe('defaultModelAtom', () => { - it('should initialize as undefined', () => { - expect(ModelAtoms.defaultModelAtom.init).toBeUndefined() - }) - }) }) diff --git a/web/helpers/atoms/Model.atom.ts b/web/helpers/atoms/Model.atom.ts index 6abc42c9e..dd4414801 100644 --- a/web/helpers/atoms/Model.atom.ts +++ b/web/helpers/atoms/Model.atom.ts @@ -1,4 +1,4 @@ -import { ImportingModel, InferenceEngine, Model, ModelFile } from '@janhq/core' +import { ImportingModel, InferenceEngine, Model } from '@janhq/core' import { atom } from 'jotai' import { atomWithStorage } from 'jotai/utils' @@ -14,7 +14,7 @@ enum ModelStorageAtomKeys { * Downloaded Models Atom * This atom stores the list of models that have been downloaded. 
*/ -export const downloadedModelsAtom = atomWithStorage( +export const downloadedModelsAtom = atomWithStorage( ModelStorageAtomKeys.DownloadedModels, [] ) @@ -23,7 +23,7 @@ export const downloadedModelsAtom = atomWithStorage( * Configured Models Atom * This atom stores the list of models that have been configured and available to download */ -export const configuredModelsAtom = atomWithStorage( +export const configuredModelsAtom = atomWithStorage( ModelStorageAtomKeys.AvailableModels, [] ) @@ -43,12 +43,18 @@ export const removeDownloadedModelAtom = atom( /** * Atom to store the selected model (from ModelDropdown) */ -export const selectedModelAtom = atom(undefined) +export const selectedModelAtom = atom(undefined) /** * Atom to store the expanded engine sections (from ModelDropdown) */ -export const showEngineListModelAtom = atom([InferenceEngine.nitro]) +export const showEngineListModelAtom = atom([ + InferenceEngine.nitro, + InferenceEngine.cortex, + InferenceEngine.cortex_llamacpp, + InferenceEngine.cortex_onnx, + InferenceEngine.cortex_tensorrtllm, +]) /// End Models Atom /// Model Download Atom @@ -58,13 +64,13 @@ export const stateModel = atom({ state: 'start', loading: false, model: '' }) /** * Stores the list of models which are being downloaded. */ -const downloadingModelsAtom = atom([]) +export const downloadingModelsAtom = atom([]) export const getDownloadingModelAtom = atom((get) => get(downloadingModelsAtom)) -export const addDownloadingModelAtom = atom(null, (get, set, model: Model) => { +export const addDownloadingModelAtom = atom(null, (get, set, model: string) => { const downloadingModels = get(downloadingModelsAtom) - if (!downloadingModels.find((e) => e.id === model.id)) { + if (!downloadingModels.includes(model)) { set(downloadingModelsAtom, [...downloadingModels, model]) } }) @@ -76,7 +82,7 @@ export const removeDownloadingModelAtom = atom( set( downloadingModelsAtom, - downloadingModels.filter((e) => e.id !== modelId) + downloadingModels.filter((e) => e !== modelId) ) } ) @@ -88,10 +94,6 @@ export const removeDownloadingModelAtom = atom( // store the paths of the models that are being imported export const importingModelsAtom = atom([]) -// DEPRECATED: Remove when moving to cortex.cpp -// Default model template when importing -export const defaultModelAtom = atom(undefined) - /** * Importing progress Atom */ diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 2d53678c3..7b9ee98e0 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,6 +1,6 @@ import { useCallback, useEffect, useRef } from 'react' -import { EngineManager, Model, ModelFile } from '@janhq/core' +import { EngineManager, Model } from '@janhq/core' import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' import { toaster } from '@/containers/Toast' @@ -11,7 +11,7 @@ import { vulkanEnabledAtom } from '@/helpers/atoms/AppConfig.atom' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' -export const activeModelAtom = atom(undefined) +export const activeModelAtom = atom(undefined) export const loadModelErrorAtom = atom(undefined) type ModelState = { @@ -37,7 +37,7 @@ export function useActiveModel() { const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom) const isVulkanEnabled = useAtomValue(vulkanEnabledAtom) - const downloadedModelsRef = useRef([]) + const downloadedModelsRef = useRef([]) useEffect(() => { downloadedModelsRef.current = downloadedModels 
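The reworked atoms above track in-flight downloads by plain model-id strings rather than full Model objects. A minimal usage sketch with jotai's vanilla store; the id 'some-model-id' is a placeholder:

import { createStore } from 'jotai'

import {
  addDownloadingModelAtom,
  getDownloadingModelAtom,
  removeDownloadingModelAtom,
} from '@/helpers/atoms/Model.atom'

const store = createStore()

// Adding the same id twice is a no-op thanks to the includes() guard
store.set(addDownloadingModelAtom, 'some-model-id')
store.set(addDownloadingModelAtom, 'some-model-id')
console.log(store.get(getDownloadingModelAtom)) // ['some-model-id']

// Removal filters the id list by the plain string
store.set(removeDownloadingModelAtom, 'some-model-id')
console.log(store.get(getDownloadingModelAtom)) // []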
@@ -51,15 +51,14 @@ export function useActiveModel() { console.debug(`Model ${modelId} is already initialized. Ignore..`) return Promise.resolve() } + + if (activeModel) { + await stopModel(activeModel) + } setPendingModelLoad(true) let model = downloadedModelsRef?.current.find((e) => e.id === modelId) - const error = await stopModel().catch((error: Error) => error) - if (error) { - return Promise.reject(error) - } - setLoadModelError(undefined) setActiveModel(undefined) @@ -118,7 +117,7 @@ export function useActiveModel() { setStateModel(() => ({ state: 'start', loading: false, - model, + undefined, })) if (!pendingModelLoad && abortable) { @@ -135,28 +134,30 @@ export function useActiveModel() { }) } - const stopModel = useCallback(async () => { - const stoppingModel = activeModel || stateModel.model - if (!stoppingModel || (stateModel.state === 'stop' && stateModel.loading)) - return + const stopModel = useCallback( + async (model?: Model) => { + const stoppingModel = model ?? activeModel ?? stateModel.model + if (!stoppingModel || (stateModel.state === 'stop' && stateModel.loading)) + return - setStateModel({ state: 'stop', loading: true, model: stoppingModel }) - const engine = EngineManager.instance().get(stoppingModel.engine) - return engine - ?.unloadModel(stoppingModel) - .catch() - .then(() => { - setActiveModel(undefined) - setStateModel({ state: 'start', loading: false, model: undefined }) - setPendingModelLoad(false) - }) - }, [ - activeModel, - setActiveModel, - setStateModel, - setPendingModelLoad, - stateModel, - ]) + const engine = EngineManager.instance().get(stoppingModel.engine) + return engine + ?.unloadModel(stoppingModel) + .catch((e) => console.error(e)) + .then(() => { + setActiveModel(undefined) + setStateModel({ state: 'start', loading: false, model: undefined }) + setPendingModelLoad(false) + }) + }, + [ + activeModel, + setStateModel, + setActiveModel, + setPendingModelLoad, + stateModel, + ] + ) const stopInference = useCallback(async () => { // Loading model diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts index e65353753..75aa99c27 100644 --- a/web/hooks/useCreateNewThread.ts +++ b/web/hooks/useCreateNewThread.ts @@ -8,7 +8,7 @@ import { ThreadAssistantInfo, ThreadState, AssistantTool, - ModelFile, + Model, } from '@janhq/core' import { atom, useAtomValue, useSetAtom } from 'jotai' @@ -76,7 +76,7 @@ export const useCreateNewThread = () => { const requestCreateNewThread = async ( assistant: Assistant, - model?: ModelFile | undefined + model?: Model | undefined ) => { // Stop generating if any setIsGeneratingResponse(false) diff --git a/web/hooks/useDeleteModel.test.ts b/web/hooks/useDeleteModel.test.ts index 336a1cd0c..3ee0926f9 100644 --- a/web/hooks/useDeleteModel.test.ts +++ b/web/hooks/useDeleteModel.test.ts @@ -16,7 +16,7 @@ describe('useDeleteModel', () => { const mockModel: any = { id: 'test-model', name: 'Test Model', - // Add other required properties of ModelFile + // Add other required properties of Model } const mockDeleteModel = jest.fn() @@ -35,7 +35,7 @@ describe('useDeleteModel', () => { await result.current.deleteModel(mockModel) }) - expect(mockDeleteModel).toHaveBeenCalledWith(mockModel) + expect(mockDeleteModel).toHaveBeenCalledWith('test-model') expect(toaster).toHaveBeenCalledWith({ title: 'Model Deletion Successful', description: `Model ${mockModel.name} has been successfully deleted.`, @@ -67,7 +67,7 @@ describe('useDeleteModel', () => { ) }) - expect(mockDeleteModel).toHaveBeenCalledWith(mockModel) + 
expect(mockDeleteModel).toHaveBeenCalledWith("test-model") expect(toaster).not.toHaveBeenCalled() }) }) diff --git a/web/hooks/useDeleteModel.ts b/web/hooks/useDeleteModel.ts index 5a7a319b2..5621a78b8 100644 --- a/web/hooks/useDeleteModel.ts +++ b/web/hooks/useDeleteModel.ts @@ -1,6 +1,6 @@ import { useCallback } from 'react' -import { ExtensionTypeEnum, ModelExtension, ModelFile } from '@janhq/core' +import { ExtensionTypeEnum, ModelExtension, Model } from '@janhq/core' import { useSetAtom } from 'jotai' @@ -13,8 +13,8 @@ export default function useDeleteModel() { const removeDownloadedModel = useSetAtom(removeDownloadedModelAtom) const deleteModel = useCallback( - async (model: ModelFile) => { - await localDeleteModel(model) + async (model: Model) => { + await localDeleteModel(model.id) removeDownloadedModel(model.id) toaster({ title: 'Model Deletion Successful', @@ -28,7 +28,7 @@ export default function useDeleteModel() { return { deleteModel } } -const localDeleteModel = async (model: ModelFile) => +const localDeleteModel = async (model: string) => extensionManager .get(ExtensionTypeEnum.Model) ?.deleteModel(model) diff --git a/web/hooks/useDownloadModel.test.ts b/web/hooks/useDownloadModel.test.ts index fc0b7c21f..7e9d7b518 100644 --- a/web/hooks/useDownloadModel.test.ts +++ b/web/hooks/useDownloadModel.test.ts @@ -13,12 +13,6 @@ jest.mock('jotai', () => ({ })) jest.mock('@janhq/core') jest.mock('@/extension/ExtensionManager') -jest.mock('./useGpuSetting', () => ({ - __esModule: true, - default: () => ({ - getGpuSettings: jest.fn().mockResolvedValue({ some: 'gpuSettings' }), - }), -})) describe('useDownloadModel', () => { beforeEach(() => { @@ -29,25 +23,25 @@ describe('useDownloadModel', () => { it('should download a model', async () => { const mockModel: core.Model = { id: 'test-model', - sources: [{ filename: 'test.bin' }], + sources: [{ filename: 'test.bin', url: 'https://fake.url' }], } as core.Model const mockExtension = { - downloadModel: jest.fn().mockResolvedValue(undefined), + pullModel: jest.fn().mockResolvedValue(undefined), } ;(useSetAtom as jest.Mock).mockReturnValue(() => undefined) ;(extensionManager.get as jest.Mock).mockReturnValue(mockExtension) const { result } = renderHook(() => useDownloadModel()) - await act(async () => { - await result.current.downloadModel(mockModel) + act(() => { + result.current.downloadModel(mockModel.sources[0].url, mockModel.id) }) - expect(mockExtension.downloadModel).toHaveBeenCalledWith( - mockModel, - { some: 'gpuSettings' }, - { ignoreSSL: undefined, proxy: '' } + expect(mockExtension.pullModel).toHaveBeenCalledWith( + mockModel.sources[0].url, + mockModel.id, + undefined ) }) @@ -58,15 +52,18 @@ describe('useDownloadModel', () => { } as core.Model ;(core.joinPath as jest.Mock).mockResolvedValue('/path/to/model/test.bin') - ;(core.abortDownload as jest.Mock).mockResolvedValue(undefined) + const mockExtension = { + cancelModelPull: jest.fn().mockResolvedValue(undefined), + } ;(useSetAtom as jest.Mock).mockReturnValue(() => undefined) + ;(extensionManager.get as jest.Mock).mockReturnValue(mockExtension) const { result } = renderHook(() => useDownloadModel()) - await act(async () => { - await result.current.abortModelDownload(mockModel) + act(() => { + result.current.abortModelDownload(mockModel.id) }) - expect(core.abortDownload).toHaveBeenCalledWith('/path/to/model/test.bin') + expect(mockExtension.cancelModelPull).toHaveBeenCalledWith('test-model') }) it('should handle proxy settings', async () => { @@ -76,7 +73,7 @@ 
describe('useDownloadModel', () => { } as core.Model const mockExtension = { - downloadModel: jest.fn().mockResolvedValue(undefined), + pullModel: jest.fn().mockResolvedValue(undefined), } ;(useSetAtom as jest.Mock).mockReturnValue(() => undefined) ;(extensionManager.get as jest.Mock).mockReturnValue(mockExtension) @@ -85,14 +82,14 @@ describe('useDownloadModel', () => { const { result } = renderHook(() => useDownloadModel()) - await act(async () => { - await result.current.downloadModel(mockModel) + act(() => { + result.current.downloadModel(mockModel.sources[0].url, mockModel.id) }) - expect(mockExtension.downloadModel).toHaveBeenCalledWith( - mockModel, - expect.objectContaining({ some: 'gpuSettings' }), - expect.anything() + expect(mockExtension.pullModel).toHaveBeenCalledWith( + mockModel.sources[0].url, + mockModel.id, + undefined ) }) }) diff --git a/web/hooks/useDownloadModel.ts b/web/hooks/useDownloadModel.ts index 0cd21ea83..c616f8769 100644 --- a/web/hooks/useDownloadModel.ts +++ b/web/hooks/useDownloadModel.ts @@ -1,106 +1,55 @@ import { useCallback } from 'react' -import { - Model, - ExtensionTypeEnum, - ModelExtension, - abortDownload, - joinPath, - ModelArtifact, - DownloadState, - GpuSetting, - ModelFile, - dirName, -} from '@janhq/core' +import { ExtensionTypeEnum, ModelExtension } from '@janhq/core' -import { useAtomValue, useSetAtom } from 'jotai' +import { useSetAtom } from 'jotai' + +import { toaster } from '@/containers/Toast' import { setDownloadStateAtom } from './useDownloadState' -import useGpuSetting from './useGpuSetting' - import { extensionManager } from '@/extension/ExtensionManager' + import { - ignoreSslAtom, - proxyAtom, - proxyEnabledAtom, -} from '@/helpers/atoms/AppConfig.atom' -import { addDownloadingModelAtom } from '@/helpers/atoms/Model.atom' + addDownloadingModelAtom, + removeDownloadingModelAtom, +} from '@/helpers/atoms/Model.atom' export default function useDownloadModel() { - const ignoreSSL = useAtomValue(ignoreSslAtom) - const proxy = useAtomValue(proxyAtom) - const proxyEnabled = useAtomValue(proxyEnabledAtom) - const setDownloadState = useSetAtom(setDownloadStateAtom) + const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom) const addDownloadingModel = useSetAtom(addDownloadingModelAtom) - - const { getGpuSettings } = useGpuSetting() + const setDownloadStates = useSetAtom(setDownloadStateAtom) const downloadModel = useCallback( - async (model: Model) => { - const childProgresses: DownloadState[] = model.sources.map( - (source: ModelArtifact) => ({ - fileName: source.filename, - modelId: model.id, - time: { - elapsed: 0, - remaining: 0, - }, - speed: 0, - percent: 0, - size: { - total: 0, - transferred: 0, - }, - downloadState: 'downloading', - }) - ) - - // set an initial download state - setDownloadState({ - fileName: '', - modelId: model.id, - time: { - elapsed: 0, - remaining: 0, - }, - speed: 0, - percent: 0, + async (model: string, id?: string, name?: string) => { + addDownloadingModel(id ?? model) + setDownloadStates({ + modelId: id ?? model, + downloadState: 'downloading', + fileName: id ?? 
model, size: { total: 0, transferred: 0, }, - children: childProgresses, - downloadState: 'downloading', + percent: 0, }) + downloadLocalModel(model, id, name).catch((error) => { + if (error.message) { + toaster({ + title: 'Download failed', + description: error.message, + type: 'error', + }) + } - addDownloadingModel(model) - const gpuSettings = await getGpuSettings() - await localDownloadModel( - model, - ignoreSSL, - proxyEnabled ? proxy : '', - gpuSettings - ) + removeDownloadingModel(model) + }) }, - [ - ignoreSSL, - proxy, - proxyEnabled, - getGpuSettings, - addDownloadingModel, - setDownloadState, - ] + [removeDownloadingModel, addDownloadingModel, setDownloadStates] ) - const abortModelDownload = useCallback(async (model: Model | ModelFile) => { - for (const source of model.sources) { - const path = - 'file_path' in model - ? await joinPath([await dirName(model.file_path), source.filename]) - : await joinPath(['models', model.id, source.filename]) - await abortDownload(path) - } + const abortModelDownload = useCallback(async (model: string) => { + await cancelModelDownload(model) }, []) return { @@ -109,12 +58,12 @@ export default function useDownloadModel() { } } -const localDownloadModel = async ( - model: Model, - ignoreSSL: boolean, - proxy: string, - gpuSettings?: GpuSetting -) => +const downloadLocalModel = async (model: string, id?: string, name?: string) => extensionManager .get(ExtensionTypeEnum.Model) - ?.downloadModel(model, gpuSettings, { ignoreSSL, proxy }) + ?.pullModel(model, id, name) + +const cancelModelDownload = async (model: string) => + extensionManager + .get(ExtensionTypeEnum.Model) + ?.cancelModelPull(model) diff --git a/web/hooks/useDownloadState.ts b/web/hooks/useDownloadState.ts index 03a8883cb..32a9d3255 100644 --- a/web/hooks/useDownloadState.ts +++ b/web/hooks/useDownloadState.ts @@ -10,8 +10,18 @@ import { } from '@/helpers/atoms/Model.atom' // download states + export const modelDownloadStateAtom = atom>({}) +/** + * Remove a download state for a particular model. + */ +export const removeDownloadStateAtom = atom(null, (get, set, id: string) => { + const currentState = { ...get(modelDownloadStateAtom) } + delete currentState[id] + set(modelDownloadStateAtom, currentState) + set(removeDownloadingModelAtom, id) +}) /** * Used to set the download state for a particular model. 
*/ @@ -77,7 +87,7 @@ export const setDownloadStateAtom = atom( } } else { // download in progress - if (state.size.total === 0) { + if (state.size.total === 0 || !currentState[state.modelId]) { // this is initial state, just set the state currentState[state.modelId] = state set(modelDownloadStateAtom, currentState) @@ -108,6 +118,7 @@ export const setDownloadStateAtom = atom( ) modelDownloadState.children = updatedChildren + if (isAnyChildDownloadNotReady) { // just update the children currentState[state.modelId] = modelDownloadState @@ -115,23 +126,18 @@ export const setDownloadStateAtom = atom( return } - const parentTotalSize = modelDownloadState.size.total - if (parentTotalSize === 0) { - // calculate the total size of the parent by sum all children total size - const totalSize = updatedChildren.reduce( - (acc, m) => acc + m.size.total, - 0 - ) - - modelDownloadState.size.total = totalSize - } - + const parentTotalSize = updatedChildren.reduce( + (acc, m) => acc + m.size.total, + 0 + ) // calculate the total transferred size by sum all children transferred size const transferredSize = updatedChildren.reduce( (acc, m) => acc + m.size.transferred, 0 ) + modelDownloadState.size.total = parentTotalSize modelDownloadState.size.transferred = transferredSize + modelDownloadState.percent = parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize currentState[state.modelId] = modelDownloadState diff --git a/web/hooks/useDropModelBinaries.ts b/web/hooks/useDropModelBinaries.ts index d87e96627..7c87355f7 100644 --- a/web/hooks/useDropModelBinaries.ts +++ b/web/hooks/useDropModelBinaries.ts @@ -29,7 +29,7 @@ export default function useDropModelBinaries() { const importingModels: ImportingModel[] = supportedFiles.map((file) => ({ importId: uuidv4(), modelId: undefined, - name: file.name.replace('.gguf', ''), + name: file.name.replace(/ /g, '').replace('.gguf', ''), description: '', path: file.path, tags: [], diff --git a/web/hooks/useGetHFRepoData.test.ts b/web/hooks/useGetHFRepoData.test.ts index eaf86d79a..01055612d 100644 --- a/web/hooks/useGetHFRepoData.test.ts +++ b/web/hooks/useGetHFRepoData.test.ts @@ -1,6 +1,10 @@ +/** + * @jest-environment jsdom + */ import { renderHook, act } from '@testing-library/react' import { useGetHFRepoData } from './useGetHFRepoData' import { extensionManager } from '@/extension' +import * as hf from '@/utils/huggingface' jest.mock('@/extension', () => ({ extensionManager: { @@ -8,6 +12,8 @@ jest.mock('@/extension', () => ({ }, })) +jest.mock('@/utils/huggingface') + describe('useGetHFRepoData', () => { beforeEach(() => { jest.clearAllMocks() @@ -15,10 +21,7 @@ describe('useGetHFRepoData', () => { it('should fetch HF repo data successfully', async () => { const mockData = { name: 'Test Repo', stars: 100 } - const mockFetchHuggingFaceRepoData = jest.fn().mockResolvedValue(mockData) - ;(extensionManager.get as jest.Mock).mockReturnValue({ - fetchHuggingFaceRepoData: mockFetchHuggingFaceRepoData, - }) + ;(hf.fetchHuggingFaceRepoData as jest.Mock).mockReturnValue(mockData) const { result } = renderHook(() => useGetHFRepoData()) @@ -34,6 +37,5 @@ describe('useGetHFRepoData', () => { expect(result.current.error).toBeUndefined() expect(await data).toEqual(mockData) - expect(mockFetchHuggingFaceRepoData).toHaveBeenCalledWith('test-repo') }) }) diff --git a/web/hooks/useGetHFRepoData.ts b/web/hooks/useGetHFRepoData.ts index 3dab2c72e..6f2ec2b57 100644 --- a/web/hooks/useGetHFRepoData.ts +++ b/web/hooks/useGetHFRepoData.ts @@ -1,12 +1,8 @@ import { useCallback, 
useState } from 'react' -import { - ExtensionTypeEnum, - HuggingFaceRepoData, - ModelExtension, -} from '@janhq/core' +import { HuggingFaceRepoData } from '@janhq/core' -import { extensionManager } from '@/extension' +import { fetchHuggingFaceRepoData } from '@/utils/huggingface' export const useGetHFRepoData = () => { const [error, setError] = useState(undefined) @@ -35,7 +31,5 @@ export const useGetHFRepoData = () => { const extensionGetHfRepoData = async ( repoId: string ): Promise => { - return extensionManager - .get(ExtensionTypeEnum.Model) - ?.fetchHuggingFaceRepoData(repoId) + return fetchHuggingFaceRepoData(repoId) } diff --git a/web/hooks/useImportModel.test.ts b/web/hooks/useImportModel.test.ts index 2148f581b..571947903 100644 --- a/web/hooks/useImportModel.test.ts +++ b/web/hooks/useImportModel.test.ts @@ -18,7 +18,7 @@ describe('useImportModel', () => { it('should import models successfully', async () => { const mockImportModels = jest.fn().mockResolvedValue(undefined) const mockExtension = { - importModels: mockImportModels, + importModel: mockImportModels, } as any jest.spyOn(extensionManager, 'get').mockReturnValue(mockExtension) @@ -26,15 +26,16 @@ describe('useImportModel', () => { const { result } = renderHook(() => useImportModel()) const models = [ - { importId: '1', name: 'Model 1', path: '/path/to/model1' }, - { importId: '2', name: 'Model 2', path: '/path/to/model2' }, + { modelId: '1', path: '/path/to/model1' }, + { modelId: '2', path: '/path/to/model2' }, ] as any await act(async () => { - await result.current.importModels(models, 'local' as any) + await result.current.importModels(models, 'copy') }) - expect(mockImportModels).toHaveBeenCalledWith(models, 'local') + expect(mockImportModels).toHaveBeenCalledWith('1', '/path/to/model1', undefined,'copy') + expect(mockImportModels).toHaveBeenCalledWith('2', '/path/to/model2', undefined, 'copy') }) it('should update model info successfully', async () => { @@ -42,7 +43,7 @@ describe('useImportModel', () => { .fn() .mockResolvedValue({ id: 'model-1', name: 'Updated Model' }) const mockExtension = { - updateModelInfo: mockUpdateModelInfo, + updateModel: mockUpdateModelInfo, } as any jest.spyOn(extensionManager, 'get').mockReturnValue(mockExtension) diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts index effc64f86..c49ddb964 100644 --- a/web/hooks/useImportModel.ts +++ b/web/hooks/useImportModel.ts @@ -3,13 +3,16 @@ import { useCallback } from 'react' import { ExtensionTypeEnum, ImportingModel, + LocalImportModelEvent, Model, ModelExtension, OptionType, + events, fs, + baseName, } from '@janhq/core' -import { atom, useSetAtom } from 'jotai' +import { atom, useAtomValue, useSetAtom } from 'jotai' import { v4 as uuidv4 } from 'uuid' @@ -18,7 +21,12 @@ import { snackbar } from '@/containers/Toast' import { FilePathWithSize } from '@/utils/file' import { extensionManager } from '@/extension' -import { importingModelsAtom } from '@/helpers/atoms/Model.atom' +import { + addDownloadingModelAtom, + downloadedModelsAtom, + importingModelsAtom, + removeDownloadingModelAtom, +} from '@/helpers/atoms/Model.atom' export type ImportModelStage = | 'NONE' @@ -49,11 +57,42 @@ export type ModelUpdate = { const useImportModel = () => { const setImportModelStage = useSetAtom(setImportModelStageAtom) const setImportingModels = useSetAtom(importingModelsAtom) + const addDownloadingModel = useSetAtom(addDownloadingModelAtom) + const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom) + const downloadedModels = 
useAtomValue(downloadedModelsAtom) + + const incrementalModelName = useCallback( + (name: string, startIndex: number = 0): string => { + const newModelName = startIndex ? `${name}-${startIndex}` : name + if (downloadedModels.some((model) => model.id === newModelName)) { + return incrementalModelName(name, startIndex + 1) + } else { + return newModelName + } + }, + [downloadedModels] + ) const importModels = useCallback( - (models: ImportingModel[], optionType: OptionType) => - localImportModels(models, optionType), - [] + (models: ImportingModel[], optionType: OptionType) => { + models.map(async (model) => { + const modelId = model.modelId ?? incrementalModelName(model.name) + if (modelId) { + addDownloadingModel(modelId) + extensionManager + .get(ExtensionTypeEnum.Model) + ?.importModel(modelId, model.path, model.name, optionType) + .finally(() => { + removeDownloadingModel(modelId) + events.emit(LocalImportModelEvent.onLocalImportModelSuccess, { + importId: model.importId, + modelId: modelId, + }) + }) + } + }) + }, + [addDownloadingModel, incrementalModelName, removeDownloadingModel] ) const updateModelInfo = useCallback( @@ -75,7 +114,7 @@ const useImportModel = () => { ({ path, name, size }: FilePathWithSize) => ({ importId: uuidv4(), modelId: undefined, - name: name.replace('.gguf', ''), + name: name.replace(/ /g, '').replace('.gguf', ''), description: '', path: path, tags: [], @@ -101,19 +140,11 @@ const useImportModel = () => { return { importModels, updateModelInfo, sanitizeFilePaths } } -const localImportModels = async ( - models: ImportingModel[], - optionType: OptionType -): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.importModels(models, optionType) - const localUpdateModelInfo = async ( modelInfo: Partial ): Promise => extensionManager .get(ExtensionTypeEnum.Model) - ?.updateModelInfo(modelInfo) + ?.updateModel(modelInfo) export default useImportModel diff --git a/web/hooks/useModels.test.ts b/web/hooks/useModels.test.ts index 4c53ffaa7..9b6b898ad 100644 --- a/web/hooks/useModels.test.ts +++ b/web/hooks/useModels.test.ts @@ -1,28 +1,23 @@ // useModels.test.ts - import { renderHook, act } from '@testing-library/react' -import { events, ModelEvent } from '@janhq/core' +import { events, ModelEvent, ModelManager } from '@janhq/core' import { extensionManager } from '@/extension' // Mock dependencies jest.mock('@janhq/core') jest.mock('@/extension') +jest.mock('use-debounce', () => ({ + useDebouncedCallback: jest.fn().mockImplementation((fn) => fn), +})) import useModels from './useModels' // Mock data -const mockDownloadedModels = [ +const models = [ { id: 'model-1', name: 'Model 1' }, { id: 'model-2', name: 'Model 2' }, ] -const mockConfiguredModels = [ - { id: 'model-3', name: 'Model 3' }, - { id: 'model-4', name: 'Model 4' }, -] - -const mockDefaultModel = { id: 'default-model', name: 'Default Model' } - describe('useModels', () => { beforeEach(() => { jest.clearAllMocks() @@ -30,20 +25,28 @@ describe('useModels', () => { it('should fetch and set models on mount', async () => { const mockModelExtension = { - getDownloadedModels: jest.fn().mockResolvedValue(mockDownloadedModels), - getConfiguredModels: jest.fn().mockResolvedValue(mockConfiguredModels), - getDefaultModel: jest.fn().mockResolvedValue(mockDefaultModel), + getModels: jest.fn().mockResolvedValue(models), } as any + ;(ModelManager.instance as jest.Mock).mockReturnValue({ + models: { + values: () => ({ + toArray: () => ({ + filter: () => models, + }), + }), + get: () => undefined, + has: () => 
true, + // set: () => {} + }, + }) jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension) - await act(async () => { + act(() => { renderHook(() => useModels()) }) - expect(mockModelExtension.getDownloadedModels).toHaveBeenCalled() - expect(mockModelExtension.getConfiguredModels).toHaveBeenCalled() - expect(mockModelExtension.getDefaultModel).toHaveBeenCalled() + expect(mockModelExtension.getModels).toHaveBeenCalled() }) it('should remove event listener on unmount', async () => { diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts index 58def79c6..400e02793 100644 --- a/web/hooks/useModels.ts +++ b/web/hooks/useModels.ts @@ -5,16 +5,19 @@ import { Model, ModelEvent, ModelExtension, - ModelFile, events, + ModelManager, } from '@janhq/core' import { useSetAtom } from 'jotai' +import { useDebouncedCallback } from 'use-debounce' + +import { isLocalEngine } from '@/utils/modelEngine' + import { extensionManager } from '@/extension' import { configuredModelsAtom, - defaultModelAtom, downloadedModelsAtom, } from '@/helpers/atoms/Model.atom' @@ -25,62 +28,68 @@ import { */ const useModels = () => { const setDownloadedModels = useSetAtom(downloadedModelsAtom) - const setConfiguredModels = useSetAtom(configuredModelsAtom) - const setDefaultModel = useSetAtom(defaultModelAtom) + const setExtensionModels = useSetAtom(configuredModelsAtom) const getData = useCallback(() => { const getDownloadedModels = async () => { - const models = await getLocalDownloadedModels() - setDownloadedModels(models) + const localModels = (await getModels()).map((e) => ({ + ...e, + name: ModelManager.instance().models.get(e.id)?.name ?? e.id, + metadata: + ModelManager.instance().models.get(e.id)?.metadata ?? e.metadata, + })) + + const remoteModels = ModelManager.instance() + .models.values() + .toArray() + .filter((e) => !isLocalEngine(e.engine)) + const toUpdate = [ + ...localModels, + ...remoteModels.filter( + (e: Model) => !localModels.some((g: Model) => g.id === e.id) + ), + ] + + setDownloadedModels(toUpdate) + + let isUpdated = false + toUpdate.forEach((model) => { + if (!ModelManager.instance().models.has(model.id)) { + ModelManager.instance().models.set(model.id, model) + isUpdated = true + } + }) + if (isUpdated) { + getExtensionModels() + } } - const getConfiguredModels = async () => { - const models = await getLocalConfiguredModels() - setConfiguredModels(models) + const getExtensionModels = () => { + const models = ModelManager.instance().models.values().toArray() + setExtensionModels(models) } - - const getDefaultModel = async () => { - const defaultModel = await getLocalDefaultModel() - setDefaultModel(defaultModel) - } - // Fetch all data - Promise.all([ - getDownloadedModels(), - getConfiguredModels(), - getDefaultModel(), - ]) - }, [setDownloadedModels, setConfiguredModels, setDefaultModel]) + getExtensionModels() + getDownloadedModels() + }, [setDownloadedModels, setExtensionModels]) + + const reloadData = useDebouncedCallback(() => getData(), 300) useEffect(() => { // Try get data on mount - getData() + reloadData() // Listen for model updates - events.on(ModelEvent.OnModelsUpdate, async () => getData()) + events.on(ModelEvent.OnModelsUpdate, async () => reloadData()) return () => { // Remove listener on unmount events.off(ModelEvent.OnModelsUpdate, async () => {}) } - }, [getData]) + }, [getData, reloadData]) } -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalDefaultModel = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - 
?.getDefaultModel() - -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalConfiguredModels = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.getConfiguredModels() ?? [] - -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalDownloadedModels = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.getDownloadedModels() ?? [] +const getModels = async (): Promise => + extensionManager.get(ExtensionTypeEnum.Model)?.getModels() ?? + [] export default useModels diff --git a/web/hooks/useRecommendedModel.ts b/web/hooks/useRecommendedModel.ts index ed56efa55..21a9c69e7 100644 --- a/web/hooks/useRecommendedModel.ts +++ b/web/hooks/useRecommendedModel.ts @@ -1,6 +1,6 @@ import { useCallback, useEffect, useState } from 'react' -import { Model, InferenceEngine, ModelFile } from '@janhq/core' +import { Model, InferenceEngine } from '@janhq/core' import { atom, useAtomValue } from 'jotai' @@ -24,16 +24,12 @@ export const LAST_USED_MODEL_ID = 'last-used-model-id' */ export default function useRecommendedModel() { const activeModel = useAtomValue(activeModelAtom) - const [sortedModels, setSortedModels] = useState([]) - const [recommendedModel, setRecommendedModel] = useState< - ModelFile | undefined - >() + const [sortedModels, setSortedModels] = useState([]) + const [recommendedModel, setRecommendedModel] = useState() const activeThread = useAtomValue(activeThreadAtom) const downloadedModels = useAtomValue(downloadedModelsAtom) - const getAndSortDownloadedModels = useCallback(async (): Promise< - ModelFile[] - > => { + const getAndSortDownloadedModels = useCallback(async (): Promise => { const models = downloadedModels.sort((a, b) => a.engine !== InferenceEngine.nitro && b.engine === InferenceEngine.nitro ? 1 diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts index 1dbd5b45e..cda53b24a 100644 --- a/web/hooks/useSendChatMessage.ts +++ b/web/hooks/useSendChatMessage.ts @@ -12,6 +12,7 @@ import { ToolManager, ChatCompletionMessage, } from '@janhq/core' +import { extractInferenceParams, extractModelLoadParams } from '@janhq/core' import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' import { @@ -23,10 +24,6 @@ import { import { Stack } from '@/utils/Stack' import { compressImage, getBase64 } from '@/utils/base64' import { MessageRequestBuilder } from '@/utils/messageRequestBuilder' -import { - extractInferenceParams, - extractModelLoadParams, -} from '@/utils/modelParam' import { ThreadMessageBuilder } from '@/utils/threadMessageBuilder' @@ -123,65 +120,27 @@ export default function useSendChatMessage() { } const resendChatMessage = async (currentMessage: ThreadMessage) => { - if (!activeThreadRef.current) { - console.error('No active thread') - return - } - updateThreadWaiting(activeThreadRef.current.id, true) + // Delete last response before regenerating + const newConvoData = currentMessages + let toSendMessage = currentMessage - const requestBuilder = new MessageRequestBuilder( - MessageRequestType.Thread, - activeThreadRef.current.assistants[0].model ?? selectedModelRef.current, - activeThreadRef.current, - currentMessages - ) - .addSystemMessage(activeThreadRef.current.assistants[0]?.instructions) - .removeLastAssistantMessage() + do { + deleteMessage(currentMessage.id) + const msg = newConvoData.pop() + if (!msg) break + toSendMessage = msg + deleteMessage(toSendMessage.id ?? 
'') + } while (toSendMessage.role !== ChatCompletionRole.User) - const modelId = - selectedModelRef.current?.id ?? - activeThreadRef.current.assistants[0].model.id - - if (modelRef.current?.id !== modelId) { - const error = await startModel(modelId).catch((error: Error) => error) - if (error) { - updateThreadWaiting(activeThreadRef.current.id, false) - return - } + if (activeThreadRef.current) { + await extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.writeMessages(activeThreadRef.current.id, newConvoData) } - setIsGeneratingResponse(true) - - if (currentMessage.role !== ChatCompletionRole.User) { - // Delete last response before regenerating - deleteMessage(currentMessage.id ?? '') - if (activeThreadRef.current) { - await extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.writeMessages( - activeThreadRef.current.id, - currentMessages.filter((msg) => msg.id !== currentMessage.id) - ) - } - } - // Process message request with Assistants tools - const request = await ToolManager.instance().process( - requestBuilder.build(), - activeThreadRef.current.assistants?.flatMap( - (assistant) => assistant.tools ?? [] - ) ?? [] - ) - - request.messages = normalizeMessages(request.messages ?? []) - - const engine = - requestBuilder.model?.engine ?? selectedModelRef.current?.engine ?? '' - - EngineManager.instance().get(engine)?.inference(request) + sendChatMessage(toSendMessage.content[0]?.text.value) } - // Define interface extending Array prototype - const sendChatMessage = async (message: string) => { if (!message || message.trim().length === 0) return @@ -254,7 +213,7 @@ export default function useSendChatMessage() { ...activeThreadRef.current, updated: newMessage.created, metadata: { - ...(activeThreadRef.current.metadata ?? {}), + ...activeThreadRef.current.metadata, lastMessage: prompt, }, } diff --git a/web/hooks/useUpdateModelParameters.ts b/web/hooks/useUpdateModelParameters.ts index 2af6e3323..6eb7c3c5a 100644 --- a/web/hooks/useUpdateModelParameters.ts +++ b/web/hooks/useUpdateModelParameters.ts @@ -6,15 +6,12 @@ import { InferenceEngine, Thread, ThreadAssistantInfo, + extractInferenceParams, + extractModelLoadParams, } from '@janhq/core' import { useAtom, useAtomValue, useSetAtom } from 'jotai' -import { - extractInferenceParams, - extractModelLoadParams, -} from '@/utils/modelParam' - import { extensionManager } from '@/extension' import { selectedModelAtom } from '@/helpers/atoms/Model.atom' import { diff --git a/web/package.json b/web/package.json index bed94aed4..d3ee82a33 100644 --- a/web/package.json +++ b/web/package.json @@ -38,7 +38,7 @@ "react": "18.2.0", "react-circular-progressbar": "^2.1.0", "react-dom": "18.2.0", - "react-dropzone": "^14.2.3", + "react-dropzone": "14.2.3", "react-hook-form": "^7.47.0", "react-hot-toast": "^2.4.1", "react-icons": "^4.12.0", @@ -52,8 +52,9 @@ "uuid": "^9.0.1", "zod": "^3.22.4", "slate": "latest", - "slate-react": "latest", - "slate-history": "latest" + "slate-dom": "0.111.0", + "slate-react": "0.110.3", + "slate-history": "0.110.3" }, "devDependencies": { "@next/eslint-plugin-next": "^14.0.1", diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx index 44a3fd278..da98e41e3 100644 --- a/web/screens/Hub/ModelList/ModelHeader/index.tsx +++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx @@ -1,6 +1,6 @@ import { useCallback } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { Button, Badge, Tooltip } from '@janhq/joi' 
import { useAtomValue, useSetAtom } from 'jotai' @@ -38,7 +38,7 @@ import { } from '@/helpers/atoms/SystemBar.atom' type Props = { - model: ModelFile + model: Model onClick: () => void open: string } @@ -64,7 +64,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { const assistants = useAtomValue(assistantsAtom) const onDownloadClick = useCallback(() => { - downloadModel(model) + downloadModel(model.sources[0].url, model.id, model.name) }, [model, downloadModel]) const isDownloaded = downloadedModels.find((md) => md.id === model.id) != null @@ -81,7 +81,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { ) - const isDownloading = downloadingModels.some((md) => md.id === model.id) + const isDownloading = downloadingModels.some((md) => md === model.id) const onUseModelClick = useCallback(async () => { if (assistants.length === 0) { @@ -123,17 +123,6 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { className="cursor-pointer rounded-t-md bg-[hsla(var(--app-bg))]" onClick={onClick} > - {/* TODO: @faisal are we still using cover? */} - {/* {model.metadata.cover && imageLoaded && ( -
      - setImageLoaded(false)} - src={model.metadata.cover} - className="h-[250px] w-full object-cover" - alt={`Cover - ${model.id}`} - /> -
      - )} */}
      @@ -144,7 +133,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => {
      - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)}
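Note: this hunk switches ModelHeader from ModelFile to Model, and the downloading state now tracks plain model ids (strings) instead of model objects, so membership checks compare ids directly and downloads start from the model's first source URL. A minimal sketch of the call shapes this implies, written standalone; the parameter names and the optional `name` argument are assumptions inferred from the surrounding hunks, not confirmed signatures:

    import { Model } from '@janhq/core'

    // Assumption: the downloading list now holds model ids, matching the
    // `md === model.id` comparison introduced in this diff.
    const isDownloading = (downloadingModels: string[], model: Model): boolean =>
      downloadingModels.some((id) => id === model.id)

    // Assumption: downloadModel now takes (url, id, name) rather than a ModelFile,
    // mirroring downloadModel(model.sources[0].url, model.id, model.name) above.
    const startDownload = (
      downloadModel: (url: string, id: string, name?: string) => void,
      model: Model
    ): void => downloadModel(model.sources[0].url, model.id, model.name)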
      diff --git a/web/screens/Hub/ModelList/ModelItem/index.tsx b/web/screens/Hub/ModelList/ModelItem/index.tsx index ec9d885a1..a077dbffc 100644 --- a/web/screens/Hub/ModelList/ModelItem/index.tsx +++ b/web/screens/Hub/ModelList/ModelItem/index.tsx @@ -1,6 +1,6 @@ import { useState } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { Badge } from '@janhq/joi' import { twMerge } from 'tailwind-merge' @@ -12,7 +12,7 @@ import ModelItemHeader from '@/screens/Hub/ModelList/ModelHeader' import { toGibibytes } from '@/utils/converter' type Props = { - model: ModelFile + model: Model } const ModelItem: React.FC = ({ model }) => { @@ -34,7 +34,7 @@ const ModelItem: React.FC = ({ model }) => {
      - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)}
      @@ -49,9 +49,9 @@ const ModelItem: React.FC = ({ model }) => { Author

      - {model.metadata.author} + {model.metadata?.author}

      @@ -66,7 +66,7 @@ const ModelItem: React.FC = ({ model }) => {
      Tags
      - {model.metadata.tags.map((tag: string) => ( + {model.metadata?.tags?.map((tag: string) => ( {tag} diff --git a/web/screens/Hub/ModelList/index.tsx b/web/screens/Hub/ModelList/index.tsx index 8fc30d541..0d7865a81 100644 --- a/web/screens/Hub/ModelList/index.tsx +++ b/web/screens/Hub/ModelList/index.tsx @@ -1,6 +1,6 @@ import { useMemo } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { useAtomValue } from 'jotai' @@ -9,16 +9,16 @@ import ModelItem from '@/screens/Hub/ModelList/ModelItem' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' type Props = { - models: ModelFile[] + models: Model[] } const ModelList = ({ models }: Props) => { const downloadedModels = useAtomValue(downloadedModelsAtom) - const sortedModels: ModelFile[] = useMemo(() => { - const featuredModels: ModelFile[] = [] - const remoteModels: ModelFile[] = [] - const localModels: ModelFile[] = [] - const remainingModels: ModelFile[] = [] + const sortedModels: Model[] = useMemo(() => { + const featuredModels: Model[] = [] + const remoteModels: Model[] = [] + const localModels: Model[] = [] + const remainingModels: Model[] = [] models.forEach((m) => { if (m.metadata?.tags?.includes('Featured')) { featuredModels.push(m) @@ -30,9 +30,9 @@ const ModelList = ({ models }: Props) => { remainingModels.push(m) } }) - featuredModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) - localModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) - remainingModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) + featuredModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) + localModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) + remainingModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) remoteModels.sort((m1, m2) => m1.name.localeCompare(m2.name)) return [ ...featuredModels, diff --git a/web/screens/Hub/index.tsx b/web/screens/Hub/index.tsx index 8148a6bb5..382cf5667 100644 --- a/web/screens/Hub/index.tsx +++ b/web/screens/Hub/index.tsx @@ -52,7 +52,7 @@ const HubScreen = () => { } else if (sortSelected === 'featured') { return ( x.name.toLowerCase().includes(searchValue.toLowerCase()) && - x.metadata.tags.includes('Featured') + x.metadata?.tags?.includes('Featured') ) } else { return x.name.toLowerCase().includes(searchValue.toLowerCase()) diff --git a/web/screens/LocalServer/LocalServerRightPanel/index.tsx b/web/screens/LocalServer/LocalServerRightPanel/index.tsx index 13e3cad57..a59e83e7e 100644 --- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx +++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx @@ -1,8 +1,9 @@ import { useCallback, useEffect, useMemo, useState } from 'react' -import { Accordion, AccordionItem } from '@janhq/joi' +import { extractInferenceParams, extractModelLoadParams } from '@janhq/core' +import { Accordion, AccordionItem, Input } from '@janhq/joi' import { useAtomValue, useSetAtom } from 'jotai' -import { AlertTriangleIcon, InfoIcon } from 'lucide-react' +import { AlertTriangleIcon, CheckIcon, CopyIcon, InfoIcon } from 'lucide-react' import EngineSetting from '@/containers/EngineSetting' import { modalTroubleShootingAtom } from '@/containers/ModalTroubleShoot' @@ -12,12 +13,9 @@ import RightPanelContainer from '@/containers/RightPanelContainer' import { loadModelErrorAtom } from '@/hooks/useActiveModel' -import { getConfigurationsData } from '@/utils/componentSettings' +import { useClipboard } from '@/hooks/useClipboard' -import { - extractInferenceParams, - 
extractModelLoadParams, -} from '@/utils/modelParam' +import { getConfigurationsData } from '@/utils/componentSettings' import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' import { selectedModelAtom } from '@/helpers/atoms/Model.atom' @@ -29,6 +27,8 @@ const LocalServerRightPanel = () => { const selectedModel = useAtomValue(selectedModelAtom) + const clipboard = useClipboard({ timeout: 1000 }) + const [currentModelSettingParams, setCurrentModelSettingParams] = useState( extractModelLoadParams(selectedModel?.settings) ) @@ -92,6 +92,30 @@ const LocalServerRightPanel = () => { +
      + { + clipboard.copy(selectedModel?.id) + }} + suffixIcon={ + clipboard.copied ? ( + + ) : ( + + ) + } + /> +
      + {loadModelError && serverEnabled && (
      { * @param gpuId * @returns */ - const handleGPUChange = (gpuId: string) => { + const handleGPUChange = async (gpuId: string) => { let updatedGpusInUse = [...gpusInUse] if (updatedGpusInUse.includes(gpuId)) { updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId) @@ -208,7 +208,8 @@ const Advanced = () => { updatedGpusInUse.push(gpuId) } setGpusInUse(updatedGpusInUse) - saveSettings({ gpusInUse: updatedGpusInUse }) + await saveSettings({ gpusInUse: updatedGpusInUse }) + window.core?.api?.relaunch() } const gpuSelectionPlaceHolder = @@ -305,7 +306,13 @@ const Advanced = () => { }) } // Stop any running model to apply the changes - if (e.target.checked !== gpuEnabled) stopModel() + if (e.target.checked !== gpuEnabled) { + stopModel().finally(() => { + setTimeout(() => { + window.location.reload() + }, 300) + }) + } }} /> } diff --git a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx index c3f09f171..96d8ac04e 100644 --- a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx +++ b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx @@ -1,11 +1,6 @@ -import { useCallback, useMemo } from 'react' +import { useCallback } from 'react' -import { - DownloadState, - HuggingFaceRepoData, - Model, - Quantization, -} from '@janhq/core' +import { DownloadState, HuggingFaceRepoData, Quantization } from '@janhq/core' import { Badge, Button, Progress } from '@janhq/joi' import { useAtomValue, useSetAtom } from 'jotai' @@ -20,13 +15,15 @@ import { modelDownloadStateAtom } from '@/hooks/useDownloadState' import { formatDownloadPercentage, toGibibytes } from '@/utils/converter' +import { normalizeModelId } from '@/utils/model' + import { mainViewStateAtom } from '@/helpers/atoms/App.atom' import { assistantsAtom } from '@/helpers/atoms/Assistant.atom' import { importHuggingFaceModelStageAtom } from '@/helpers/atoms/HuggingFace.atom' import { - defaultModelAtom, downloadedModelsAtom, + getDownloadingModelAtom, } from '@/helpers/atoms/Model.atom' type Props = { @@ -39,7 +36,6 @@ type Props = { } const ModelDownloadRow: React.FC = ({ - repoData, downloadUrl, fileName, fileSize = 0, @@ -49,51 +45,31 @@ const ModelDownloadRow: React.FC = ({ const { downloadModel, abortModelDownload } = useDownloadModel() const allDownloadStates = useAtomValue(modelDownloadStateAtom) const downloadState: DownloadState | undefined = allDownloadStates[fileName] + const downloadingModels = useAtomValue(getDownloadingModelAtom) const { requestCreateNewThread } = useCreateNewThread() const setMainViewState = useSetAtom(mainViewStateAtom) const assistants = useAtomValue(assistantsAtom) const downloadedModel = downloadedModels.find((md) => md.id === fileName) + const isDownloading = downloadingModels.some((md) => md === fileName) const setHfImportingStage = useSetAtom(importHuggingFaceModelStageAtom) - const defaultModel = useAtomValue(defaultModelAtom) - - const model = useMemo(() => { - if (!defaultModel) { - return undefined - } - - const model: Model = { - ...defaultModel, - sources: [ - { - url: downloadUrl, - filename: fileName, - }, - ], - id: fileName, - name: fileName, - created: Date.now(), - metadata: { - author: 'User', - tags: repoData.tags, - size: fileSize, - }, - } - return model - }, [fileName, fileSize, repoData, downloadUrl, defaultModel]) const onAbortDownloadClick = useCallback(() => { - if (model) { - abortModelDownload(model) + if (downloadUrl) { + 
abortModelDownload(normalizeModelId(downloadUrl)) } - }, [model, abortModelDownload]) + }, [downloadUrl, abortModelDownload]) const onDownloadClick = useCallback(async () => { - if (model) { - downloadModel(model) + if (downloadUrl) { + downloadModel( + downloadUrl, + normalizeModelId(downloadUrl), + normalizeModelId(downloadUrl) + ) } - }, [model, downloadModel]) + }, [downloadUrl, downloadModel]) const onUseModelClick = useCallback(async () => { if (assistants.length === 0) { @@ -111,14 +87,14 @@ const ModelDownloadRow: React.FC = ({ setHfImportingStage, ]) - if (!model) { + if (!downloadUrl) { return null } return (
      -
      -
      +
      +
      {quantization && ( {quantization} @@ -126,17 +102,18 @@ const ModelDownloadRow: React.FC = ({ )}

      {fileName}

      - - {toGibibytes(fileSize)} - +
      + + {toGibibytes(fileSize)} + +
      {downloadedModel ? ( @@ -144,11 +121,11 @@ const ModelDownloadRow: React.FC = ({ variant="soft" className="min-w-[98px]" onClick={onUseModelClick} - data-testid={`use-model-btn-${model.id}`} + data-testid={`use-model-btn-${downloadUrl}`} > Use - ) : downloadState != null ? ( + ) : isDownloading ? ( diff --git a/web/screens/Settings/ImportModelOptionModal/index.tsx b/web/screens/Settings/ImportModelOptionModal/index.tsx index 5a2af2335..f185b9015 100644 --- a/web/screens/Settings/ImportModelOptionModal/index.tsx +++ b/web/screens/Settings/ImportModelOptionModal/index.tsx @@ -15,13 +15,13 @@ import { importingModelsAtom } from '@/helpers/atoms/Model.atom' const importOptions: ModelImportOption[] = [ { - type: 'SYMLINK', + type: 'symlink', title: 'Keep Original Files & Symlink', description: 'You maintain your model files outside of Jan. Keeping your files where they are, and Jan will create a smart link to them.', }, { - type: 'MOVE_BINARY_FILE', + type: 'copy', title: 'Move model binary file', description: 'Jan will move your model binary file from your current folder into Jan Data Folder.', diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx index c9ca6e867..2e87f3080 100644 --- a/web/screens/Settings/MyModels/MyModelList/index.tsx +++ b/web/screens/Settings/MyModels/MyModelList/index.tsx @@ -1,6 +1,6 @@ import { memo, useState } from 'react' -import { InferenceEngine, ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { Badge, Button, Tooltip, useClickOutside } from '@janhq/joi' import { useAtom } from 'jotai' import { @@ -21,7 +21,7 @@ import { isLocalEngine } from '@/utils/modelEngine' import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' type Props = { - model: ModelFile + model: Model groupTitle?: string } @@ -54,14 +54,14 @@ const MyModelList = ({ model }: Props) => {
      {model.name}
      - {model.engine === InferenceEngine.nitro && ( + {isLocalEngine(model.engine) && (

      {

      - {toGibibytes(model.metadata.size)} + {model.metadata?.size ? toGibibytes(model.metadata?.size) : '-'}
      {stateModel.loading && stateModel.model?.id === model.id ? ( @@ -99,7 +99,7 @@ const MyModelList = ({ model }: Props) => { Active @@ -107,7 +107,7 @@ const MyModelList = ({ model }: Props) => { ) : ( Inactive diff --git a/web/screens/Settings/MyModels/index.tsx b/web/screens/Settings/MyModels/index.tsx index 547e6153b..218f8cb62 100644 --- a/web/screens/Settings/MyModels/index.tsx +++ b/web/screens/Settings/MyModels/index.tsx @@ -116,7 +116,12 @@ const MyModels = () => { getAllSettings() }, []) - const findByEngine = filteredDownloadedModels.map((x) => x.engine) + const findByEngine = filteredDownloadedModels.map((x) => { + // Legacy engine support - they will be grouped under Cortex LlamaCPP + if (x.engine === InferenceEngine.nitro) + return InferenceEngine.cortex_llamacpp + return x.engine + }) const groupByEngine = findByEngine .filter(function (item, index) { if (findByEngine.indexOf(item) === index) return item @@ -245,7 +250,12 @@ const MyModels = () => {
      {filteredDownloadedModels ? filteredDownloadedModels - .filter((x) => x.engine === engine) + .filter( + (x) => + x.engine === engine || + (x.engine === InferenceEngine.nitro && + engine === InferenceEngine.cortex_llamacpp) + ) .map((model) => { if (!showModel) return null return ( diff --git a/web/screens/Settings/SelectingModelModal/index.tsx b/web/screens/Settings/SelectingModelModal/index.tsx index 6273d0032..9a2f4fe82 100644 --- a/web/screens/Settings/SelectingModelModal/index.tsx +++ b/web/screens/Settings/SelectingModelModal/index.tsx @@ -21,7 +21,7 @@ const SelectingModelModal = () => { const onSelectFileClick = useCallback(async () => { const platform = (await systemInformation()).osInfo?.platform - if (platform === 'win32') { + if (platform !== 'darwin') { setImportModelStage('CHOOSE_WHAT_TO_IMPORT') return } diff --git a/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx b/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx index 3127f1578..647263ffe 100644 --- a/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx +++ b/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx @@ -7,7 +7,13 @@ import { } from '@janhq/core' import { Input } from '@janhq/joi' -import { CopyIcon, EyeIcon, FolderOpenIcon } from 'lucide-react' +import { + CheckIcon, + CopyIcon, + EyeIcon, + EyeOffIcon, + FolderOpenIcon, +} from 'lucide-react' import { Marked, Renderer } from 'marked' type Props = { @@ -34,6 +40,7 @@ const SettingDetailTextInputItem = ({ const { value, type, placeholder, textAlign, inputActions } = settingProps.controllerProps as InputComponentProps const [obscure, setObscure] = useState(type === 'password') + const [copied, setCopied] = useState(false) const description = marked.parse(settingProps.description ?? '', { async: false, @@ -45,6 +52,10 @@ const SettingDetailTextInputItem = ({ const copy = useCallback(() => { navigator.clipboard.writeText(value) + if (value.length > 0) { + setCopied(true) + } + setTimeout(() => setCopied(false), 2000) // Reset icon after 2 seconds }, [value]) const onAction = useCallback( @@ -84,6 +95,8 @@ const SettingDetailTextInputItem = ({ } /> @@ -95,33 +108,51 @@ const SettingDetailTextInputItem = ({ type InputActionProps = { actions: InputAction[] onAction: (action: InputAction) => void + copied: boolean + obscure: boolean } const InputExtraActions: React.FC = ({ actions, onAction, + copied, + obscure, }) => { if (actions.length === 0) return return (
      {actions.map((action) => { switch (action) { case 'copy': - return ( + return copied ? ( + onAction('copy')} + className="text-green-600" + /> + ) : ( onAction(action)} + onClick={() => onAction('copy')} /> ) case 'unobscure': - return ( + return obscure ? ( onAction(action)} + onClick={() => onAction('unobscure')} + /> + ) : ( + onAction('unobscure')} /> ) diff --git a/web/screens/Settings/SettingLeftPanel/SettingItem/index.tsx index 505a2cf40..b8c1994c5 100644 --- a/web/screens/Settings/SettingLeftPanel/SettingItem/index.tsx +++ b/web/screens/Settings/SettingLeftPanel/SettingItem/index.tsx @@ -1,6 +1,5 @@ import { useCallback } from 'react' -import { motion as m } from 'framer-motion' import { useAtom } from 'jotai' import { twMerge } from 'tailwind-merge' @@ -22,23 +21,20 @@ const SettingItem = ({ name, setting }: Props) => { return (
      {name} - {isActive && ( - - )}
      ) } diff --git a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx index 4dab6bfa8..f73efb486 100644 --- a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx @@ -38,20 +38,20 @@ const AssistantSetting: React.FC = ({ componentData }) => { (key === 'chunk_overlap' || key === 'chunk_size') ) { if ( - activeThread.assistants[0].tools[0]?.settings.chunk_size < - activeThread.assistants[0].tools[0]?.settings.chunk_overlap + activeThread.assistants[0].tools[0]?.settings?.chunk_size < + activeThread.assistants[0].tools[0]?.settings?.chunk_overlap ) { activeThread.assistants[0].tools[0].settings.chunk_overlap = activeThread.assistants[0].tools[0].settings.chunk_size } if ( key === 'chunk_size' && - value < activeThread.assistants[0].tools[0].settings.chunk_overlap + value < activeThread.assistants[0].tools[0].settings?.chunk_overlap ) { activeThread.assistants[0].tools[0].settings.chunk_overlap = value } else if ( key === 'chunk_overlap' && - value > activeThread.assistants[0].tools[0].settings.chunk_size + value > activeThread.assistants[0].tools[0].settings?.chunk_size ) { activeThread.assistants[0].tools[0].settings.chunk_size = value } diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx index 6fc05d44b..403370ade 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx @@ -1,6 +1,5 @@ import { memo } from 'react' -import { InferenceEngine } from '@janhq/core' import { Button } from '@janhq/joi' import { useAtomValue, useSetAtom } from 'jotai' @@ -8,6 +7,8 @@ import LogoMark from '@/containers/Brand/Logo/Mark' import { MainViewState } from '@/constants/screens' +import { isLocalEngine } from '@/utils/modelEngine' + import { mainViewStateAtom } from '@/helpers/atoms/App.atom' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' @@ -15,8 +16,7 @@ const EmptyThread = () => { const downloadedModels = useAtomValue(downloadedModelsAtom) const setMainViewState = useSetAtom(mainViewStateAtom) const showOnboardingStep = - downloadedModels.filter((e) => e.engine === InferenceEngine.nitro) - .length === 0 + downloadedModels.filter((e) => isLocalEngine(e.engine)).length === 0 return (
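Note: EmptyThread (like MyModelList earlier in this diff) now gates its onboarding check on isLocalEngine(e.engine) instead of hard-coding InferenceEngine.nitro, which also covers models grouped under the newer cortex_llamacpp engine. A hypothetical sketch of such a helper, assuming only the enum values that appear elsewhere in this diff; the real implementation lives in web/utils/modelEngine.ts and may list more engines:

    import { InferenceEngine } from '@janhq/core'

    // Assumption: engines considered "local" (on-device); the actual list
    // in modelEngine.ts may differ.
    const localEngines: InferenceEngine[] = [
      InferenceEngine.nitro,
      InferenceEngine.cortex_llamacpp,
    ]

    export const isLocalEngine = (engine?: InferenceEngine): boolean =>
      engine != null && localEngines.includes(engine)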
      diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx index 0ef9a9ba1..0433a8688 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx @@ -69,7 +69,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { return x.id === recommendModel[0] || x.id === recommendModel[1] } else { return ( - x.metadata.tags.includes('Featured') && x.metadata.size < 5000000000 + x.metadata?.tags?.includes('Featured') && x.metadata?.size < 5000000000 ) } }) @@ -143,7 +143,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { ) : ( filteredModels.map((model) => { const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) return (
      {
      - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} {!isDownloading ? ( downloadModel(model)} + onClick={() => + downloadModel( + model.sources[0].url, + model.id, + model.name + ) + } /> ) : ( Object.values(downloadStates) @@ -210,24 +216,24 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { {featuredModel.slice(0, 2).map((featModel) => { const isDownloading = downloadingModels.some( - (md) => md.id === featModel.id + (md) => md === featModel.id ) return (
      -
      {featModel.name}
      +
      {featModel.name}
      {isDownloading ? ( -
      +
      {Object.values(downloadStates) .filter((x) => x.modelId === featModel.id) .map((item, i) => (
      {
      ))} + + {toGibibytes(featModel.metadata?.size)} +
      ) : (
      - {toGibibytes(featModel.metadata.size)} + {toGibibytes(featModel.metadata?.size)}
      )} diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx index 6e6beda07..70fecb8a9 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx @@ -5,7 +5,7 @@ import { MessageStatus } from '@janhq/core' import hljs from 'highlight.js' import { useAtom, useAtomValue } from 'jotai' -import { BaseEditor, createEditor, Editor, Element, Transforms } from 'slate' +import { BaseEditor, createEditor, Editor, Transforms } from 'slate' import { withHistory } from 'slate-history' // Import withHistory import { Editable, @@ -69,6 +69,9 @@ const RichTextEditor = ({ }: RichTextEditorProps) => { const [editor] = useState(() => withHistory(withReact(createEditor()))) const currentLanguage = useRef('plaintext') + const hasStartBackticks = useRef(false) + const hasEndBackticks = useRef(false) + const [currentPrompt, setCurrentPrompt] = useAtom(currentPromptAtom) const textareaRef = useRef(null) const activeThreadId = useAtomValue(getActiveThreadIdAtom) @@ -129,57 +132,35 @@ const RichTextEditor = ({ }) } - if (Editor.isBlock(editor, node) && node.type === 'code') { + if (Editor.isBlock(editor, node) && node.type === 'paragraph') { node.children.forEach((child: { text: any }, childIndex: number) => { const text = child.text - // Match code block start and end - const startMatch = text.match(/^```(\w*)$/) - const endMatch = text.match(/^```$/) - const inlineMatch = text.match(/^`([^`]+)`$/) // Match inline code + const codeBlockStartRegex = /```(\w*)/g + const matches = [...currentPrompt.matchAll(codeBlockStartRegex)] - if (startMatch) { + if (matches.length % 2 !== 0) { + hasEndBackticks.current = false + } + + // Match code block start and end + const lang = text.match(/^```(\w*)$/) + const endMatch = text.match(/^```$/) + + if (lang) { // If it's the start of a code block, store the language - currentLanguage.current = startMatch[1] || 'plaintext' + currentLanguage.current = lang[1] || 'plaintext' } else if (endMatch) { // Reset language when code block ends currentLanguage.current = 'plaintext' - } else if (inlineMatch) { - // Apply syntax highlighting to inline code - const codeContent = inlineMatch[1] // Get the content within the backticks - try { - hljs.highlight(codeContent, { - language: - currentLanguage.current.length > 1 - ? currentLanguage.current - : 'plaintext', - }).value - } catch (err) { - hljs.highlight(codeContent, { - language: 'javascript', - }).value - } - - // Calculate the range for the inline code - const length = codeContent.length - ranges.push({ - anchor: { - path: [...path, childIndex], - offset: inlineMatch.index + 1, - }, - focus: { - path: [...path, childIndex], - offset: inlineMatch.index + 1 + length, - }, - type: 'code', - code: true, - language: currentLanguage.current, - className: '', // Specify class name if needed - }) - } else if (currentLanguage.current !== 'plaintext') { + } else if ( + hasStartBackticks.current && + hasEndBackticks.current && + currentLanguage.current !== 'plaintext' + ) { // Highlight entire code line if in a code block - const leadingSpaces = text.match(/^\s*/)?.[0] ?? 
'' // Capture leading spaces - const codeContent = text.trimStart() // Remove leading spaces for highlighting + + const codeContent = text.trim() // Remove leading spaces for highlighting let highlighted = '' highlighted = hljs.highlightAuto(codeContent).value @@ -201,21 +182,9 @@ const RichTextEditor = ({ let slateTextIndex = 0 - // Adjust to include leading spaces in the ranges and preserve formatting - ranges.push({ - anchor: { path: [...path, childIndex], offset: 0 }, - focus: { - path: [...path, childIndex], - offset: leadingSpaces.length, - }, - type: 'code', - code: true, - language: currentLanguage.current, - className: '', // No class for leading spaces - }) - doc.body.childNodes.forEach((childNode) => { const childText = childNode.textContent || '' + const length = childText.length const className = childNode.nodeType === Node.ELEMENT_NODE @@ -225,11 +194,11 @@ const RichTextEditor = ({ ranges.push({ anchor: { path: [...path, childIndex], - offset: slateTextIndex + leadingSpaces.length, + offset: slateTextIndex, }, focus: { path: [...path, childIndex], - offset: slateTextIndex + leadingSpaces.length + length, + offset: slateTextIndex + length, }, type: 'code', code: true, @@ -240,6 +209,7 @@ const RichTextEditor = ({ slateTextIndex += length }) } else { + currentLanguage.current = 'plaintext' ranges.push({ anchor: { path: [...path, childIndex], offset: 0 }, focus: { path: [...path, childIndex], offset: text.length }, @@ -252,7 +222,7 @@ const RichTextEditor = ({ return ranges }, - [editor] + [currentPrompt, editor] ) // RenderLeaf applies the decoration styles @@ -287,10 +257,13 @@ const RichTextEditor = ({ ) useEffect(() => { + if (!ReactEditor.isFocused(editor)) { + ReactEditor.focus(editor) + } if (textareaRef.current) { textareaRef.current.focus() } - }, [activeThreadId]) + }, [activeThreadId, editor]) useEffect(() => { if (textareaRef.current?.clientHeight) { @@ -298,6 +271,10 @@ const RichTextEditor = ({ ? '100px' : '40px' textareaRef.current.style.height = textareaRef.current.scrollHeight + 'px' + textareaRef.current?.scrollTo({ + top: textareaRef.current.scrollHeight, + behavior: 'instant', + }) textareaRef.current.style.overflow = textareaRef.current.clientHeight >= 390 ? 'auto' : 'hidden' } @@ -341,35 +318,6 @@ const RichTextEditor = ({ resetEditor() } else onStopInferenceClick() } - - if (event.key === '`') { - // Determine whether any of the currently selected blocks are code blocks. - const [match] = Editor.nodes(editor, { - match: (n) => - Element.isElement(n) && (n as CustomElement).type === 'code', - }) - // Toggle the block type dependsing on whether there's already a match. - Transforms.setNodes( - editor, - { type: match ? 
'paragraph' : 'code' }, - { match: (n) => Element.isElement(n) && Editor.isBlock(editor, n) } - ) - } - - if (event.key === 'Tab') { - const [match] = Editor.nodes(editor, { - match: (n) => { - return (n as CustomElement).type === 'code' - }, - mode: 'lowest', - }) - - if (match) { - event.preventDefault() - // Insert a tab character - Editor.insertText(editor, ' ') // Insert 2 spaces - } - } }, // eslint-disable-next-line react-hooks/exhaustive-deps [currentPrompt, editor, messages] @@ -390,6 +338,25 @@ const RichTextEditor = ({ .join('\n') setCurrentPrompt(combinedText) + if (combinedText.trim() === '') { + currentLanguage.current = 'plaintext' + } + const hasCodeBlockStart = combinedText.match(/^```(\w*)/m) + const hasCodeBlockEnd = combinedText.match(/^```$/m) + + // Set language to plaintext if no code block with language identifier is found + if (!hasCodeBlockStart) { + currentLanguage.current = 'plaintext' + hasStartBackticks.current = false + } else { + hasStartBackticks.current = true + } + if (!hasCodeBlockEnd) { + currentLanguage.current = 'plaintext' + hasEndBackticks.current = false + } else { + hasEndBackticks.current = true + } }} > {
      {renderPreview(fileUpload)} - { disabled={stateModel.loading || !activeThread} />
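Note: the reworked RichTextEditor decorator infers whether the caret sits inside a fenced code block by counting triple-backtick fences in the current prompt and treating an odd count as an unclosed block (the matches.length % 2 !== 0 check above). A standalone sketch of that parity check; the helper names are illustrative, not taken from the source:

    // Count fence markers such as ``` or ```ts at line starts.
    const countFences = (prompt: string): number =>
      [...prompt.matchAll(/^```(\w*)/gm)].length

    // An odd number of fences means a code block is still open,
    // so full-line highlighting should stay active.
    const isInsideOpenCodeBlock = (prompt: string): boolean =>
      countFences(prompt) % 2 !== 0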