Merge pull request #3985 from janhq/dev

2024-11-13 09:22:43 +07:00 · 2024-11-13 09:22:43 +07:00 · 31d0b8d336
commit 31d0b8d336
parent c47d049920 ff2a81e41f
237 changed files with 3942 additions and 5022 deletions
--- a/.github/scripts/rename-app-beta.sh
+++ b/.github/scripts/rename-app-beta.sh
@ -1,47 +0,0 @@
-#!/bin/bash
-
-# Check if the correct number of arguments is provided
-if [ "$#" -ne 1 ]; then
-    echo "Usage: $0 <path_to_json_input_file>"
-    exit 1
-fi
-
-INPUT_JSON_FILE="$1"
-
-# Check if the input file exists
-if [ ! -f "$INPUT_JSON_FILE" ]; then
-    echo "Input file not found: $INPUT_JSON_FILE"
-    exit 1
-fi
-
-# Use jq to transform the content
-jq '
-    .name = "jan-beta" |
-    .productName = "Jan-beta" |
-    .build.appId = "jan-beta.ai.app" |
-    .build.productName = "Jan-beta" |
-    .build.appId = "jan-beta.ai.app" |
-    .build.protocols[0].name = "Jan-beta" |
-    .build.protocols[0].schemes = ["jan-beta"] |
-    .build.artifactName = "jan-beta-${os}-${arch}-${version}.${ext}" |
-    .build.publish[0].channel = "beta"
-' "$INPUT_JSON_FILE" > ./package.json.tmp
-
-cat ./package.json.tmp
-
-rm $INPUT_JSON_FILE
-mv ./package.json.tmp $INPUT_JSON_FILE
-
-# Update the layout file
-LAYOUT_FILE_PATH="web/app/layout.tsx"
-
-if [ ! -f "$LAYOUT_FILE_PATH" ]; then
-    echo "File does not exist: $LAYOUT_FILE_PATH"
-    exit 1
-fi
-
-# Perform the replacements
-sed -i -e "s#Jan#Jan-beta#g" "$LAYOUT_FILE_PATH"
-
-# Notify completion
-echo "File has been updated: $LAYOUT_FILE_PATH"
--- a/.github/scripts/rename-app.sh
+++ b/.github/scripts/rename-app.sh
@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Check if the correct number of arguments is provided
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <path_to_json_input_file> <channel>"
+    exit 1
+fi
+
+INPUT_JSON_FILE="$1"
+
+CHANNEL="$2"
+
+if [ "$CHANNEL" == "nightly" ]; then
+    UPDATER="latest"
+else
+    UPDATER="beta"
+fi
+
+# Check if the input file exists
+if [ ! -f "$INPUT_JSON_FILE" ]; then
+    echo "Input file not found: $INPUT_JSON_FILE"
+    exit 1
+fi
+
+# Use jq to transform the content
+jq --arg channel "$CHANNEL" --arg updater "$UPDATER" '
+    .name = "jan-\($channel)" |
+    .productName = "Jan-\($channel)" |
+    .build.appId = "jan-\($channel).ai.app" |
+    .build.productName = "Jan-\($channel)" |
+    .build.appId = "jan-\($channel).ai.app" |
+    .build.protocols[0].name = "Jan-\($channel)" |
+    .build.protocols[0].schemes = ["jan-\($channel)"] |
+    .build.artifactName = "jan-\($channel)-${os}-${arch}-${version}.${ext}" |
+    .build.publish[0].channel = $updater
+' "$INPUT_JSON_FILE" > ./package.json.tmp
+
+cat ./package.json.tmp
+
+rm $INPUT_JSON_FILE
+mv ./package.json.tmp $INPUT_JSON_FILE
+
+# Update the layout file
+LAYOUT_FILE_PATH="web/app/layout.tsx"
+
+if [ ! -f "$LAYOUT_FILE_PATH" ]; then
+    echo "File does not exist: $LAYOUT_FILE_PATH"
+    exit 1
+fi
+
+# Perform the replacements
+sed -i -e "s#Jan#Jan-$CHANNEL#g" "$LAYOUT_FILE_PATH"
+
+# Notify completion
+echo "File has been updated: $LAYOUT_FILE_PATH"
--- a/.github/scripts/rename-uninstaller-beta.sh
+++ b/.github/scripts/rename-uninstaller-beta.sh
@ -3,6 +3,14 @@
 # File path to be modified
 FILE_PATH="electron/scripts/uninstaller.nsh"

+# Check if the correct number of arguments is provided
+if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 <channel>"
+    exit 1
+fi
+
+CHANNEL="$1"
+
 # Check if the file exists
 if [ ! -f "$FILE_PATH" ]; then
    echo "File does not exist: $FILE_PATH"
@ -10,7 +18,7 @@ if [ ! -f "$FILE_PATH" ]; then
 fi

 # Perform the replacements
-sed -i -e "s#jan#jan-beta#g" "$FILE_PATH"
+sed -i -e "s#jan#jan-$CHANNEL#g" "$FILE_PATH"

 # Notify completion
 echo "File has been updated: $FILE_PATH"
--- a/.github/scripts/rename-workspace-beta.sh
+++ b/.github/scripts/rename-workspace-beta.sh
@ -3,6 +3,8 @@
 # File path to be modified
 FILE_PATH="$1"

+CHANNEL="$2"
+
 # Check if the file exists
 if [ ! -f "$FILE_PATH" ]; then
    echo "File does not exist: $FILE_PATH"
@ -10,7 +12,7 @@ if [ ! -f "$FILE_PATH" ]; then
 fi

 # Perform the replacements
-sed -i -e 's/yarn workspace jan/yarn workspace jan-beta/g' "$FILE_PATH"
+sed -i -e "s/yarn workspace jan/yarn workspace jan-$CHANNEL/g" "$FILE_PATH"

 # Notify completion
 echo "File has been updated: $FILE_PATH"
--- a/.github/workflows/jan-docs-new-release.yaml
+++ b/.github/workflows/jan-docs-new-release.yaml
@ -58,6 +58,6 @@ jobs:
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./docs/out
-          branch: dev
+          branch: main
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/jan-docs.yml
+++ b/.github/workflows/jan-docs.yml
@ -83,6 +83,6 @@ jobs:
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./docs/out
-          branch: dev
+          branch: main
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/jan-electron-build-beta.yml
+++ b/.github/workflows/jan-electron-build-beta.yml
@ -133,4 +133,26 @@ jobs:
        run: |
          gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease
        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+  noti-discord-and-update-url-readme:
+    needs: [build-macos-x64, build-macos-arm64, create-draft-release, build-windows-x64, build-linux-x64, combine-beta-mac-yml]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set version to environment variable
+        run: |
+          echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV
+
+      - name: Notify Discord
+        uses: Ilshidur/action-discord@master
+        with:
+          args: |
+            Jan-beta App version {{ VERSION }}, has been released, use the following links to download the app with faster speed or visit the Github release page for more information:
+            - Windows: https://delta.jan.ai/beta/jan-beta-win-x64-{{ VERSION }}.exe
+            - macOS Intel: https://delta.jan.ai/beta/jan-beta-mac-x64-{{ VERSION }}.dmg
+            - macOS Apple Silicon: https://delta.jan.ai/beta/jan-beta-mac-arm64-{{ VERSION }}.dmg
+            - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb
+            - Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage
+            - Github Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }}
+        env:
+          DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }}
--- a/.github/workflows/jan-electron-linter-and-test.yml
+++ b/.github/workflows/jan-electron-linter-and-test.yml
@ -319,6 +319,13 @@ jobs:
        #   TURBO_TEAM: 'linux'
        #   TURBO_TOKEN: '${{ secrets.TURBO_TOKEN }}'

+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report
+          path: electron/playwright-report/
+          retention-days: 2
+
  coverage-check:
    runs-on: [self-hosted, Linux, ubuntu-desktop]
    needs: base_branch_cov
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux-x64.yml
@ -60,18 +60,25 @@ jobs:
          mv /tmp/package.json electron/package.json
          jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
          mv /tmp/package.json web/package.json
-          jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json
          cat electron/package.json
+          # chmod +x .github/scripts/rename-app.sh
+          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          # chmod +x .github/scripts/rename-workspace.sh
+          # .github/scripts/rename-workspace.sh ./package.json nightly
+          # echo "------------------------"
+          # cat ./electron/package.json
+          # echo "------------------------"

      - name: Change App Name for beta version
        if: inputs.beta == true
        shell: bash
        run: |
-          chmod +x .github/scripts/rename-app-beta.sh
-          .github/scripts/rename-app-beta.sh ./electron/package.json
-          chmod +x .github/scripts/rename-workspace-beta.sh
-          .github/scripts/rename-workspace-beta.sh ./package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json beta
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json beta
          echo "------------------------"
          cat ./electron/package.json
          echo "------------------------"
--- a/.github/workflows/template-build-macos-arm64.yml
+++ b/.github/workflows/template-build-macos-arm64.yml
@ -72,22 +72,29 @@ jobs:
          jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
          mv /tmp/package.json web/package.json

-          jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json

          jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json

-          cat electron/package.json
+          # cat electron/package.json
+          # chmod +x .github/scripts/rename-app.sh
+          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          # chmod +x .github/scripts/rename-workspace.sh
+          # .github/scripts/rename-workspace.sh ./package.json nightly
+          # echo "------------------------"
+          # cat ./electron/package.json
+          # echo "------------------------"

      - name: Change App Name for beta version
        if: inputs.beta == true
        shell: bash
        run: |
-          chmod +x .github/scripts/rename-app-beta.sh
-          .github/scripts/rename-app-beta.sh ./electron/package.json
-          chmod +x .github/scripts/rename-workspace-beta.sh
-          .github/scripts/rename-workspace-beta.sh ./package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json beta
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json beta
          echo "------------------------"
          cat ./electron/package.json
          echo "------------------------"
@ -186,7 +193,7 @@ jobs:
        uses: actions/upload-artifact@v4
        with:
          name: jan-mac-arm64-${{ inputs.new_version }}
-          path: ./electron/dist/jan-mac-arm64-${{ inputs.new_version }}.dmg
+          path: ./electron/dist/*.dmg

      - name: Upload Artifact
        if: inputs.beta == false
--- a/.github/workflows/template-build-macos-x64.yml
+++ b/.github/workflows/template-build-macos-x64.yml
@ -72,22 +72,29 @@ jobs:
          jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
          mv /tmp/package.json web/package.json

-          jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json

          jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json

-          cat electron/package.json
+          # cat electron/package.json
+          # chmod +x .github/scripts/rename-app.sh
+          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          # chmod +x .github/scripts/rename-workspace.sh
+          # .github/scripts/rename-workspace.sh ./package.json nightly
+          # echo "------------------------"
+          # cat ./electron/package.json
+          # echo "------------------------"

      - name: Change App Name for beta version
        if: inputs.beta == true
        shell: bash
        run: |
-          chmod +x .github/scripts/rename-app-beta.sh
-          .github/scripts/rename-app-beta.sh ./electron/package.json
-          chmod +x .github/scripts/rename-workspace-beta.sh
-          .github/scripts/rename-workspace-beta.sh ./package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json beta
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json beta
          echo "------------------------"
          cat ./electron/package.json
          echo "------------------------"
@ -186,7 +193,7 @@ jobs:
        uses: actions/upload-artifact@v4
        with:
          name: jan-mac-x64-${{ inputs.new_version }}
-          path: ./electron/dist/jan-mac-x64-${{ inputs.new_version }}.dmg
+          path: ./electron/dist/*.dmg

      - name: Upload Artifact
        if: inputs.beta == false
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@ -73,23 +73,35 @@ jobs:
          jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
          mv /tmp/package.json web/package.json

-          jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json

          jq '.build.win.sign = "./sign.js"' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json
          cat electron/package.json

+          # chmod +x .github/scripts/rename-app.sh
+          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          # chmod +x .github/scripts/rename-workspace.sh
+          # .github/scripts/rename-workspace.sh ./package.json nightly
+          # chmod +x .github/scripts/rename-uninstaller.sh
+          # .github/scripts/rename-uninstaller.sh nightly
+          # echo "------------------------"
+          # cat ./electron/package.json
+          # echo "------------------------"
+          # cat ./package.json
+          # echo "------------------------"
+
      - name: Change App Name for beta version
        if: inputs.beta == true
        shell: bash
        run: |
-          chmod +x .github/scripts/rename-app-beta.sh
-          .github/scripts/rename-app-beta.sh ./electron/package.json
-          chmod +x .github/scripts/rename-workspace-beta.sh
-          .github/scripts/rename-workspace-beta.sh ./package.json
-          chmod +x .github/scripts/rename-uninstaller-beta.sh
-          .github/scripts/rename-uninstaller-beta.sh
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json beta
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json beta
+          chmod +x .github/scripts/rename-uninstaller.sh
+          .github/scripts/rename-uninstaller.sh beta
          echo "------------------------"
          cat ./electron/package.json
          echo "------------------------"
--- a/.husky/pre-commit
+++ b/.husky/pre-commit
@ -1 +1 @@
-npm run lint --fix
+npx oxlint@latest --fix
--- a/JanBanner.png
+++ b/JanBanner.png
--- a/README.md
+++ b/README.md
@ -1,6 +1,6 @@
-# Jan - Turn your computer into an AI computer
+# Jan - Local AI Assistant

-![Jan banner](https://github.com/janhq/jan/assets/89722390/35daac7d-b895-487c-a6ac-6663daaad78e)
+![Jan banner](./JanBanner.png)

 <p align="center">
  <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
@ -12,18 +12,22 @@
 </p>

 <p align="center">
-  <a href="https://jan.ai/guides">Getting Started</a> 
+  <a href="https://jan.ai/docs/quickstart">Getting Started</a> 
  - <a href="https://jan.ai/docs">Docs</a> 
  - <a href="https://github.com/janhq/jan/releases">Changelog</a> 
  - <a href="https://github.com/janhq/jan/issues">Bug reports</a> 
  - <a href="https://discord.gg/AsJ8krTT3N">Discord</a>
 </p>

-> [!Warning] >**Jan is currently in Development**: Expect breaking changes and bugs!
+<p align="center">
+⚠️ <b> Jan is currently in Development</b>: Expect breaking changes and bugs!
+</p>

-Jan is an open-source ChatGPT alternative that runs 100% offline on your computer.

-**Jan runs on any hardware.** From PCs to multi-GPU clusters, Jan supports universal architectures:
+Jan is a ChatGPT-alternative that runs 100% offline on your device. Our goal is to make it easy for a layperson to download and run LLMs and use AI with **full control** and **privacy**.
+
+Jan is powered by [Cortex](https://github.com/janhq/cortex.cpp), our embeddable local AI engine that runs on any hardware.
+From PCs to multi-GPU clusters, Jan & Cortex support universal architectures:

 - [x] NVIDIA GPUs (fast)
 - [x] Apple M-series (fast)
@ -31,6 +35,12 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
 - [x] Linux Debian
 - [x] Windows x64

+#### Features:
+- [Model Library](https://jan.ai/docs/models/manage-models#add-models) with popular LLMs like Llama, Gemma, Mistral, or Qwen 
+- Connect to [Remote AI APIs](https://jan.ai/docs/remote-models/openai) like Groq and OpenRouter
+- Local API Server with OpenAI-equivalent API
+- [Extensions](https://jan.ai/docs/extensions) for customizing Jan
+
 ## Download

 <table>
@ -74,7 +84,40 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
    </td>
  </tr>
  <tr style="text-align:center">
-    <td style="text-align:center"><b>Experimental (Nightly Build)</b></td>
+    <td style="text-align:center"><b>Beta (Preview)</b></td>
+    <td style="text-align:center">
+      <a href='https://app.jan.ai/download/beta/win-x64'>
+        <img src='https://github.com/janhq/jan/blob/dev/docs/static/img/windows.png' style="height:14px; width: 14px" />
+        <b>jan.exe</b>
+      </a>
+    </td>
+    <td style="text-align:center">
+      <a href='https://app.jan.ai/download/beta/mac-x64'>
+        <img src='https://github.com/janhq/jan/blob/dev/docs/static/img/mac.png' style="height:15px; width: 15px" />
+        <b>Intel</b>
+      </a>
+    </td>
+    <td style="text-align:center">
+      <a href='https://app.jan.ai/download/beta/mac-arm64'>
+        <img src='https://github.com/janhq/jan/blob/dev/docs/static/img/mac.png' style="height:15px; width: 15px" />
+        <b>M1/M2/M3/M4</b>
+      </a>
+    </td>
+    <td style="text-align:center">
+      <a href='https://app.jan.ai/download/beta/linux-amd64-deb'>
+        <img src='https://github.com/janhq/jan/blob/dev/docs/static/img/linux.png' style="height:14px; width: 14px" />
+        <b>jan.deb</b>
+      </a>
+    </td>
+    <td style="text-align:center">
+      <a href='https://app.jan.ai/download/beta/linux-amd64-appimage'>
+        <img src='https://github.com/janhq/jan/blob/dev/docs/static/img/linux.png' style="height:14px; width: 14px" />
+        <b>jan.AppImage</b>
+      </a>
+    </td>
+  </tr>
+  <tr style="text-align:center">
+    <td style="text-align:center"><b>Nightly Build (Experimental)</b></td>
    <td style="text-align:center">
      <a href='https://app.jan.ai/download/nightly/win-x64'>
        <img src='https://github.com/janhq/jan/blob/dev/docs/static/img/windows.png' style="height:14px; width: 14px" />
@ -108,79 +151,64 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
  </tr>
 </table>

-Download the latest version of Jan at https://jan.ai/ or visit the **[GitHub Releases](https://github.com/janhq/jan/releases)** to download any previous release.
+Download the latest version of Jan at https://jan.ai/ or visit the [GitHub Releases](https://github.com/janhq/jan/releases) to download any previous release.

 ## Demo

-![Demo](/demo.gif)
+https://github.com/user-attachments/assets/c3592fa2-c504-4d9d-a885-7e00122a50f3

-_Realtime Video: Jan v0.4.3-nightly on a Mac M1, 16GB Sonoma 14_
+*Real-time Video: Jan v0.5.7 on a Mac M2, 16GB Sonoma 14.2*

 ## Quicklinks

-#### Jan
+### Jan

- [Jan website](https://jan.ai/)
+- [Jan Website](https://jan.ai/)
 - [Jan GitHub](https://github.com/janhq/jan)
- [User Guides](https://jan.ai/guides/)
- [Developer docs](https://jan.ai/developer/)
- [API reference](https://jan.ai/api-reference/)
- [Specs](https://jan.ai/docs/)
+- [Documentation](https://jan.ai/docs)
+- [Jan Changelog](https://jan.ai/changelog)
+- [Jan Blog](https://jan.ai/blog)

-#### Nitro
+### Cortex.cpp
+Jan is powered by **Cortex.cpp**. It is a C++ command-line interface (CLI) designed as an alternative to [Ollama](https://ollama.com/). By default, it runs on the llama.cpp engine but also supports other engines, including ONNX and TensorRT-LLM, making it a multi-engine platform.

-Nitro is a high-efficiency C++ inference engine for edge computing. It is lightweight and embeddable, and can be used on its own within your own projects.
-
- [Nitro Website](https://nitro.jan.ai)
- [Nitro GitHub](https://github.com/janhq/nitro)
- [Documentation](https://nitro.jan.ai/docs)
- [API Reference](https://nitro.jan.ai/api-reference)
-
-## Troubleshooting
-
-As Jan is in development mode, you might get stuck on a broken build.
-
-To reset your installation:
-
-1. Use the following commands to remove any dangling backend processes:
-
-   ```sh
-   ps aux | grep nitro
-   ```
-
-   Look for processes like "nitro" and "nitro_arm_64," and kill them one by one with:
-
-   ```sh
-   kill -9 <PID>
-   ```
-
-2. **Remove Jan from your Applications folder and Cache folder**
-
-   ```bash
-   make clean
-   ```
-
-   This will remove all build artifacts and cached files:
-
-   - Delete Jan extension from your `~/jan/extensions` folder
-   - Delete all `node_modules` in current folder
-   - Clear Application cache in `~/Library/Caches/jan`

+- [Cortex Website](https://cortex.so/)
+- [Cortex GitHub](https://github.com/janhq/cortex.cpp)
+- [Documentation](https://cortex.so/docs/)
+- [Models Library](https://cortex.so/models)
+- API Reference: *Under development*
+  
 ## Requirements for running Jan

- MacOS: 13 or higher
- Windows:
+- **MacOS**: 13 or higher
+- **Windows**:
  - Windows 10 or higher
  - To enable GPU support:
    - Nvidia GPU with CUDA Toolkit 11.7 or higher
    - Nvidia driver 470.63.01 or higher
- Linux:
+- **Linux**:
  - glibc 2.27 or higher (check with `ldd --version`)
  - gcc 11, g++ 11, cpp 11 or higher, refer to this [link](https://jan.ai/guides/troubleshooting/gpu-not-used/#specific-requirements-for-linux) for more information
  - To enable GPU support:
    - Nvidia GPU with CUDA Toolkit 11.7 or higher
    - Nvidia driver 470.63.01 or higher

+## Troubleshooting
+
+As Jan is in development mode, you might get stuck on some common issues:
+- [Troubleshooting a broken build](https://jan.ai/docs/troubleshooting#broken-build)
+- [Troubleshooting NVIDIA GPU](https://jan.ai/docs/troubleshooting#troubleshooting-nvidia-gpu)
+- [Troubleshooting Something's Amiss](https://jan.ai/docs/troubleshooting#somethings-amiss)
+
+
+If you can't find what you need in our troubleshooting guide, feel free to reach out to us for extra help:
+1. Copy your [error logs & device specifications](https://jan.ai/docs/troubleshooting#how-to-get-error-logs).
+2. Go to our [Discord](https://discord.com/invite/FTk2MvZwJH) & send them to the **#🆘|get-help** channel for further support.
+
+*Check the logs to ensure the information is what you intend to send. Note that we retain your logs for only 24 hours, so report any issues promptly.*
+  
+
 ## Contributing

 Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) file
@ -209,11 +237,7 @@ Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) fi

 This will start the development server and open the desktop app.

-3. (Optional) **Run the API server without frontend**

-   ```bash
-   yarn dev:server
-   ```

 ### For production build

@ -225,102 +249,6 @@ make build

 This will build the app MacOS m1/m2 for production (with code signing already done) and put the result in `dist` folder.

-### Docker mode
-
- Supported OS: Linux, WSL2 Docker
- Pre-requisites:
-
-  - Docker Engine and Docker Compose are required to run Jan in Docker mode. Follow the [instructions](https://docs.docker.com/engine/install/ubuntu/) below to get started with Docker Engine on Ubuntu.
-
-    ```bash
-    curl -fsSL https://get.docker.com -o get-docker.sh
-    sudo sh ./get-docker.sh --dry-run
-    ```
-
-  - If you intend to run Jan in GPU mode, you need to install `nvidia-driver` and `nvidia-docker2`. Follow the instruction [here](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) for installation.
-
- Run Jan in Docker mode
-  > User can choose between `docker-compose.yml` with latest prebuilt docker image or `docker-compose-dev.yml` with local docker build
-
-| Docker compose Profile | Description                                  |
-| ---------------------- | -------------------------------------------- |
-| `cpu-fs`               | Run Jan in CPU mode with default file system |
-| `cpu-s3fs`             | Run Jan in CPU mode with S3 file system      |
-| `gpu-fs`               | Run Jan in GPU mode with default file system |
-| `gpu-s3fs`             | Run Jan in GPU mode with S3 file system      |
-
-| Environment Variable    | Description                                                                                             |
-| ----------------------- | ------------------------------------------------------------------------------------------------------- |
-| `S3_BUCKET_NAME`        | S3 bucket name - leave blank for default file system                                                    |
-| `AWS_ACCESS_KEY_ID`     | AWS access key ID - leave blank for default file system                                                 |
-| `AWS_SECRET_ACCESS_KEY` | AWS secret access key - leave blank for default file system                                             |
-| `AWS_ENDPOINT`          | AWS endpoint URL - leave blank for default file system                                                  |
-| `AWS_REGION`            | AWS region - leave blank for default file system                                                        |
-| `API_BASE_URL`          | Jan Server URL, please modify it as your public ip address or domain name default http://localhost:1377 |
-
- **Option 1**: Run Jan in CPU mode
-
-  ```bash
-  # cpu mode with default file system
-  docker compose --profile cpu-fs up -d
-
-  # cpu mode with S3 file system
-  docker compose --profile cpu-s3fs up -d
-  ```
-
- **Option 2**: Run Jan in GPU mode
-
-  - **Step 1**: Check CUDA compatibility with your NVIDIA driver by running `nvidia-smi` and check the CUDA version in the output
-
-    ```bash
-    nvidia-smi
-
-    # Output
-    +---------------------------------------------------------------------------------------+
-    | NVIDIA-SMI 531.18                 Driver Version: 531.18       CUDA Version: 12.1     |
-    |-----------------------------------------+----------------------+----------------------+
-    | GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
-    | Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
-    |                                         |                      |               MIG M. |
-    |=========================================+======================+======================|
-    |   0  NVIDIA GeForce RTX 4070 Ti    WDDM | 00000000:01:00.0  On |                  N/A |
-    |  0%   44C    P8               16W / 285W|   1481MiB / 12282MiB |      2%      Default |
-    |                                         |                      |                  N/A |
-    +-----------------------------------------+----------------------+----------------------+
-    |   1  NVIDIA GeForce GTX 1660 Ti    WDDM | 00000000:02:00.0 Off |                  N/A |
-    |  0%   49C    P8               14W / 120W|      0MiB /  6144MiB |      0%      Default |
-    |                                         |                      |                  N/A |
-    +-----------------------------------------+----------------------+----------------------+
-    |   2  NVIDIA GeForce GTX 1660 Ti    WDDM | 00000000:05:00.0 Off |                  N/A |
-    | 29%   38C    P8               11W / 120W|      0MiB /  6144MiB |      0%      Default |
-    |                                         |                      |                  N/A |
-    +-----------------------------------------+----------------------+----------------------+
-
-    +---------------------------------------------------------------------------------------+
-    | Processes:                                                                            |
-    |  GPU   GI   CI        PID   Type   Process name                            GPU Memory |
-    |        ID   ID                                                             Usage      |
-    |=======================================================================================|
-    ```
-
-  - **Step 2**: Visit [NVIDIA NGC Catalog ](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags) and find the smallest minor version of image tag that matches your CUDA version (e.g., 12.1 -> 12.1.0)
-
-  - **Step 3**: Update the `Dockerfile.gpu` line number 5 with the latest minor version of the image tag from step 2 (e.g. change `FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`)
-
-  - **Step 4**: Run command to start Jan in GPU mode
-
-    ```bash
-    # GPU mode with default file system
-    docker compose --profile gpu-fs up -d
-
-    # GPU mode with S3 file system
-    docker compose --profile gpu-s3fs up -d
-    ```
-
-This will start the web server and you can access Jan at `http://localhost:3000`.
-
-> Note: RAG feature is not supported in Docker mode with s3fs yet.
-
 ## Acknowledgements

 Jan builds on top of other open-source projects:
@ -334,18 +262,18 @@ Jan builds on top of other open-source projects:

 - Bugs & requests: file a GitHub ticket
 - For discussion: join our Discord [here](https://discord.gg/FTk2MvZwJH)
- For business inquiries: email hello@jan.ai
+- For business inquiries: email hello@jan.ai 
 - For jobs: please email hr@jan.ai

 ## Trust & Safety

-Beware of scams.
+Beware of scams!

- We will never ask you for personal info
- We are a free product; there's no paid version
- We don't have a token or ICO
- We are not actively fundraising or seeking donations
+- We will never request your personal information.
+- Our product is completely free; no paid version exists.
+- We do not have a token or ICO.
+- We are a [bootstrapped company](https://en.wikipedia.org/wiki/Bootstrapping), and don't have any external investors (*yet*). We're open to exploring opportunities with strategic partners who want to tackle [our mission](https://jan.ai/about#mission) together.

 ## License

-Jan is free and open source, under the AGPLv3 license.
+Jan is free and open source, under the **AGPLv3** license.
--- a/core/src/browser/extension.ts
+++ b/core/src/browser/extension.ts
@ -1,6 +1,8 @@
-import { SettingComponentProps } from '../types'
+import { Model, ModelEvent, SettingComponentProps } from '../types'
 import { getJanDataFolderPath, joinPath } from './core'
+import { events } from './events'
 import { fs } from './fs'
+import { ModelManager } from './models'

 export enum ExtensionTypeEnum {
  Assistant = 'assistant',
@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType {
    return undefined
  }

+  /**
+   * Registers each given model with the ModelManager instance, then emits ModelEvent.OnModelsUpdate.
+   * @param models - The models to register.
+   */
+  async registerModels(models: Model[]): Promise<void> {
+    for (const model of models) {
+      ModelManager.instance().register(model)
+    }
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Register settings for the extension.
+   * @param settings
+   * @returns
+   */
  async registerSettings(settings: SettingComponentProps[]): Promise<void> {
    if (!this.name) {
      console.error('Extension name is not defined')
@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType {
    }
  }

+  /**
+   * Get the setting value for the key.
+   * @param key
+   * @param defaultValue
+   * @returns
+   */
  async getSetting<T>(key: string, defaultValue: T) {
    const keySetting = (await this.getSettings()).find((setting) => setting.key === key)

@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType {
    return
  }

+  /**
+   * Get the settings for the extension.
+   * @returns
+   */
  async getSettings(): Promise<SettingComponentProps[]> {
    if (!this.name) return []

@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType {
    }
  }

+  /**
+   * Update the settings for the extension.
+   * @param componentProps
+   * @returns
+   */
  async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> {
    if (!this.name) return

--- a/core/src/browser/extensions/engines/AIEngine.test.ts
+++ b/core/src/browser/extensions/engines/AIEngine.test.ts
@ -1,8 +1,6 @@
 import { AIEngine } from './AIEngine'
 import { events } from '../../events'
-import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types'
-import { EngineManager } from './EngineManager'
-import { fs } from '../../fs'
+import { ModelEvent, Model } from '../../../types'

 jest.mock('../../events')
 jest.mock('./EngineManager')
@ -26,7 +24,7 @@ describe('AIEngine', () => {
  })

  it('should load model if provider matches', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any
+    const model: any = { id: 'model1', engine: 'test-provider' } as any

    await engine.loadModel(model)

@ -34,7 +32,7 @@ describe('AIEngine', () => {
  })

  it('should not load model if provider does not match', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any
+    const model: any = { id: 'model1', engine: 'other-provider' } as any

    await engine.loadModel(model)

--- a/core/src/browser/extensions/engines/AIEngine.ts
+++ b/core/src/browser/extensions/engines/AIEngine.ts
@ -1,17 +1,14 @@
-import { getJanDataFolderPath, joinPath } from '../../core'
 import { events } from '../../events'
 import { BaseExtension } from '../../extension'
-import { fs } from '../../fs'
-import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types'
+import { MessageRequest, Model, ModelEvent } from '../../../types'
 import { EngineManager } from './EngineManager'
+import { ModelManager } from '../../models/manager'

 /**
 * Base AIEngine
 * Applicable to all AI Engines
 */
 export abstract class AIEngine extends BaseExtension {
-  private static modelsFolder = 'models'
-
  // The inference engine
  abstract provider: string

@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension {
  override onLoad() {
    this.registerEngine()

-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
    events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
  }

@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension {
    EngineManager.instance().register(this)
  }

-  async registerModels(models: Model[]): Promise<void> {
-    const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder])
-
-    let shouldNotifyModelUpdate = false
-    for (const model of models) {
-      const modelPath = await joinPath([modelFolderPath, model.id])
-      const isExist = await fs.existsSync(modelPath)
-
-      if (isExist) {
-        await this.migrateModelIfNeeded(model, modelPath)
-        continue
-      }
-
-      await fs.mkdir(modelPath)
-      await fs.writeFileSync(
-        await joinPath([modelPath, 'model.json']),
-        JSON.stringify(model, null, 2)
-      )
-      shouldNotifyModelUpdate = true
-    }
-
-    if (shouldNotifyModelUpdate) {
-      events.emit(ModelEvent.OnModelsUpdate, {})
-    }
-  }
-
-  async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> {
-    try {
-      const modelJson = await fs.readFileSync(await joinPath([modelPath, 'model.json']), 'utf-8')
-      const currentModel: Model = JSON.parse(modelJson)
-      if (currentModel.version !== model.version) {
-        await fs.writeFileSync(
-          await joinPath([modelPath, 'model.json']),
-          JSON.stringify(model, null, 2)
-        )
-
-        events.emit(ModelEvent.OnModelsUpdate, {})
-      }
-    } catch (error) {
-      console.warn('Error while try to migrating model', error)
-    }
-  }
-
  /**
   * Loads the model.
   */
-  async loadModel(model: ModelFile): Promise<any> {
+  async loadModel(model: Model): Promise<any> {
    if (model.engine.toString() !== this.provider) return Promise.resolve()
    events.emit(ModelEvent.OnModelReady, model)
    return Promise.resolve()
--- a/core/src/browser/extensions/engines/EngineManager.ts
+++ b/core/src/browser/extensions/engines/EngineManager.ts
@ -1,3 +1,4 @@
+import { InferenceEngine } from '../../../types'
 import { AIEngine } from './AIEngine'

 /**
@ -20,6 +21,22 @@ export class EngineManager {
   * @returns The engine, if found.
   */
  get<T extends AIEngine>(provider: string): T | undefined {
+    // Backward compatible provider
+    // nitro is migrated to cortex: every provider id listed below is looked up as the cortex engine
+    if (
+      [
+        InferenceEngine.nitro,
+        InferenceEngine.cortex,
+        InferenceEngine.cortex_llamacpp,
+        InferenceEngine.cortex_onnx,
+        InferenceEngine.cortex_tensorrtllm,
+        InferenceEngine.cortex_onnx, // NOTE(review): repeats the cortex_onnx entry above - redundant, or another engine was intended; confirm
+      ]
+        .map((e) => e.toString())
+        .includes(provider)
+    )
+      provider = InferenceEngine.cortex
+
    return this.engines.get(provider) as T | undefined
  }

@ -27,6 +44,6 @@ export class EngineManager {
   * The instance of the engine manager.
   */
  static instance(): EngineManager {
-    return window.core?.engineManager as EngineManager ?? new EngineManager()
+    return (window.core?.engineManager as EngineManager) ?? new EngineManager()
  }
 }
--- a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
@ -3,7 +3,7 @@
 */
 import { LocalOAIEngine } from './LocalOAIEngine'
 import { events } from '../../events'
-import { ModelEvent, ModelFile, Model } from '../../../types'
+import { ModelEvent, Model } from '../../../types'
 import { executeOnMain, systemInformation, dirName } from '../../core'

 jest.mock('../../core', () => ({
@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
  })

  it('should load model correctly', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
    const modelFolder = 'path/to'
    const systemInfo = { os: 'testOS' }
    const res = { error: null }
@ -66,7 +66,7 @@ describe('LocalOAIEngine', () => {
  })

  it('should handle load model error', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
    const modelFolder = 'path/to'
    const systemInfo = { os: 'testOS' }
    const res = { error: 'load error' }
@ -91,9 +91,7 @@ describe('LocalOAIEngine', () => {

  it('should not unload model if engine does not match', async () => {
    const model: Model = { engine: 'otherProvider' } as any
-
    await engine.unloadModel(model)
-
    expect(executeOnMain).not.toHaveBeenCalled()
    expect(events.emit).not.toHaveBeenCalledWith(ModelEvent.OnModelStopped, {})
  })
--- a/core/src/browser/extensions/engines/LocalOAIEngine.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.ts
@ -1,6 +1,6 @@
-import { executeOnMain, systemInformation, dirName } from '../../core'
+import { executeOnMain, systemInformation, dirName, joinPath, getJanDataFolderPath } from '../../core'
 import { events } from '../../events'
-import { Model, ModelEvent, ModelFile } from '../../../types'
+import { Model, ModelEvent } from '../../../types'
 import { OAIEngine } from './OAIEngine'

 /**
@ -22,16 +22,16 @@ export abstract class LocalOAIEngine extends OAIEngine {
  override onLoad() {
    super.onLoad()
    // These events are applicable to local inference providers
-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
    events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
  }

  /**
   * Load the model.
   */
-  override async loadModel(model: ModelFile): Promise<void> {
+  override async loadModel(model: Model & { file_path?: string }): Promise<void> {
    if (model.engine.toString() !== this.provider) return
-    const modelFolder = await dirName(model.file_path)
+    const modelFolder = 'file_path' in model && model.file_path ? await dirName(model.file_path) : await this.getModelFilePath(model.id)
    const systemInfo = await systemInformation()
    const res = await executeOnMain(
      this.nodeModule,
@ -63,4 +63,12 @@ export abstract class LocalOAIEngine extends OAIEngine {
      events.emit(ModelEvent.OnModelStopped, {})
    })
  }
+
+  /// Legacy: resolves <Jan data folder>/models/<id>; loadModel uses it as the model folder when the model has no file_path
+  private getModelFilePath = async (
+    id: string,
+  ): Promise<string> => {
+    return joinPath([await getJanDataFolderPath(), 'models', id])
+  }
+  ///
 }
--- a/core/src/browser/extensions/engines/OAIEngine.ts
+++ b/core/src/browser/extensions/engines/OAIEngine.ts
@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine {
   * Inference request
   */
  override async inference(data: MessageRequest) {
-    if (data.model?.engine?.toString() !== this.provider) return
+    if (!data.model?.id) {
+      events.emit(MessageEvent.OnMessageResponse, {
+        status: MessageStatus.Error,
+        content: [
+          {
+            type: ContentType.Text,
+            text: {
+              value: 'No model ID provided',
+              annotations: [],
+            },
+          },
+        ],
+      })
+      return
+    }

    const timestamp = Date.now()
    const message: ThreadMessage = {
@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine {
      model: model.id,
      stream: true,
      ...model.parameters,
-      ...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}),
    }
    if (this.transformPayload) {
      requestBody = this.transformPayload(requestBody)
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@ -10,7 +10,7 @@ export function requestInference(
  requestBody: any,
  model: {
    id: string
-    parameters: ModelRuntimeParams
+    parameters?: ModelRuntimeParams
  },
  controller?: AbortController,
  headers?: HeadersInit,
@ -22,7 +22,7 @@ export function requestInference(
      headers: {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*',
-        'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
+        'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json',
        ...headers,
      },
      body: JSON.stringify(requestBody),
@ -45,7 +45,7 @@ export function requestInference(
          subscriber.complete()
          return
        }
-        if (model.parameters.stream === false) {
+        if (model.parameters?.stream === false) {
          const data = await response.json()
          if (transformResponse) {
            subscriber.next(transformResponse(data))
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@ -1,13 +1,5 @@
 import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import {
-  GpuSetting,
-  HuggingFaceRepoData,
-  ImportingModel,
-  Model,
-  ModelFile,
-  ModelInterface,
-  OptionType,
-} from '../../types'
+import { Model, ModelInterface, OptionType } from '../../types'

 /**
 * Model extension for managing models.
@ -20,17 +12,16 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
    return ExtensionTypeEnum.Model
  }

-  abstract downloadModel(
-    model: Model,
-    gpuSettings?: GpuSetting,
-    network?: { proxy: string; ignoreSSL?: boolean }
+  abstract getModels(): Promise<Model[]>
+  abstract pullModel(model: string, id?: string, name?: string): Promise<void>
+  abstract cancelModelPull(modelId: string): Promise<void>
+  abstract importModel(
+    model: string,
+    modePath: string,
+    name?: string,
+    optionType?: OptionType
  ): Promise<void>
-  abstract cancelModelDownload(modelId: string): Promise<void>
-  abstract deleteModel(model: ModelFile): Promise<void>
-  abstract getDownloadedModels(): Promise<ModelFile[]>
-  abstract getConfiguredModels(): Promise<ModelFile[]>
-  abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void>
-  abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile>
-  abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData>
-  abstract getDefaultModel(): Promise<Model>
+  abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
+  abstract deleteModel(model: string): Promise<void>
+  abstract isModelLoaded(model: string): Promise<boolean>
 }
--- a/core/src/browser/index.test.ts
+++ b/core/src/browser/index.test.ts
@ -1,32 +1,37 @@
-import * as Core from './core';
-import * as Events from './events';
-import * as FileSystem from './fs';
-import * as Extension from './extension';
-import * as Extensions from './extensions';
-import * as Tools from './tools';
+import * as Core from './core'
+import * as Events from './events'
+import * as FileSystem from './fs'
+import * as Extension from './extension'
+import * as Extensions from './extensions'
+import * as Tools from './tools'
+import * as Models from './models'

 describe('Module Tests', () => {
-    it('should export Core module', () => {
-        expect(Core).toBeDefined();
-    });
+  it('should export Core module', () => {
+    expect(Core).toBeDefined()
+  })

-    it('should export Event module', () => {
-        expect(Events).toBeDefined();
-    });
+  it('should export Event module', () => {
+    expect(Events).toBeDefined()
+  })

-    it('should export Filesystem module', () => {
-        expect(FileSystem).toBeDefined();
-    });
+  it('should export Filesystem module', () => {
+    expect(FileSystem).toBeDefined()
+  })

-    it('should export Extension module', () => {
-        expect(Extension).toBeDefined();
-    });
+  it('should export Extension module', () => {
+    expect(Extension).toBeDefined()
+  })

-    it('should export all base extensions', () => {
-        expect(Extensions).toBeDefined();
-    });
+  it('should export all base extensions', () => {
+    expect(Extensions).toBeDefined()
+  })

-    it('should export all base tools', () => {
-        expect(Tools).toBeDefined();
-    });
-});
+  it('should export all base tools', () => {
+    expect(Tools).toBeDefined()
+  })
+
+  it('should export all base models', () => {
+    expect(Models).toBeDefined()
+  })
+})
--- a/core/src/browser/index.ts
+++ b/core/src/browser/index.ts
@ -33,3 +33,9 @@ export * from './extensions'
 * @module
 */
 export * from './tools'
+
+/**
+ * Export all base models.
+ * @module
+ */
+export * from './models'
--- a/core/src/browser/models/index.ts
+++ b/core/src/browser/models/index.ts
@ -0,0 +1,10 @@
+/**
+ * Export ModelManager
+ * @module
+ */
+export { ModelManager } from './manager'
+
+/**
+ * Export all utils
+ */
+export * from './utils'
--- a/core/src/browser/models/manager.ts
+++ b/core/src/browser/models/manager.ts
@ -0,0 +1,47 @@
+import { Model, ModelEvent } from '../../types'
+import { events } from '../events'
+
+/**
+ * Manages the registered models across extensions; entries are kept in an in-memory Map keyed by model id.
+ */
+export class ModelManager {
+  public models = new Map<string, Model>() // model id -> registered model
+
+  constructor() {
+    if (window) { // NOTE(review): a bare `window` reference throws where no such global exists; confirm this only runs in a browser context
+      window.core.modelManager = this // read back by instance()
+    }
+  }
+
+  /**
+   * Registers a model. If the id is already registered, both entries are merged and the fields already stored take precedence.
+   * @param model - The model to register.
+   */
+  register<T extends Model>(model: T) {
+    if (this.models.has(model.id)) {
+      this.models.set(model.id, {
+        ...model,
+        ...this.models.get(model.id),
+      })
+    } else {
+      this.models.set(model.id, model)
+    }
+    events.emit(ModelEvent.OnModelsUpdate, {}) // emitted on every call, whether the model was added or merged
+  }
+
+  /**
+   * Retrieves a model by its id.
+   * @param id - The id of the model to retrieve.
+   * @returns The model, if found.
+   */
+  get<T extends Model>(id: string): T | undefined {
+    return this.models.get(id) as T | undefined
+  }
+
+  /**
+   * Returns the ModelManager stored on window.core, or creates a new one when none is set.
+   */
+  static instance(): ModelManager {
+    return (window.core?.modelManager as ModelManager) ?? new ModelManager()
+  }
+}
--- a/core/src/browser/models/utils.test.ts
+++ b/core/src/browser/models/utils.test.ts
@ -1,7 +1,10 @@
 // web/utils/modelParam.test.ts
-import { normalizeValue, validationRules } from './modelParam'
-import { extractModelLoadParams } from './modelParam';
-import { extractInferenceParams } from './modelParam';
+import {
+  normalizeValue,
+  validationRules,
+  extractModelLoadParams,
+  extractInferenceParams,
+} from './utils'

 describe('validationRules', () => {
  it('should validate temperature correctly', () => {
@ -151,13 +154,12 @@ describe('validationRules', () => {
  })
 })

-
-  it('should normalize invalid values for keys not listed in validationRules', () => {
-    expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid')
-    expect(normalizeValue('invalid_key', 123)).toBe(123)
-    expect(normalizeValue('invalid_key', true)).toBe(true)
-    expect(normalizeValue('invalid_key', false)).toBe(false)
-  })
+it('should normalize invalid values for keys not listed in validationRules', () => {
+  expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid')
+  expect(normalizeValue('invalid_key', 123)).toBe(123)
+  expect(normalizeValue('invalid_key', true)).toBe(true)
+  expect(normalizeValue('invalid_key', false)).toBe(false)
+})

 describe('normalizeValue', () => {
  it('should normalize ctx_len correctly', () => {
@ -192,19 +194,16 @@ describe('normalizeValue', () => {
  })
 })

+it('should handle invalid values correctly by falling back to originParams', () => {
+  const modelParams = { temperature: 'invalid', token_limit: -1 }
+  const originParams = { temperature: 0.5, token_limit: 100 }
+  expect(extractInferenceParams(modelParams as any, originParams)).toEqual(originParams)
+})

-  it('should handle invalid values correctly by falling back to originParams', () => {
-    const modelParams = { temperature: 'invalid', token_limit: -1 };
-    const originParams = { temperature: 0.5, token_limit: 100 };
-    expect(extractInferenceParams(modelParams, originParams)).toEqual(originParams);
-  });
+it('should return an empty object when no modelParams are provided', () => {
+  expect(extractModelLoadParams()).toEqual({})
+})

-
-  it('should return an empty object when no modelParams are provided', () => {
-    expect(extractModelLoadParams()).toEqual({});
-  });
-
-
-  it('should return an empty object when no modelParams are provided', () => {
-    expect(extractInferenceParams()).toEqual({});
-  });
+it('should return an empty object when no modelParams are provided', () => {
+  expect(extractInferenceParams()).toEqual({})
+})
--- a/core/src/browser/models/utils.ts
+++ b/core/src/browser/models/utils.ts
@ -1,26 +1,20 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 /* eslint-disable @typescript-eslint/naming-convention */
-import { ModelRuntimeParams, ModelSettingParams } from '@janhq/core'
-
-import { ModelParams } from '@/types/model'
+import { ModelParams, ModelRuntimeParams, ModelSettingParams } from '../../types'

 /**
 * Validation rules for model parameters
 */
 export const validationRules: { [key: string]: (value: any) => boolean } = {
-  temperature: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 2,
+  temperature: (value: any) => typeof value === 'number' && value >= 0 && value <= 2,
  token_limit: (value: any) => Number.isInteger(value) && value >= 0,
  top_k: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
  top_p: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
  stream: (value: any) => typeof value === 'boolean',
  max_tokens: (value: any) => Number.isInteger(value) && value >= 0,
-  stop: (value: any) =>
-    Array.isArray(value) && value.every((v) => typeof v === 'string'),
-  frequency_penalty: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 1,
-  presence_penalty: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 1,
+  stop: (value: any) => Array.isArray(value) && value.every((v) => typeof v === 'string'),
+  frequency_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
+  presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,

  ctx_len: (value: any) => Number.isInteger(value) && value >= 0,
  ngl: (value: any) => Number.isInteger(value) && value >= 0,
@ -76,6 +70,7 @@ export const extractInferenceParams = (
    stop: undefined,
    frequency_penalty: undefined,
    presence_penalty: undefined,
+    engine: undefined,
  }

  const runtimeParams: ModelRuntimeParams = {}
@ -119,11 +114,18 @@ export const extractModelLoadParams = (
    embedding: undefined,
    n_parallel: undefined,
    cpu_threads: undefined,
+    pre_prompt: undefined,
+    system_prompt: undefined,
+    ai_prompt: undefined,
+    user_prompt: undefined,
    prompt_template: undefined,
+    model_path: undefined,
    llama_model_path: undefined,
    mmproj: undefined,
+    cont_batching: undefined,
    vision_model: undefined,
    text_model: undefined,
+    engine: undefined,
  }
  const settingParams: ModelSettingParams = {}

--- a/core/src/node/api/processors/download.test.ts
+++ b/core/src/node/api/processors/download.test.ts
@ -8,7 +8,8 @@ jest.mock('../../helper', () => ({

 jest.mock('../../helper/path', () => ({
  validatePath: jest.fn().mockReturnValue('path/to/folder'),
-  normalizeFilePath: () => process.platform === 'win32' ? 'C:\\Users\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
+  normalizeFilePath: () =>
+    process.platform === 'win32' ? 'C:\\Users\\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
 }))

 jest.mock(
--- a/core/src/node/api/processors/download.ts
+++ b/core/src/node/api/processors/download.ts
@ -1,6 +1,6 @@
 import { resolve, sep } from 'path'
 import { DownloadEvent } from '../../../types/api'
-import { normalizeFilePath, validatePath } from '../../helper/path'
+import { normalizeFilePath } from '../../helper/path'
 import { getJanDataFolderPath } from '../../helper'
 import { DownloadManager } from '../../helper/download'
 import { createWriteStream, renameSync } from 'fs'
@ -37,7 +37,6 @@ export class Downloader implements Processor {
    const modelId = downloadRequest.modelId ?? array.pop() ?? ''

    const destination = resolve(getJanDataFolderPath(), normalizedPath)
-    validatePath(destination)
    const rq = request({ url, strictSSL, proxy })

    // Put request to download manager instance
@ -50,11 +49,6 @@ export class Downloader implements Processor {
    const initialDownloadState: DownloadState = {
      modelId,
      fileName,
-      time: {
-        elapsed: 0,
-        remaining: 0,
-      },
-      speed: 0,
      percent: 0,
      size: {
        total: 0,
--- a/core/src/node/api/processors/fs.ts
+++ b/core/src/node/api/processors/fs.ts
@ -1,5 +1,5 @@
 import { join, resolve } from 'path'
-import { normalizeFilePath, validatePath } from '../../helper/path'
+import { normalizeFilePath } from '../../helper/path'
 import { getJanDataFolderPath } from '../../helper'
 import { Processor } from './Processor'
 import fs from 'fs'
@ -36,7 +36,6 @@ export class FileSystem implements Processor {
              return path
            }
            const absolutePath = resolve(path)
-            validatePath(absolutePath)
            return absolutePath
          })
        )
@ -55,7 +54,6 @@ export class FileSystem implements Processor {
    }

    const absolutePath = resolve(path)
-    validatePath(absolutePath)

    return new Promise((resolve, reject) => {
      fs.rm(absolutePath, { recursive: true, force: true }, (err) => {
@ -79,7 +77,6 @@ export class FileSystem implements Processor {
    }

    const absolutePath = resolve(path)
-    validatePath(absolutePath)

    return new Promise((resolve, reject) => {
      fs.mkdir(absolutePath, { recursive: true }, (err) => {
--- a/core/src/node/api/processors/fsExt.ts
+++ b/core/src/node/api/processors/fsExt.ts
@ -1,6 +1,6 @@
 import { basename, join } from 'path'
 import fs, { readdirSync } from 'fs'
-import { appResourcePath, normalizeFilePath, validatePath } from '../../helper/path'
+import { appResourcePath, normalizeFilePath } from '../../helper/path'
 import { defaultAppConfig, getJanDataFolderPath, getJanDataFolderPath as getPath } from '../../helper'
 import { Processor } from './Processor'
 import { FileStat } from '../../../types'
@ -61,7 +61,6 @@ export class FSExt implements Processor {
      
      const dataBuffer = Buffer.from(data, 'base64')
      const writePath = join(getJanDataFolderPath(), normalizedPath)
-      validatePath(writePath)
      fs.writeFileSync(writePath, dataBuffer)
    } catch (err) {
      console.error(`writeFile ${path} result: ${err}`)
@ -69,7 +68,6 @@ export class FSExt implements Processor {
  }

  copyFile(src: string, dest: string): Promise<void> {
-    validatePath(dest)
    return new Promise((resolve, reject) => {
      fs.copyFile(src, dest, (err) => {
        if (err) {
--- a/core/src/node/api/restful/common.ts
+++ b/core/src/node/api/restful/common.ts
@ -10,6 +10,7 @@ import {
  getMessages,
  retrieveMessage,
  updateThread,
+  models,
 } from './helper/builder'

 import { JanApiRouteConfiguration } from './helper/configuration'
@ -26,9 +27,12 @@ export const commonRouter = async (app: HttpServer) => {
  // Common Routes
  // Read & Delete :: Threads | Models | Assistants
  Object.keys(JanApiRouteConfiguration).forEach((key) => {
-    app.get(`/${key}`, async (_request) =>
-      getBuilder(JanApiRouteConfiguration[key]).then(normalizeData)
-    )
+    app.get(`/${key}`, async (_req, _res) => {
+      if (key === 'models') {
+        return models(_req, _res)
+      }
+      return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData)
+    })

    app.get(`/${key}/:id`, async (request: any) =>
      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
--- a/core/src/node/api/restful/helper/builder.test.ts
+++ b/core/src/node/api/restful/helper/builder.test.ts
@ -220,22 +220,6 @@ describe('builder helper functions', () => {
  })

  describe('chatCompletions', () => {
-    it('should return an error if model is not found', async () => {
-      const request = { body: { model: 'nonexistentModel' } }
-      const reply = { code: jest.fn().mockReturnThis(), send: jest.fn() }
-
-      await chatCompletions(request, reply)
-      expect(reply.code).toHaveBeenCalledWith(404)
-      expect(reply.send).toHaveBeenCalledWith({
-        error: {
-          message: 'The model nonexistentModel does not exist',
-          type: 'invalid_request_error',
-          param: null,
-          code: 'model_not_found',
-        },
-      })
-    })
-
    it('should return the error on status not ok', async () => {
      const request = { body: { model: 'model1' } }
      const mockSend = jest.fn()
--- a/core/src/node/api/restful/helper/builder.ts
+++ b/core/src/node/api/restful/helper/builder.ts
@ -10,9 +10,9 @@ import {
 } from 'fs'
 import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
 import { join } from 'path'
-import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types'
-import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper'
-import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
+import { ContentType, InferenceEngine, MessageStatus, ThreadMessage } from '../../../../types'
+import { getJanDataFolderPath } from '../../../helper'
+import { CORTEX_API_URL } from './consts'

 // TODO: Refactor these
 export const getBuilder = async (configuration: RouteConfiguration) => {
@ -297,57 +297,56 @@ export const downloadModel = async (
  }
 }

-export const chatCompletions = async (request: any, reply: any) => {
-  const modelList = await getBuilder(JanApiRouteConfiguration.models)
-  const modelId = request.body.model
-
-  const matchedModels = modelList.filter((model: Model) => model.id === modelId)
-  if (matchedModels.length === 0) {
-    const error = {
-      error: {
-        message: `The model ${request.body.model} does not exist`,
-        type: 'invalid_request_error',
-        param: null,
-        code: 'model_not_found',
-      },
-    }
-    reply.code(404).send(error)
-    return
-  }
-
-  const requestedModel = matchedModels[0]
-
-  const engineConfiguration = await getEngineConfiguration(requestedModel.engine)
-
-  let apiKey: string | undefined = undefined
-  let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL
-
-  if (engineConfiguration) {
-    apiKey = engineConfiguration.api_key
-    apiUrl = engineConfiguration.full_url ?? DEFAULT_CHAT_COMPLETION_URL
-  }
-
+/**
+ * Proxy a `/models` request to the cortex server.
+ *
+ * The incoming request's HTTP method and JSON-serialized body are forwarded to
+ * `${CORTEX_API_URL}/models`. A non-200 upstream response is relayed with its
+ * status code, headers and body text; a 200 response is streamed back through
+ * `reply.raw` with a fixed set of JSON / CORS headers.
+ *
+ * @param request - incoming request; its `method` and `body` are forwarded
+ * @param reply - reply object used to send or stream the proxied response
+ */
+export const models = async (request: any, reply: any) => {
+  const fetch = require('node-fetch')
  const headers: Record<string, any> = {
    'Content-Type': 'application/json',
  }

-  if (apiKey) {
-    headers['Authorization'] = `Bearer ${apiKey}`
-    headers['api-key'] = apiKey
-  }
+  // NOTE(review): the body is serialized for every method, including GET.
+  // This presumably relies on `request.body` being undefined for body-less
+  // requests (JSON.stringify(undefined) yields undefined) - confirm, since
+  // node-fetch rejects GET/HEAD requests that carry a body.
+  const response = await fetch(`${CORTEX_API_URL}/models`, {
+    method: request.method,
+    headers: headers,
+    body: JSON.stringify(request.body),
+  })

-  if (requestedModel.engine === 'openai' && request.body.stop) {
-    // openai only allows max 4 stop words
-    request.body.stop = request.body.stop.slice(0, 4)
+  if (response.status !== 200) {
+    // Forward the error response to client via reply
+    const responseBody = await response.text()
+    const responseHeaders = Object.fromEntries(response.headers)
+    reply.code(response.status).headers(responseHeaders).send(responseBody)
+  } else {
+    // Success: write the response head on the raw response object, then
+    // stream the upstream body straight through without buffering it.
+    reply.raw.writeHead(200, {
+      'Content-Type': 'application/json',
+      'Cache-Control': 'no-cache',
+      'Connection': 'keep-alive',
+      'Access-Control-Allow-Origin': '*',
+    })
+    response.body.pipe(reply.raw)
+  }
+}
+
+/**
+ * Proxy chat completions
+ * @param request
+ * @param reply
+ */
+export const chatCompletions = async (request: any, reply: any) => {
+  const headers: Record<string, any> = {
+    'Content-Type': 'application/json',
  }

  // add engine for new cortex cpp engine
-  if (requestedModel.engine === 'nitro') {
-    request.body.engine = 'cortex.llamacpp'
+  if (request.body.engine === InferenceEngine.nitro) {
+    request.body.engine = InferenceEngine.cortex_llamacpp
  }

  const fetch = require('node-fetch')
-  const response = await fetch(apiUrl, {
+  const response = await fetch(`${CORTEX_API_URL}/chat/completions`, {
    method: 'POST',
    headers: headers,
    body: JSON.stringify(request.body),
--- a/core/src/node/api/restful/helper/consts.test.ts
+++ b/core/src/node/api/restful/helper/consts.test.ts
@ -1,6 +1,5 @@
+import { CORTEX_DEFAULT_PORT } from './consts'

-import { NITRO_DEFAULT_PORT } from './consts';
-
-it('should test NITRO_DEFAULT_PORT', () => {
-  expect(NITRO_DEFAULT_PORT).toBe(3928);
-});
+it('should test CORTEX_DEFAULT_PORT', () => {
+  expect(CORTEX_DEFAULT_PORT).toBe(39291)
+})
--- a/core/src/node/api/restful/helper/consts.ts
+++ b/core/src/node/api/restful/helper/consts.ts
@ -1,19 +1,7 @@
-// The PORT to use for the Nitro subprocess
-export const NITRO_DEFAULT_PORT = 3928
+export const CORTEX_DEFAULT_PORT = 39291

-// The HOST address to use for the Nitro subprocess
 export const LOCAL_HOST = '127.0.0.1'

 export const SUPPORTED_MODEL_FORMAT = '.gguf'

-// The URL for the Nitro subprocess
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
-// The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
-// The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
-
-// The URL for the Nitro subprocess to kill itself
-export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
+export const CORTEX_API_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1`
--- a/core/src/node/api/restful/helper/startStopModel.test.ts
+++ b/core/src/node/api/restful/helper/startStopModel.test.ts
@ -1,16 +1,10 @@
+import { startModel } from './startStopModel'

+describe('startModel', () => {
+  it('test_startModel_error', async () => {
+    const modelId = 'testModelId'
+    const settingParams = undefined

-  import { startModel } from './startStopModel'
-  
-  describe('startModel', () => {
-    it('test_startModel_error', async () => {
-      const modelId = 'testModelId'
-      const settingParams = undefined
-  
-      const result = await startModel(modelId, settingParams)
-  
-      expect(result).toEqual({
-        error: expect.any(Error),
-      })
-    })
+    // startModel now returns the fetch promise directly, so a failure to reach
+    // the cortex server surfaces as a rejection. The assertion must be awaited,
+    // otherwise the test finishes before the promise settles and can never fail.
+    await expect(startModel(modelId, settingParams)).rejects.toThrow()
  })
+})
--- a/core/src/node/api/restful/helper/startStopModel.ts
+++ b/core/src/node/api/restful/helper/startStopModel.ts
@ -1,6 +1,5 @@
-import { join } from 'path'
-import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
 import { ModelSettingParams } from '../../../../types'
+import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'

 /**
 * Start a model
@ -9,70 +8,18 @@ import { ModelSettingParams } from '../../../../types'
 * @returns
 */
 export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
-  try {
-    await runModel(modelId, settingParams)
-
-    return {
-      message: `Model ${modelId} started`,
-    }
-  } catch (e) {
-    return {
-      error: e,
-    }
-  }
+  // Ask the cortex server to start the model. The fetch promise is returned
+  // as-is, so callers receive the raw response (or a rejection on network
+  // failure) rather than a wrapped { message } / { error } object.
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
+    method: 'POST',
+    // A string body is sent as text/plain unless told otherwise; declare the
+    // JSON payload explicitly, as the other cortex proxy helpers do.
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ model: modelId, ...settingParams }),
+  })
 }

-/**
- * Run a model using installed cortex extension
- * @param model
- * @param settingParams
- */
-const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
-  const janDataFolderPath = getJanDataFolderPath()
-  const modelFolder = join(janDataFolderPath, 'models', model)
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .loadModel(
-        {
-          modelFolder,
-          model,
-        },
-        settingParams
-      )
-      .then(() => log(`[SERVER]::Debug: Model is loaded`))
-      .then({
-        message: 'Model started',
-      })
-  )
-}
 /*
- * Stop model and kill nitro process.
+ * Stop model.
 */
-export const stopModel = async (_modelId: string) => {
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .unloadModel()
-      .then(() => log(`[SERVER]::Debug: Model is unloaded`))
-      .then({
-        message: 'Model stopped',
-      })
-  )
+export const stopModel = async (modelId: string) => {
+  // Ask the cortex server to stop the given model; the fetch promise is
+  // returned unchanged so callers can inspect the raw response.
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
+    method: 'POST',
+    // A string body is sent as text/plain unless told otherwise; declare the
+    // JSON payload explicitly, as the other cortex proxy helpers do.
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ model: modelId }),
+  })
 }
--- a/core/src/node/helper/path.ts
+++ b/core/src/node/helper/path.ts
@ -34,18 +34,4 @@ export function appResourcePath() {

  // server
  return join(global.core.appPath(), '../../..')
-}
-
-export function validatePath(path: string) {
-  const appDataFolderPath = getJanDataFolderPath()
-  const resourcePath = appResourcePath()
-  const applicationSupportPath = global.core?.appPath() ?? resourcePath
-  const absolutePath = resolve(__dirname, path)
-  if (
-    ![appDataFolderPath, resourcePath, applicationSupportPath].some((whiteListedPath) =>
-      absolutePath.startsWith(whiteListedPath)
-    )
-  ) {
-    throw new Error(`Invalid path: ${absolutePath}`)
-  }
-}
+}
--- a/core/src/types/api/index.ts
+++ b/core/src/types/api/index.ts
@ -72,6 +72,8 @@ export enum DownloadEvent {
  onFileDownloadUpdate = 'onFileDownloadUpdate',
  onFileDownloadError = 'onFileDownloadError',
  onFileDownloadSuccess = 'onFileDownloadSuccess',
+  onFileDownloadStopped = 'onFileDownloadStopped',
+  onFileDownloadStarted = 'onFileDownloadStarted',
  onFileUnzipSuccess = 'onFileUnzipSuccess',
 }

--- a/core/src/types/file/index.ts
+++ b/core/src/types/file/index.ts
@ -6,8 +6,8 @@ export type FileStat = {
 export type DownloadState = {
  modelId: string // TODO: change to download id
  fileName: string
-  time: DownloadTime
-  speed: number
+  time?: DownloadTime
+  speed?: number

  percent: number
  size: DownloadSize
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@ -6,8 +6,8 @@ import { FileMetadata } from '../file'
 */
 export type ModelInfo = {
  id: string
-  settings: ModelSettingParams
-  parameters: ModelRuntimeParams
+  settings?: ModelSettingParams
+  parameters?: ModelRuntimeParams
  engine?: InferenceEngine
 }

@ -15,7 +15,6 @@ export type ModelInfo = {
 * Represents the inference engine.
 * @stored
 */
-
 export enum InferenceEngine {
  anthropic = 'anthropic',
  mistral = 'mistral',
@ -28,11 +27,13 @@ export enum InferenceEngine {
  nitro_tensorrt_llm = 'nitro-tensorrt-llm',
  cohere = 'cohere',
  nvidia = 'nvidia',
-  cortex_llamacpp = 'cortex.llamacpp',
-  cortex_onnx = 'cortex.onnx',
-  cortex_tensorrtllm = 'cortex.tensorrt-llm',
+  cortex = 'cortex',
+  cortex_llamacpp = 'llama-cpp',
+  cortex_onnx = 'onnxruntime',
+  cortex_tensorrtllm = 'tensorrt-llm',
 }

+// Represents an artifact of a model, including its filename and URL
 export type ModelArtifact = {
  filename: string
  url: string
@ -104,6 +105,7 @@ export type Model = {
  engine: InferenceEngine
 }

+// Represents metadata associated with a model
 export type ModelMetadata = {
  author: string
  tags: string[]
@ -124,14 +126,20 @@ export type ModelSettingParams = {
  n_parallel?: number
  cpu_threads?: number
  prompt_template?: string
+  pre_prompt?: string
  system_prompt?: string
  ai_prompt?: string
  user_prompt?: string
+  // path param
+  model_path?: string
+  // legacy path param
  llama_model_path?: string
+  // clip model path
  mmproj?: string
  cont_batching?: boolean
  vision_model?: boolean
  text_model?: boolean
+  engine?: boolean
 }

 /**
@ -150,11 +158,12 @@ export type ModelRuntimeParams = {
  engine?: string
 }

+// Represents a model that failed to initialize, including the error
 export type ModelInitFailed = Model & {
  error: Error
 }

 /**
- * ModelFile is the model.json entity and it's file metadata
+ * ModelParams types
 */
-export type ModelFile = Model & FileMetadata
+export type ModelParams = ModelRuntimeParams | ModelSettingParams
--- a/core/src/types/model/modelImport.ts
+++ b/core/src/types/model/modelImport.ts
@ -1,4 +1,4 @@
-export type OptionType = 'SYMLINK' | 'MOVE_BINARY_FILE'
+export type OptionType = 'symlink' | 'copy'

 export type ModelImportOption = {
  type: OptionType
--- a/core/src/types/model/modelInterface.ts
+++ b/core/src/types/model/modelInterface.ts
@ -1,5 +1,5 @@
-import { GpuSetting } from '../miscellaneous'
-import { Model, ModelFile } from './modelEntity'
+import { Model } from './modelEntity'
+import { OptionType } from './modelImport'

 /**
 * Model extension for managing models.
@ -8,38 +8,46 @@ export interface ModelInterface {
  /**
   * Downloads a model.
   * @param model - The model to download.
-   * @param network - Optional object to specify proxy/whether to ignore SSL certificates.
   * @returns A Promise that resolves when the model has been downloaded.
   */
-  downloadModel(
-    model: ModelFile,
-    gpuSettings?: GpuSetting,
-    network?: { ignoreSSL?: boolean; proxy?: string }
-  ): Promise<void>
+  pullModel(model: string, id?: string, name?: string): Promise<void>

  /**
   * Cancels the download of a specific model.
   * @param {string} model - The ID of the model to cancel the download for.
   * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
   */
-  cancelModelDownload(modelId: string): Promise<void>
+  cancelModelPull(model: string): Promise<void>

  /**
   * Deletes a model.
   * @param model - The ID of the model to delete.
   * @returns A Promise that resolves when the model has been deleted.
   */
-  deleteModel(model: ModelFile): Promise<void>
+  deleteModel(model: string): Promise<void>

  /**
-   * Gets a list of downloaded models.
+   * Gets downloaded models.
   * @returns A Promise that resolves with an array of downloaded models.
   */
-  getDownloadedModels(): Promise<ModelFile[]>
+  getModels(): Promise<Model[]>

  /**
-   * Gets a list of configured models.
-   * @returns A Promise that resolves with an array of configured models.
+   * Update a pulled model's metadata
+   * @param model - The model to update.
+   * @returns A Promise that resolves when the model has been updated.
   */
-  getConfiguredModels(): Promise<ModelFile[]>
+  updateModel(model: Partial<Model>): Promise<Model>
+
+  /**
+   * Import an existing model file.
+   * @param model - id of the model to import
+   * @param modelPath - path of the model file
+   * @param name - optional name for the imported model
+   * @param optionType - optional import mode, either 'symlink' or 'copy'
+   * @returns A Promise that resolves when the import call completes.
+   */
+  importModel(
+    model: string,
+    modelPath: string,
+    name?: string,
+    optionType?: OptionType
+  ): Promise<void>
 }
--- a/core/src/types/monitoring/index.test.ts
+++ b/core/src/types/monitoring/index.test.ts
@ -1,16 +1,13 @@
+import * as monitoringInterface from './monitoringInterface'
+import * as resourceInfo from './resourceInfo'

-import * as monitoringInterface from './monitoringInterface';
-import * as resourceInfo from './resourceInfo';
+import * as index from './index'

-    import * as index from './index';
-    import * as monitoringInterface from './monitoringInterface';
-    import * as resourceInfo from './resourceInfo';
-    
-    it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
-      for (const key in monitoringInterface) {
-        expect(index[key]).toBe(monitoringInterface[key]);
-      }
-      for (const key in resourceInfo) {
-        expect(index[key]).toBe(resourceInfo[key]);
-      }
-    });
+it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
+  for (const key in monitoringInterface) {
+    expect(index[key]).toBe(monitoringInterface[key])
+  }
+  for (const key in resourceInfo) {
+    expect(index[key]).toBe(resourceInfo[key])
+  }
+})
--- a/core/tsconfig.json
+++ b/core/tsconfig.json
@ -1,7 +1,7 @@
 {
  "compilerOptions": {
    "moduleResolution": "node",
-    "target": "es5",
+    "target": "ES2015",
    "module": "ES2020",
    "lib": ["es2015", "es2016", "es2017", "dom"],
    "strict": true,
@ -13,7 +13,7 @@
    "declarationDir": "dist/types",
    "outDir": "dist/lib",
    "importHelpers": true,
-    "types": ["@types/jest"],
+    "types": ["@types/jest"]
  },
  "include": ["src"],
  "exclude": ["**/*.test.ts"]
--- a/docs/public/assets/images/changelog/jan-v0.5.5.jpeg
+++ b/docs/public/assets/images/changelog/jan-v0.5.5.jpeg
--- a/docs/public/assets/images/changelog/jan-v0.5.7.gif
+++ b/docs/public/assets/images/changelog/jan-v0.5.7.gif
--- a/docs/src/pages/changelog/2024-02-10-jan-is-more-stable.mdx
+++ b/docs/src/pages/changelog/2024-02-10-jan-is-more-stable.mdx
@ -0,0 +1,27 @@
+---
+title: "Jan is more stable 👋"
+version: 0.5.5
+description: "Jan supports Llama 3.2 and Qwen 2.5"
+date: 2024-10-02
+ogImage: "/assets/images/changelog/jan-v0.5.5.jpeg"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+<ChangelogHeader title="Jan is more stable 👋" date="2024-10-02" ogImage="/assets/images/changelog/jan-v0.5.5.jpeg" />
+
+Highlights 🎉
+
+- Meta's Llama 3.2 and Alibaba's Qwen 2.5 added to the hub
+- Improved starter screen
+- Better local vs. cloud model navigation
+
+Fixes 💫
+
+- Solved GPU acceleration for GGUF models
+- Improved model caching & threading
+- Resolved input & toolbar overlaps
+
+Update your product or download the latest: https://jan.ai
+
+For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.5).
--- a/docs/src/pages/changelog/2024-10-24-jan-stable.mdx
+++ b/docs/src/pages/changelog/2024-10-24-jan-stable.mdx
@ -0,0 +1,26 @@
+---
+title: "Jan has Stable, Beta and Nightly versions"
+version: 0.5.7
+description: "This release is mostly focused on bug fixes."
+date: 2024-10-24
+ogImage: "/assets/images/changelog/jan-v0.5.7.gif"
+---
+
+import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
+
+<ChangelogHeader title="Jan has Stable, Beta and Nightly versions" date="2024-10-24" ogImage="/assets/images/changelog/jan-v0.5.7.gif" />
+
+Highlights 🎉
+
+- Jan has Stable, Beta and Nightly versions
+- Saving instructions for new threads is now stable
+
+Fixes 💫
+
+- Fixed broken links, hardware issues, and multi-modal download
+- Resolved text overlap, scrolling, and multi-monitor reset problems
+- Adjusted LLava model EOS token and context input
+
+Update your product or download the latest: https://jan.ai
+
+For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.7).
--- a/docs/src/pages/docs/_assets/jan-app.png
+++ b/docs/src/pages/docs/_assets/jan-app.png
--- a/docs/src/pages/docs/_assets/jan-display.png
+++ b/docs/src/pages/docs/_assets/jan-display.png
--- a/docs/src/pages/docs/index.mdx
+++ b/docs/src/pages/docs/index.mdx
@ -22,7 +22,7 @@ import FAQBox from '@/components/FaqBox'

 # Jan

-![Jan's Cover Image](./_assets/jan-display.png)
+![Jan's Cover Image](./_assets/jan-app.png)


 Jan is a ChatGPT-alternative that runs 100% offline on your [Desktop](/docs/desktop-installation). Our goal is to make it easy for a layperson[^1] to download and run LLMs and use AI with full control and [privacy](https://www.reuters.com/legal/legalindustry/privacy-paradox-with-ai-2023-10-31/).
--- a/electron/package.json
+++ b/electron/package.json
@ -18,7 +18,8 @@
      "docs/**/*",
      "scripts/**/*",
      "icons/**/*",
-      "themes"
+      "themes",
+      "shared"
    ],
    "asarUnpack": [
      "pre-install",
@ -26,7 +27,8 @@
      "docs",
      "scripts",
      "icons",
-      "themes"
+      "themes",
+      "shared"
    ],
    "publish": [
      {
@ -111,7 +113,7 @@
    "@kirillvakalov/nut-tree__nut-js": "4.2.1-2"
  },
  "devDependencies": {
-    "@electron/notarize": "^2.1.0",
+    "@electron/notarize": "^2.5.0",
    "@playwright/test": "^1.38.1",
    "@types/npmcli__arborist": "^5.6.4",
    "@types/pacote": "^11.1.7",
--- a/electron/shared/.gitkeep
+++ b/electron/shared/.gitkeep
--- a/electron/tests/config/fixtures.ts
+++ b/electron/tests/config/fixtures.ts
@ -15,6 +15,8 @@ import {
 import { Constants } from './constants'
 import { HubPage } from '../pages/hubPage'
 import { CommonActions } from '../pages/commonActions'
+import { rmSync } from 'fs'
+import * as path from 'path'

 export let electronApp: ElectronApplication
 export let page: Page
@ -103,10 +105,14 @@ export const test = base.extend<
    },
    { auto: true },
  ],
-  
 })

 test.beforeAll(async () => {
+  await rmSync(path.join(__dirname, '../../test-data'), {
+    recursive: true,
+    force: true,
+  })
+
  test.setTimeout(TIMEOUT)
  await setupElectron()
  await page.waitForSelector('img[alt="Jan - Logo"]', {
--- a/electron/tests/e2e/hub.e2e.spec.ts
+++ b/electron/tests/e2e/hub.e2e.spec.ts
@ -16,7 +16,8 @@ test.beforeAll(async () => {
 test('explores hub', async ({ hubPage }) => {
  await hubPage.navigateByMenu()
  await hubPage.verifyContainerVisible()
-  const useModelBtn= page.getByTestId(/^use-model-btn-.*/).first()
+  await hubPage.scrollToBottom()
+  const useModelBtn = page.getByTestId(/^use-model-btn-.*/).first()

  await expect(useModelBtn).toBeVisible({
    timeout: TIMEOUT,
--- a/electron/tests/pages/basePage.ts
+++ b/electron/tests/pages/basePage.ts
@ -8,9 +8,8 @@ export class BasePage {
  constructor(
    protected readonly page: Page,
    readonly action: CommonActions,
-    protected containerId: string,
-  ) {
-  }
+    protected containerId: string
+  ) {}

  public getValue(key: string) {
    return this.action.getValue(key)
@ -37,6 +36,12 @@ export class BasePage {
    expect(container.isVisible()).toBeTruthy()
  }

+  /**
+   * Scroll the page window down to the full height of the document body.
+   */
+  async scrollToBottom() {
+    const jumpToEnd = () => window.scrollTo(0, document.body.scrollHeight)
+    await this.page.evaluate(jumpToEnd)
+  }
+
  async waitUpdateLoader() {
    await this.isElementVisible('img[alt="Jan - Logo"]')
  }
--- a/electron/utils/migration.ts
+++ b/electron/utils/migration.ts
@ -47,9 +47,6 @@ async function migrateThemes() {
  const themes = readdirSync(join(appResourcePath(), 'themes'))
  for (const theme of themes) {
    const themePath = join(appResourcePath(), 'themes', theme)
-    if (existsSync(themePath) && !lstatSync(themePath).isDirectory()) {
-      continue
-    }
    await checkAndMigrateTheme(theme, themePath)
  }
 }
@ -64,21 +61,14 @@ async function checkAndMigrateTheme(
  )
  if (existingTheme) {
    const desTheme = join(janDataThemesFolder, existingTheme)
-    if (!existsSync(desTheme) || !lstatSync(desTheme).isDirectory()) return
-
-    const desThemeData = JSON.parse(
-      readFileSync(join(desTheme, 'theme.json'), 'utf-8')
-    )
-    const sourceThemeData = JSON.parse(
-      readFileSync(join(sourceThemePath, 'theme.json'), 'utf-8')
-    )
-    if (desThemeData.version !== sourceThemeData.version) {
-      console.debug('Updating theme', existingTheme)
-      rmdirSync(desTheme, { recursive: true })
-      cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), {
-        recursive: true,
-      })
+    if (!lstatSync(desTheme).isDirectory()) {
+      return
    }
+    console.debug('Updating theme', existingTheme)
+    rmdirSync(desTheme, { recursive: true })
+    cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), {
+      recursive: true,
+    })
  } else {
    console.debug('Adding new theme', sourceThemeName)
    cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), {
--- a/extensions/assistant-extension/src/index.ts
+++ b/extensions/assistant-extension/src/index.ts
@ -63,39 +63,46 @@ export default class JanAssistantExtension extends AssistantExtension {
  }

  async getAssistants(): Promise<Assistant[]> {
-    // get all the assistant directories
-    // get all the assistant metadata json
-    const results: Assistant[] = []
-    const allFileName: string[] = await fs.readdirSync(
-      JanAssistantExtension._homeDir
-    )
-    for (const fileName of allFileName) {
-      const filePath = await joinPath([
-        JanAssistantExtension._homeDir,
-        fileName,
-      ])
+    try {
+      // get all the assistant directories
+      // get all the assistant metadata json
+      const results: Assistant[] = []

-      if (!(await fs.fileStat(filePath))?.isDirectory) continue
-      const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter(
-        (file: string) => file === 'assistant.json'
+      const allFileName: string[] = await fs.readdirSync(
+        JanAssistantExtension._homeDir
      )

-      if (jsonFiles.length !== 1) {
-        // has more than one assistant file -> ignore
-        continue
+      for (const fileName of allFileName) {
+        const filePath = await joinPath([
+          JanAssistantExtension._homeDir,
+          fileName,
+        ])
+
+        if (!(await fs.fileStat(filePath))?.isDirectory) continue
+        const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter(
+          (file: string) => file === 'assistant.json'
+        )
+
+        if (jsonFiles.length !== 1) {
+          // has more than one assistant file -> ignore
+          continue
+        }
+
+        const content = await fs.readFileSync(
+          await joinPath([filePath, jsonFiles[0]]),
+          'utf-8'
+        )
+        const assistant: Assistant =
+          typeof content === 'object' ? content : JSON.parse(content)
+
+        results.push(assistant)
      }

-      const content = await fs.readFileSync(
-        await joinPath([filePath, jsonFiles[0]]),
-        'utf-8'
-      )
-      const assistant: Assistant =
-        typeof content === 'object' ? content : JSON.parse(content)
-
-      results.push(assistant)
+      return results
+    } catch (err) {
+      console.debug(err)
+      return [this.defaultAssistant]
    }
-
-    return results
  }

  async deleteAssistant(assistant: Assistant): Promise<void> {
@ -112,39 +119,39 @@ export default class JanAssistantExtension extends AssistantExtension {
  }

  private async createJanAssistant(): Promise<void> {
-    const janAssistant: Assistant = {
-      avatar: '',
-      thread_location: undefined,
-      id: 'jan',
-      object: 'assistant',
-      created_at: Date.now(),
-      name: 'Jan',
-      description: 'A default assistant that can use all downloaded models',
-      model: '*',
-      instructions: '',
-      tools: [
-        {
-          type: 'retrieval',
-          enabled: false,
-          useTimeWeightedRetriever: false,
-          settings: {
-            top_k: 2,
-            chunk_size: 1024,
-            chunk_overlap: 64,
-            retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+    await this.createAssistant(this.defaultAssistant)
+  }
+
+  private defaultAssistant: Assistant = {
+    avatar: '',
+    thread_location: undefined,
+    id: 'jan',
+    object: 'assistant',
+    created_at: Date.now(),
+    name: 'Jan',
+    description: 'A default assistant that can use all downloaded models',
+    model: '*',
+    instructions: '',
+    tools: [
+      {
+        type: 'retrieval',
+        enabled: false,
+        useTimeWeightedRetriever: false,
+        settings: {
+          top_k: 2,
+          chunk_size: 1024,
+          chunk_overlap: 64,
+          retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
 ----------------
 CONTEXT: {CONTEXT}
 ----------------
 QUESTION: {QUESTION}
 ----------------
 Helpful Answer:`,
-          },
        },
-      ],
-      file_ids: [],
-      metadata: undefined,
-    }
-
-    await this.createAssistant(janAssistant)
+      },
+    ],
+    file_ids: [],
+    metadata: undefined,
  }
 }
--- a/extensions/assistant-extension/src/node/retrieval.ts
+++ b/extensions/assistant-extension/src/node/retrieval.ts
@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib'
 import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
 import { readEmbeddingEngine } from './engine'

-import path from 'path'
-
 export class Retrieval {
  public chunkSize: number = 100
  public chunkOverlap?: number = 0
--- a/extensions/assistant-extension/tsconfig.json
+++ b/extensions/assistant-extension/tsconfig.json
@ -1,7 +1,7 @@
 {
  "compilerOptions": {
    "moduleResolution": "node",
-    "target": "es5",
+    "target": "ES2015",
    "module": "ES2020",
    "lib": ["es2015", "es2016", "es2017", "dom"],
    "strict": true,
--- a/extensions/inference-cortex-extension/.gitignore
+++ b/extensions/inference-cortex-extension/.gitignore
--- a/extensions/inference-cortex-extension/README.md
+++ b/extensions/inference-cortex-extension/README.md
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@ -0,0 +1 @@
+1.0.2
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@ -0,0 +1,41 @@
+@echo off
+@REM Downloads the cortex server release named in bin/version.txt, the
+@REM cortex.llamacpp engine builds listed in SUBFOLDERS, and the CUDA archives.
+@REM Paths are relative, so the script expects to be run from this folder
+@REM with node_modules already installed (it calls node_modules\.bin\download).
+set BIN_PATH=./bin
+set SHARED_PATH=./../../electron/shared
+set /p CORTEX_VERSION=<./bin/version.txt
+
+@REM Download cortex.llamacpp binaries
+@REM NOTE(review): the engine version appears both in VERSION and inside
+@REM DOWNLOAD_URL - keep the two in sync when bumping it.
+set VERSION=v0.1.35
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
+set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
+set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
+
+@REM Cortex server archive goes into BIN_PATH; each engine build goes into its
+@REM own variant folder under BIN_PATH; CUDA archives go into SHARED_PATH.
+call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
+call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
+call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
+
+@REM Keep only the server binary, renamed to drop the beta suffix
+move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe
+del %BIN_PATH%\cortex-beta.exe
+del %BIN_PATH%\cortex.exe
+
+@REM Loop through each folder and move DLLs (excluding engine.dll)
+for %%F in (%SUBFOLDERS%) do (
+    echo Processing folder: %BIN_PATH%\%%F
+
+    @REM Move all .dll files except engine.dll
+    for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
+        if /I not "%%~nxD"=="engine.dll" (
+            move "%%D" "%BIN_PATH%"
+        )
+    )
+)
+
+@REM NOTE(review): printed unconditionally - none of the steps above check
+@REM for errors, so this message does not prove the moves succeeded.
+echo DLL files moved successfully.
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@ -0,0 +1,78 @
+#!/bin/bash
+#
+# Fetches the cortex server binary and the cortex.llamacpp engine builds for
+# the current platform (Linux or macOS) with the `download` CLI.
+#
+# All paths are relative (./bin, ../../electron/shared), so the result depends
+# on the current working directory. ./bin/version.txt must hold the cortex
+# release version to fetch.
+
+# Stop at the first failing step so a failed download or a missing file cannot
+# leave a silently incomplete ./bin behind.
+set -euo pipefail
+
+VERSION_FILE="./bin/version.txt"
+ENGINE_VERSION="0.1.35"
+
+if [ ! -f "$VERSION_FILE" ]; then
+    echo "Version file not found: $VERSION_FILE" >&2
+    exit 1
+fi
+
+# Read CORTEX_VERSION, dropping whitespace such as a trailing CR/LF
+CORTEX_VERSION=$(tr -d '[:space:]' < "$VERSION_FILE")
+if [ -z "$CORTEX_VERSION" ]; then
+    echo "Version file is empty: $VERSION_FILE" >&2
+    exit 1
+fi
+
+CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
+# Both URLs below point at the same cortex.llamacpp release tag
+CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}"
+ENGINE_DOWNLOAD_URL="${CUDA_DOWNLOAD_URL}/cortex.llamacpp-${ENGINE_VERSION}"
+
+# Renames the beta-named server binary when the archive provides one, removes
+# ./bin/cortex and ./bin/cortex-beta, and makes the server executable.
+prepare_server_binary() {
+    if [ -e ./bin/cortex-server-beta ]; then
+        mv ./bin/cortex-server-beta ./bin/cortex-server
+    fi
+    rm -rf ./bin/cortex ./bin/cortex-beta
+    chmod +x "./bin/cortex-server"
+}
+
+# Detect platform
+OS_TYPE=$(uname)
+
+# NOTE(review): some download calls pass a trailing positional `1` and others
+# do not; each call keeps the arguments it had. Confirm whether the CLI uses it.
+case "$OS_TYPE" in
+Linux)
+    # Linux downloads
+    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
+    prepare_server_binary
+
+    # Download engines for Linux; each variant is extracted into its own folder
+    for variant in noavx avx avx2 avx512 avx2-cuda-12-0 avx2-cuda-11-7 noavx-cuda-12-0 noavx-cuda-11-7 vulkan; do
+        download "${ENGINE_DOWNLOAD_URL}-linux-amd64-${variant}.tar.gz" -e --strip 1 -o "./bin/${variant}/engines/cortex.llamacpp" 1
+    done
+
+    # Both CUDA archives are extracted into ../../electron/shared
+    for cuda in 12-0 11-7; do
+        download "${CUDA_DOWNLOAD_URL}/cuda-${cuda}-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
+    done
+    ;;
+Darwin)
+    # macOS downloads
+    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
+    prepare_server_binary
+
+    # Download engines for macOS
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp
+    ;;
+*)
+    echo "Unsupported operating system: $OS_TYPE" >&2
+    exit 1
+    ;;
+esac
--- a/extensions/inference-cortex-extension/jest.config.js
+++ b/extensions/inference-cortex-extension/jest.config.js
--- a/extensions/inference-cortex-extension/package.json
+++ b/extensions/inference-cortex-extension/package.json
@ -1,7 +1,7 @@
 {
  "name": "@janhq/inference-cortex-extension",
  "productName": "Cortex Inference Engine",
-  "version": "1.0.20",
+  "version": "1.0.21",
  "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
  "main": "dist/index.js",
  "node": "dist/node/index.cjs.js",
@ -10,12 +10,12 @@
  "scripts": {
    "test": "jest",
    "build": "tsc --module commonjs && rollup -c rollup.config.ts",
-    "downloadnitro:linux:darwin": "./download.sh",
-    "downloadnitro:win32": "download.bat",
-    "downloadnitro": "run-script-os",
-    "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
-    "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
-    "build:publish": "yarn test && run-script-os"
+    "downloadcortex:linux:darwin": "./download.sh",
+    "downloadcortex:win32": "download.bat",
+    "downloadcortex": "run-script-os",
+    "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+    "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+    "build:publish": "run-script-os"
  },
  "exports": {
    ".": "./dist/index.js",
@ -50,6 +50,8 @@
    "cpu-instructions": "^0.0.13",
    "decompress": "^4.2.1",
    "fetch-retry": "^5.0.6",
+    "ky": "^1.7.2",
+    "p-queue": "^8.0.1",
    "rxjs": "^7.8.1",
    "tcp-port-used": "^1.0.2",
    "terminate": "2.6.1",
--- a/extensions/inference-cortex-extension/resources/default_settings.json
+++ b/extensions/inference-cortex-extension/resources/default_settings.json
--- a/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json
@ -31,5 +31,5 @@
      "tags": ["34B", "Finetuned"],
      "size": 21556982144
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json
@ -31,5 +31,5 @@
      "tags": ["7B", "Finetuned"],
      "size": 5056982144
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json
+++ b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json
@ -31,5 +31,5 @@
    "tags": ["Vision"],
    "size": 5750000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json
@ -30,5 +30,5 @@
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json
@ -31,6 +31,6 @@
      "tags": ["22B", "Finetuned", "Featured"],
      "size": 13341237440
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json
@ -31,6 +31,6 @@
      "tags": ["34B", "Finetuned"],
      "size": 21500000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json
@ -31,5 +31,5 @@
    "tags": ["Tiny"],
    "size": 1430000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json
@ -31,5 +31,5 @@
    "tags": ["33B"],
    "size": 19940000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json
@ -31,5 +31,5 @@
    "tags": ["2B", "Finetuned", "Tiny"],
    "size": 1630000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json
@ -31,5 +31,5 @@
    "tags": ["7B", "Finetuned"],
    "size": 5330000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json
@ -37,5 +37,5 @@
    ],
    "size": 16600000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json
@ -38,5 +38,5 @@
    ],
    "size": 1710000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json
@ -37,5 +37,5 @@
    ],
    "size": 5760000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json
@ -31,5 +31,5 @@
    "tags": ["70B", "Foundational Model"],
    "size": 43920000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json
@ -31,5 +31,5 @@
    "tags": ["7B", "Foundational Model"],
    "size": 4080000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json
@ -31,5 +31,5 @@
      "tags": ["8B"],
      "size": 4920000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json
@ -34,5 +34,5 @@
      ],
      "size": 4920000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
--- a/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json
@ -37,5 +37,5 @@
    ],
    "size": 42500000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
@ -37,5 +37,5 @@
    ],
    "size": 4920000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json
@ -31,5 +31,5 @@
    "tags": ["1B", "Featured"],
    "size": 1320000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json
@ -31,5 +31,5 @@
    "tags": ["3B", "Featured"],
    "size": 3420000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json
@ -34,5 +34,5 @@
      ],
      "size": 1170000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
--- a/extensions/inference-cortex-extension/resources/models/llava-13b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json
@ -32,5 +32,5 @@
    "tags": ["Vision"],
    "size": 7870000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llava-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json
@ -32,5 +32,5 @@
    "tags": ["Vision"],
    "size": 4370000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json
+++ b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json
@ -32,5 +32,5 @@
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png"
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json
@ -30,5 +30,5 @@
    "tags": ["70B", "Foundational Model"],
    "size": 26440000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json
@ -31,5 +31,5 @@
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/Show More
+++ b/Show More