Merge pull request #4071 from janhq/dev

Release cut 0.5.9 - sync dev into main
2024-11-21 15:39:54 +07:00 · 2024-11-21 15:39:54 +07:00 · 7de6800c54
commit 7de6800c54
parent 54e8421953 b002069284
64 changed files with 956 additions and 587 deletions
--- a/.github/workflows/jan-electron-build-nightly.yml
+++ b/.github/workflows/jan-electron-build-nightly.yml
@ -114,15 +114,14 @@ jobs:
      - name: Upload latest-mac.yml
        if: ${{ needs.set-public-provider.outputs.public_provider == 'aws-s3' }}
        run: |
-          aws s3 cp ./latest-mac.yml "s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-latest/latest-mac.yml"
+          aws s3 cp ./latest-mac.yml "s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-nightly/latest-mac.yml"
-          aws s3 sync s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-latest/ s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/latest/
+          aws s3 sync s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-nightly/ s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/nightly/
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }}
          AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }}
          AWS_EC2_METADATA_DISABLED: "true"
  noti-discord-nightly-and-update-url-readme:
    needs: [build-macos-x64, build-macos-arm64, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, combine-latest-mac-yml]
    secrets: inherit
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux-x64.yml
@ -60,16 +60,16 @@ jobs:
          mv /tmp/package.json electron/package.json
          jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
          mv /tmp/package.json web/package.json
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json
          cat electron/package.json
-          # chmod +x .github/scripts/rename-app.sh
+          chmod +x .github/scripts/rename-app.sh
-          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          .github/scripts/rename-app.sh ./electron/package.json nightly
-          # chmod +x .github/scripts/rename-workspace.sh
+          chmod +x .github/scripts/rename-workspace.sh
-          # .github/scripts/rename-workspace.sh ./package.json nightly
+          .github/scripts/rename-workspace.sh ./package.json nightly
-          # echo "------------------------"
+          echo "------------------------"
-          # cat ./electron/package.json
+          cat ./electron/package.json
-          # echo "------------------------"
+          echo "------------------------"
      - name: Change App Name for beta version
        if: inputs.beta == true
--- a/.github/workflows/template-build-macos-arm64.yml
+++ b/.github/workflows/template-build-macos-arm64.yml
@ -72,20 +72,20 @@ jobs:
          jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
          mv /tmp/package.json web/package.json
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json
          jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json
          # cat electron/package.json
-          # chmod +x .github/scripts/rename-app.sh
+          chmod +x .github/scripts/rename-app.sh
-          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          .github/scripts/rename-app.sh ./electron/package.json nightly
-          # chmod +x .github/scripts/rename-workspace.sh
+          chmod +x .github/scripts/rename-workspace.sh
-          # .github/scripts/rename-workspace.sh ./package.json nightly
+          .github/scripts/rename-workspace.sh ./package.json nightly
-          # echo "------------------------"
+          echo "------------------------"
-          # cat ./electron/package.json
+          cat ./electron/package.json
-          # echo "------------------------"
+          echo "------------------------"
      - name: Change App Name for beta version
        if: inputs.beta == true
--- a/.github/workflows/template-build-macos-x64.yml
+++ b/.github/workflows/template-build-macos-x64.yml
@ -72,20 +72,20 @@ jobs:
          jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
          mv /tmp/package.json web/package.json
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json
          jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json
-          # cat electron/package.json
+          cat electron/package.json
-          # chmod +x .github/scripts/rename-app.sh
+          chmod +x .github/scripts/rename-app.sh
-          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          .github/scripts/rename-app.sh ./electron/package.json nightly
-          # chmod +x .github/scripts/rename-workspace.sh
+          chmod +x .github/scripts/rename-workspace.sh
-          # .github/scripts/rename-workspace.sh ./package.json nightly
+          .github/scripts/rename-workspace.sh ./package.json nightly
-          # echo "------------------------"
+          echo "------------------------"
-          # cat ./electron/package.json
+          cat ./electron/package.json
-          # echo "------------------------"
+          echo "------------------------"
      - name: Change App Name for beta version
        if: inputs.beta == true
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@ -73,24 +73,24 @@ jobs:
          jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
          mv /tmp/package.json web/package.json
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json
          jq '.build.win.sign = "./sign.js"' electron/package.json > /tmp/package.json
          mv /tmp/package.json electron/package.json
          cat electron/package.json
-          # chmod +x .github/scripts/rename-app.sh
+          chmod +x .github/scripts/rename-app.sh
-          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          .github/scripts/rename-app.sh ./electron/package.json nightly
-          # chmod +x .github/scripts/rename-workspace.sh
+          chmod +x .github/scripts/rename-workspace.sh
-          # .github/scripts/rename-workspace.sh ./package.json nightly
+          .github/scripts/rename-workspace.sh ./package.json nightly
-          # chmod +x .github/scripts/rename-uninstaller.sh
+          chmod +x .github/scripts/rename-uninstaller.sh
-          # .github/scripts/rename-uninstaller.sh nightly
+          .github/scripts/rename-uninstaller.sh nightly
-          # echo "------------------------"
+          echo "------------------------"
-          # cat ./electron/package.json
+          cat ./electron/package.json
-          # echo "------------------------"
+          echo "------------------------"
-          # cat ./package.json
+          cat ./package.json
-          # echo "------------------------"
+          echo "------------------------"
      - name: Change App Name for beta version
        if: inputs.beta == true
--- a/.github/workflows/template-noti-discord-and-update-url-readme.yml
+++ b/.github/workflows/template-noti-discord-and-update-url-readme.yml
@ -47,11 +47,11 @@ jobs:
        with:
          args: |
            Jan App ${{ inputs.build_reason }} build artifact version {{ VERSION }}:
-            - Windows: https://delta.jan.ai/latest/jan-win-x64-{{ VERSION }}.exe
+            - Windows: https://delta.jan.ai/nightly/jan-nightly-win-x64-{{ VERSION }}.exe
-            - macOS Intel: https://delta.jan.ai/latest/jan-mac-x64-{{ VERSION }}.dmg
+            - macOS Intel: https://delta.jan.ai/nightly/jan-nightly-mac-x64-{{ VERSION }}.dmg
-            - macOS Apple Silicon: https://delta.jan.ai/latest/jan-mac-arm64-{{ VERSION }}.dmg
+            - macOS Apple Silicon: https://delta.jan.ai/nightly/jan-nightly-mac-arm64-{{ VERSION }}.dmg
-            - Linux Deb: https://delta.jan.ai/latest/jan-linux-amd64-{{ VERSION }}.deb
+            - Linux Deb: https://delta.jan.ai/nightly/jan-nightly-linux-amd64-{{ VERSION }}.deb
-            - Linux AppImage: https://delta.jan.ai/latest/jan-linux-x86_64-{{ VERSION }}.AppImage
+            - Linux AppImage: https://delta.jan.ai/nightly/jan-nightly-linux-x86_64-{{ VERSION }}.AppImage
            - Github action run: https://github.com/janhq/jan/actions/runs/{{ GITHUB_RUN_ID }}
        env:
          DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
--- a/.gitignore
+++ b/.gitignore
@ -47,3 +47,4 @@ coverage
 .yarnrc
 test_results.html
 *.tsbuildinfo
 electron/shared/**
--- a/core/src/browser/extension.ts
+++ b/core/src/browser/extension.ts
@ -113,7 +113,6 @@ export abstract class BaseExtension implements ExtensionType {
    for (const model of models) {
      ModelManager.instance().register(model)
    }
    events.emit(ModelEvent.OnModelsUpdate, {})
  }
  /**
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@ -38,14 +38,16 @@ export function requestInference(
            errorCode = ErrorCode.InvalidApiKey
          }
          const error = {
-            message: data.error?.message ?? 'Error occurred.',
+            message: data.error?.message ?? data.message ?? 'Error occurred.',
            code: errorCode,
          }
          subscriber.error(error)
          subscriber.complete()
          return
        }
-        if (model.parameters?.stream === false) {
+        // There could be an overridden stream parameter in the model
        // that is set in request body (transformed payload)
        if (requestBody?.stream === false || model.parameters?.stream === false) {
          const data = await response.json()
          if (transformResponse) {
            subscriber.next(transformResponse(data))
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@ -12,6 +12,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
    return ExtensionTypeEnum.Model
  }
  abstract configurePullOptions(configs: { [key: string]: any }): Promise<any>
  abstract getModels(): Promise<Model[]>
  abstract pullModel(model: string, id?: string, name?: string): Promise<void>
  abstract cancelModelPull(modelId: string): Promise<void>
--- a/electron/managers/window.ts
+++ b/electron/managers/window.ts
@ -28,6 +28,7 @@ class WindowManager {
      ...mainWindowConfig,
      width: bounds.width,
      height: bounds.height,
      show: false,
      x: bounds.x,
      y: bounds.y,
      webPreferences: {
@ -78,6 +79,10 @@ class WindowManager {
        windowManager.hideMainWindow()
      }
    })
    windowManager.mainWindow?.on('ready-to-show', function () {
      windowManager.mainWindow?.show()
    })
  }
  createQuickAskWindow(preloadPath: string, startUrl: string): void {
--- a/extensions/assistant-extension/src/node/engine.ts
+++ b/extensions/assistant-extension/src/node/engine.ts
@ -1,38 +0,0 @@
 import fs from 'fs'
 import path from 'path'
 import { SettingComponentProps, getJanDataFolderPath } from '@janhq/core/node'
 // Sec: Do not send engine settings over requests
 // Read it manually instead
 export const readEmbeddingEngine = (engineName: string) => {
  if (engineName !== 'openai' && engineName !== 'groq') {
    const engineSettings = fs.readFileSync(
      path.join(getJanDataFolderPath(), 'engines', `${engineName}.json`),
      'utf-8'
    )
    return JSON.parse(engineSettings)
  } else {
    const settingDirectoryPath = path.join(
      getJanDataFolderPath(),
      'settings',
      '@janhq',
      // TODO: James - To be removed
      engineName === 'openai'
        ? 'inference-openai-extension'
        : 'inference-groq-extension',
      'settings.json'
    )
    const content = fs.readFileSync(settingDirectoryPath, 'utf-8')
    const settings: SettingComponentProps[] = JSON.parse(content)
    const apiKeyId = engineName === 'openai' ? 'openai-api-key' : 'groq-api-key'
    const keySetting = settings.find((setting) => setting.key === apiKeyId)
    let apiKey = keySetting?.controllerProps.value
    if (typeof apiKey !== 'string') apiKey = ''
    return {
      api_key: apiKey,
    }
  }
 }
--- a/extensions/assistant-extension/src/node/retrieval.ts
+++ b/extensions/assistant-extension/src/node/retrieval.ts
@ -8,7 +8,6 @@ import { MemoryVectorStore } from 'langchain/vectorstores/memory'
 import { HNSWLib } from 'langchain/vectorstores/hnswlib'
 import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
 import { readEmbeddingEngine } from './engine'
 export class Retrieval {
  public chunkSize: number = 100
@ -28,8 +27,8 @@ export class Retrieval {
    // declare time-weighted retriever and storage
    this.timeWeightedVectorStore = new MemoryVectorStore(
      new OpenAIEmbeddings(
-        { openAIApiKey: 'nitro-embedding' },
+        { openAIApiKey: 'cortex-embedding' },
-        { basePath: 'http://127.0.0.1:3928/v1' }
+        { basePath: 'http://127.0.0.1:39291/v1' }
      )
    )
    this.timeWeightedretriever = new TimeWeightedVectorStoreRetriever({
@ -49,21 +48,11 @@ export class Retrieval {
  }
  public updateEmbeddingEngine(model: string, engine: string): void {
-    // Engine settings are not compatible with the current embedding model params
+    this.embeddingModel = new OpenAIEmbeddings(
-    // Switch case manually for now
+      { openAIApiKey: 'cortex-embedding', model },
-    if (engine === 'nitro') {
+      // TODO: Raw settings
-      this.embeddingModel = new OpenAIEmbeddings(
+      { basePath: 'http://127.0.0.1:39291/v1' }
-        { openAIApiKey: 'nitro-embedding', model },
+    )
        // TODO: Raw settings
        { basePath: 'http://127.0.0.1:3928/v1' },
      )
    } else {
      // Fallback to OpenAI Settings
      const settings = readEmbeddingEngine(engine)
      this.embeddingModel = new OpenAIEmbeddings({
        openAIApiKey: settings.api_key,
      })
    }
    // update time-weighted embedding model
    this.timeWeightedVectorStore.embeddings = this.embeddingModel
--- a/extensions/inference-cohere-extension/src/index.ts
+++ b/extensions/inference-cohere-extension/src/index.ts
@ -113,6 +113,8 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine {
  }
  transformResponse = (data: any) => {
-    return typeof data === 'object' ? data.text : JSON.parse(data).text ?? ''
+    return typeof data === 'object'
      ? data.text
      : (JSON.parse(data.replace('data: ', '').trim()).text ?? '')
  }
 }
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@ -1 +1 @@
-1.0.2
+1.0.3-rc5
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@ -2,23 +2,24 @@
 set BIN_PATH=./bin
 set SHARED_PATH=./../../electron/shared
 set /p CORTEX_VERSION=<./bin/version.txt
 set ENGINE_VERSION=0.1.39
@REM Download cortex.llamacpp binaries
-set VERSION=v0.1.35
+set VERSION=v0.1.39
-set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.39-windows-amd64
 set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
-set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
+set SUBFOLDERS=windows-amd64-noavx-cuda-12-0 windows-amd64-noavx-cuda-11-7 windows-amd64-avx2-cuda-12-0 windows-amd64-avx2-cuda-11-7 windows-amd64-noavx windows-amd64-avx windows-amd64-avx2 windows-amd64-avx512 windows-amd64-vulkan
-call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
+call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-11-7/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-12-0/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-11-7/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx512/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-vulkan/v%ENGINE_VERSION%
 call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
 call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
@ -28,10 +29,10 @@ del %BIN_PATH%\cortex.exe
@REM Loop through each folder and move DLLs (excluding engine.dll)
 for %%F in (%SUBFOLDERS%) do (
-    echo Processing folder: %BIN_PATH%\%%F
+    echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\%%F\v%ENGINE_VERSION%
    @REM Move all .dll files except engine.dll
-    for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
+    for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\%%F\v%ENGINE_VERSION%\*.dll) do (
        if /I not "%%~nxD"=="engine.dll" (
            move "%%D" "%BIN_PATH%"
        )
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@ -2,9 +2,11 @@
 # Read CORTEX_VERSION
 CORTEX_VERSION=$(cat ./bin/version.txt)
-CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
+ENGINE_VERSION=0.1.39
-ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
+CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download"
-CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
+ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}"
 CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}"
 SHARED_PATH="../../electron/shared"
 # Detect platform
 OS_TYPE=$(uname)
@ -17,17 +19,19 @@ if [ "$OS_TYPE" == "Linux" ]; then
    chmod +x "./bin/cortex-server"
    # Download engines for Linux
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx512/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-12-0/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-11-7/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-12-0/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-11-7/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1
-    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
-    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
    mkdir -p "${SHARED_PATH}/engines/cortex.llamacpp/deps"
    touch "${SHARED_PATH}/engines/cortex.llamacpp/deps/keep"
 elif [ "$OS_TYPE" == "Darwin" ]; then
    # macOS downloads
@ -38,8 +42,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
    chmod +x "./bin/cortex-server"
    # Download engines for macOS
+    # Extract into the folder named after ENGINE_VERSION so it always matches the downloaded archive
-    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-arm64/v${ENGINE_VERSION}"
-    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-amd64/v${ENGINE_VERSION}"
 else
    echo "Unsupported operating system: $OS_TYPE"
--- a/extensions/inference-cortex-extension/rollup.config.ts
+++ b/extensions/inference-cortex-extension/rollup.config.ts
@ -120,6 +120,7 @@ export default [
        DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
        CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
        CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
        CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.39'),
      }),
      // Allow json resolution
      json(),
--- a/extensions/inference-cortex-extension/src/@types/global.d.ts
+++ b/extensions/inference-cortex-extension/src/@types/global.d.ts
@ -1,6 +1,7 @@
 declare const NODE: string
 declare const CORTEX_API_URL: string
 declare const CORTEX_SOCKET_URL: string
 declare const CORTEX_ENGINE_VERSION: string
 declare const DEFAULT_SETTINGS: Array<any>
 declare const MODELS: Array<any>
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@ -18,6 +18,8 @@ import {
  fs,
  events,
  ModelEvent,
  SystemInformation,
  dirName,
 } from '@janhq/core'
 import PQueue from 'p-queue'
 import ky from 'ky'
@ -67,13 +69,13 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    super.onLoad()
    this.queue.add(() => this.clean())
    // Run the process watchdog
    const systemInfo = await systemInformation()
-    await this.clean()
+    this.queue.add(() => executeOnMain(NODE, 'run', systemInfo))
    await executeOnMain(NODE, 'run', systemInfo)
    this.queue.add(() => this.healthz())
-
+    this.queue.add(() => this.setDefaultEngine(systemInfo))
    this.subscribeToEvents()
    window.addEventListener('beforeunload', () => {
@ -93,7 +95,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    model: Model & { file_path?: string }
  ): Promise<void> {
    if (
-      model.engine === InferenceEngine.nitro &&
+      (model.engine === InferenceEngine.nitro || model.settings.vision_model) &&
      model.settings.llama_model_path
    ) {
      // Legacy chat model support
@ -109,7 +111,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
      model.settings = settings
    }
-    if (model.engine === InferenceEngine.nitro && model.settings.mmproj) {
+    if (
      (model.engine === InferenceEngine.nitro || model.settings.vision_model) &&
      model.settings.mmproj
    ) {
      // Legacy clip vision model support
      model.settings = {
        ...model.settings,
@ -131,6 +136,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
                ? InferenceEngine.cortex_llamacpp
                : model.engine,
          },
          timeout: false,
        })
        .json()
        .catch(async (e) => {
@ -153,25 +159,46 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   * Do health check on cortex.cpp
   * @returns
   */
-  healthz(): Promise<void> {
+  private healthz(): Promise<void> {
    return ky
      .get(`${CORTEX_API_URL}/healthz`, {
        retry: {
-          limit: 10,
+          limit: 20,
          delay: () => 500,
          methods: ['get'],
        },
      })
      .then(() => {})
  }
  /**
   * Tell cortex.cpp which version and variant of the cortex_llamacpp engine
   * to use by default.
   *
   * The variant name is obtained from the main process ('engineVariant'),
   * which receives the GPU settings from the given system information.
   *
   * @param systemInfo - system information; only `gpuSetting` is read here
   * @returns a promise that resolves (with no value) once the request succeeds
   */
  private async setDefaultEngine(systemInfo: SystemInformation) {
    const variant = await executeOnMain(
      NODE,
      'engineVariant',
      systemInfo.gpuSetting
    )
    const endpoint = `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default`
    const query = `version=${CORTEX_ENGINE_VERSION}&variant=${variant}`
    // The endpoint takes its arguments from the query string; the body is an empty JSON object
    await ky.post(`${endpoint}?${query}`, { json: {} })
  }
  /**
   * Clean cortex processes
   * @returns
   */
-  clean(): Promise<any> {
+  private clean(): Promise<any> {
    return ky
      .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
        timeout: 2000, // maximum 2 seconds
        retry: {
          limit: 0,
        },
      })
      .catch(() => {
        // Do nothing
@ -181,7 +208,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
  /**
   * Subscribe to cortex.cpp websocket events
   */
-  subscribeToEvents() {
+  private subscribeToEvents() {
    this.queue.add(
      () =>
        new Promise<void>((resolve) => {
@ -215,7 +242,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
              // Delay for the state update from cortex.cpp
              // Just to be sure
              setTimeout(() => {
-                events.emit(ModelEvent.OnModelsUpdate, {})
+                events.emit(ModelEvent.OnModelsUpdate, {
                  fetch: true,
                })
              }, 500)
            }
          })
@ -235,8 +264,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 }
 /// Legacy
-export const getModelFilePath = async (
+const getModelFilePath = async (
-  model: Model,
+  model: Model & { file_path?: string },
  file: string
 ): Promise<string> => {
  // Symlink to the model file
@ -246,6 +275,9 @@ export const getModelFilePath = async (
  ) {
    return model.sources[0]?.url
  }
  if (model.file_path) {
    await joinPath([await dirName(model.file_path), file])
  }
  return joinPath([await getJanDataFolderPath(), 'models', model.id, file])
 }
 ///
--- a/extensions/inference-cortex-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@ -1,6 +1,6 @@
 import { describe, expect, it } from '@jest/globals'
-import { executableCortexFile } from './execute'
+import { engineVariant, executableCortexFile } from './execute'
-import { GpuSetting } from '@janhq/core'
+import { GpuSetting } from '@janhq/core/node'
 import { cpuInfo } from 'cpu-instructions'
 let testSettings: GpuSetting = {
@ -30,6 +30,11 @@ jest.mock('cpu-instructions', () => ({
 let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
 mockCpuInfo.mockReturnValue([])
 jest.mock('@janhq/core/node', () => ({
  appResourcePath: () => ".",
  log: jest.fn()
 }))
 describe('test executable cortex file', () => {
  afterAll(function () {
    Object.defineProperty(process, 'platform', {
@ -46,8 +51,7 @@ describe('test executable cortex file', () => {
    })
    expect(executableCortexFile(testSettings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`arm64`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath:
          originalPlatform === 'darwin'
            ? expect.stringContaining(`cortex-server`)
@ -56,13 +60,13 @@ describe('test executable cortex file', () => {
        vkVisibleDevices: '',
      })
    )
    expect(engineVariant(testSettings)).toEqual('mac-arm64')
    Object.defineProperty(process, 'arch', {
      value: 'x64',
    })
    expect(executableCortexFile(testSettings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`x64`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath:
          originalPlatform === 'darwin'
            ? expect.stringContaining(`cortex-server`)
@ -71,6 +75,7 @@ describe('test executable cortex file', () => {
        vkVisibleDevices: '',
      })
    )
    expect(engineVariant(testSettings)).toEqual('mac-amd64')
  })
  it('executes on Windows CPU', () => {
@ -84,13 +89,13 @@ describe('test executable cortex file', () => {
    mockCpuInfo.mockReturnValue(['avx'])
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`avx`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath: expect.stringContaining(`cortex-server.exe`),
        cudaVisibleDevices: '',
        vkVisibleDevices: '',
      })
    )
    expect(engineVariant()).toEqual('windows-amd64-avx')
  })
  it('executes on Windows Cuda 11', () => {
@ -120,13 +125,13 @@ describe('test executable cortex file', () => {
    mockCpuInfo.mockReturnValue(['avx2'])
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`avx2-cuda-11-7`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath: expect.stringContaining(`cortex-server.exe`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
    )
    expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-11-7')
  })
  it('executes on Windows Cuda 12', () => {
@ -156,13 +161,15 @@ describe('test executable cortex file', () => {
    mockCpuInfo.mockReturnValue(['noavx'])
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`noavx-cuda-12-0`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath: expect.stringContaining(`cortex-server.exe`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
    )
    expect(engineVariant(settings)).toEqual('windows-amd64-noavx-cuda-12-0')
    mockCpuInfo.mockReturnValue(['avx512'])
    expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-12-0')
  })
  it('executes on Linux CPU', () => {
@ -176,12 +183,13 @@ describe('test executable cortex file', () => {
    mockCpuInfo.mockReturnValue(['noavx'])
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`noavx`),
+        enginePath: expect.stringContaining("shared"),
        executablePath: expect.stringContaining(`cortex-server`),
        cudaVisibleDevices: '',
        vkVisibleDevices: '',
      })
    )
    expect(engineVariant()).toEqual('linux-amd64-noavx')
  })
  it('executes on Linux Cuda 11', () => {
@ -208,15 +216,16 @@ describe('test executable cortex file', () => {
        },
      ],
    }
    mockCpuInfo.mockReturnValue(['avx512'])
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`cuda-11-7`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath: expect.stringContaining(`cortex-server`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
    )
    expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-11-7')
  })
  it('executes on Linux Cuda 12', () => {
@ -245,13 +254,13 @@ describe('test executable cortex file', () => {
    }
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`cuda-12-0`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath: expect.stringContaining(`cortex-server`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
    )
    expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-12-0')
  })
  // Generate test for different cpu instructions on Linux
@ -270,14 +279,14 @@ describe('test executable cortex file', () => {
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(instruction),
+          enginePath: expect.stringContaining('shared'),
          binPath: expect.stringContaining(`bin`),
          executablePath: expect.stringContaining(`cortex-server`),
          cudaVisibleDevices: '',
          vkVisibleDevices: '',
        })
      )
      expect(engineVariant(settings)).toEqual(`linux-amd64-${instruction}`)
    })
  })
  // Generate test for different cpu instructions on Windows
@ -294,13 +303,13 @@ describe('test executable cortex file', () => {
      mockCpuInfo.mockReturnValue([instruction])
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(instruction),
+          enginePath: expect.stringContaining('shared'),
          binPath: expect.stringContaining(`bin`),
          executablePath: expect.stringContaining(`cortex-server.exe`),
          cudaVisibleDevices: '',
          vkVisibleDevices: '',
        })
      )
      expect(engineVariant(settings)).toEqual(`windows-amd64-${instruction}`)
    })
  })
@ -334,13 +343,15 @@ describe('test executable cortex file', () => {
      mockCpuInfo.mockReturnValue([instruction])
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(`cuda-12-0`),
+          enginePath: expect.stringContaining("shared"),
          binPath: expect.stringContaining(`bin`),
          executablePath: expect.stringContaining(`cortex-server.exe`),
          cudaVisibleDevices: '0',
          vkVisibleDevices: '0',
        })
      )
      expect(engineVariant(settings)).toEqual(
        `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
      )
    })
  })
@ -374,13 +385,15 @@ describe('test executable cortex file', () => {
      mockCpuInfo.mockReturnValue([instruction])
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(`cuda-12-0`),
+          enginePath: expect.stringContaining("shared"),
          binPath: expect.stringContaining(`bin`),
          executablePath: expect.stringContaining(`cortex-server`),
          cudaVisibleDevices: '0',
          vkVisibleDevices: '0',
        })
      )
      expect(engineVariant(settings)).toEqual(
        `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
      )
    })
  })
@ -415,13 +428,13 @@ describe('test executable cortex file', () => {
      mockCpuInfo.mockReturnValue([instruction])
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(`vulkan`),
+          enginePath: expect.stringContaining("shared"),
          binPath: expect.stringContaining(`bin`),
          executablePath: expect.stringContaining(`cortex-server`),
          cudaVisibleDevices: '0',
          vkVisibleDevices: '0',
        })
      )
      expect(engineVariant(settings)).toEqual(`linux-amd64-vulkan`)
    })
  })
@ -442,8 +455,7 @@ describe('test executable cortex file', () => {
      mockCpuInfo.mockReturnValue([])
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(`x64`),
+          enginePath: expect.stringContaining("shared"),
          binPath: expect.stringContaining(`bin`),
          executablePath:
            originalPlatform === 'darwin'
              ? expect.stringContaining(`cortex-server`)
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@ -1,10 +1,9 @@
 import { GpuSetting } from '@janhq/core'
 import * as path from 'path'
 import { cpuInfo } from 'cpu-instructions'
 import { GpuSetting, appResourcePath, log } from '@janhq/core/node'
 export interface CortexExecutableOptions {
  enginePath: string
  binPath: string
  executablePath: string
  cudaVisibleDevices: string
  vkVisibleDevices: string
@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => {
  if (!settings) return ''
-  return settings.vulkan === true
+  return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
    ? 'vulkan'
    : settings.run_mode === 'cpu'
      ? ''
      : 'cuda'
 }
 /**
@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => {
 */
 const os = (): string => {
  return process.platform === 'win32'
-    ? 'win'
+    ? 'windows-amd64'
    : process.platform === 'darwin'
      ? process.arch === 'arm64'
-        ? 'arm64'
+        ? 'mac-arm64'
-        : 'x64'
+        : 'mac-amd64'
-      : 'linux'
+      : 'linux-amd64'
 }
 /**
@ -57,7 +52,7 @@ const extension = (): '.exe' | '' => {
 */
 const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
  const isUsingCuda =
-    settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac'
+    settings?.vulkan !== true && settings?.run_mode === 'gpu' && !os().includes('mac')
  if (!isUsingCuda) return undefined
  return settings?.cuda?.version === '11' ? '11-7' : '12-0'
@ -79,36 +74,45 @@ const cpuInstructions = (): string => {
 }
 /**
- * Find which executable file to run based on the current platform.
+ * The executable options for the cortex.cpp extension.
 * @returns The name of the executable file to run.
 */
 export const executableCortexFile = (
  gpuSetting?: GpuSetting
 ): CortexExecutableOptions => {
  const cpuInstruction = cpuInstructions()
  let engineFolder = gpuSetting?.vulkan
    ? 'vulkan'
    : process.platform === 'darwin'
      ? os()
      : [
        gpuRunMode(gpuSetting) !== 'cuda' ||
          cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
          ? cpuInstruction
          : 'noavx',
        gpuRunMode(gpuSetting),
        cudaVersion(gpuSetting),
      ]
        .filter((e) => !!e)
        .join('-')
  let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
  let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
  let binaryName = `cortex-server${extension()}`
  const binPath = path.join(__dirname, '..', 'bin')
  return {
-    enginePath: path.join(binPath, engineFolder),
+    enginePath: path.join(appResourcePath(), 'shared'),
    executablePath: path.join(binPath, binaryName),
    binPath: binPath,
    cudaVisibleDevices,
    vkVisibleDevices,
  }
 }
 /**
  * Build the engine variant identifier for the current platform.
  *
  * The identifier is the dash-joined list of the non-empty parts below:
  *  - the platform prefix returned by `os()`
  *  - `vulkan`, or the CPU instruction set (in GPU/CUDA mode this is reduced
  *    to `avx2` or `noavx`, the only packaged CUDA variants)
  *  - the GPU run mode
  *  - the CUDA version
  * @param gpuSetting Optional GPU settings used to select the variant.
  * @returns The variant name; it is also written to the log.
  */
 export const engineVariant = (gpuSetting?: GpuSetting): string => {
  const cpuInstruction = cpuInstructions()
  const platform = os()

  let instructionPart: string
  if (gpuSetting?.vulkan) {
    instructionPart = 'vulkan'
  } else if (gpuRunMode(gpuSetting) !== 'cuda') {
    // CPU mode - support all variants
    instructionPart = cpuInstruction
  } else {
    // GPU mode - packaged CUDA variants of avx2 and noavx
    const hasAvx2 = cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
    instructionPart = hasAvx2 ? 'avx2' : 'noavx'
  }

  const variantName = [
    platform,
    instructionPart,
    gpuRunMode(gpuSetting),
    cudaVersion(gpuSetting),
  ]
    .filter((part) => !!part)
    .join('-')

  log(`[CORTEX]: Engine variant: ${variantName}`)
  return variantName
 }
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@ -1,8 +1,7 @@
 import path from 'path'
 import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
-import { executableCortexFile } from './execute'
+import { engineVariant, executableCortexFile } from './execute'
 import { ProcessWatchdog } from './watchdog'
 import { appResourcePath } from '@janhq/core/node'
 // The HOST address to use for the Nitro subprocess
 const LOCAL_PORT = '39291'
@ -20,9 +19,9 @@ function run(systemInfo?: SystemInformation): Promise<any> {
      // If ngl is not set or equal to 0, run on CPU with correct instructions
      systemInfo?.gpuSetting
        ? {
-          ...systemInfo.gpuSetting,
+            ...systemInfo.gpuSetting,
-          run_mode: systemInfo.gpuSetting.run_mode,
+            run_mode: systemInfo.gpuSetting.run_mode,
-        }
+          }
        : undefined
    )
@ -30,16 +29,13 @@ function run(systemInfo?: SystemInformation): Promise<any> {
    log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
    log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
    addEnvPaths(path.join(appResourcePath(), 'shared'))
    addEnvPaths(executableOptions.binPath)
    addEnvPaths(executableOptions.enginePath)
    // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH
    // This is required for the cortex engine to run for now since dlls are not moved to the root
    addEnvPaths(
      path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp')
    )
    const dataFolderPath = getJanDataFolderPath()
    if (watchdog) {
      watchdog.terminate()
    }
    watchdog = new ProcessWatchdog(
      executableOptions.executablePath,
      [
@ -81,17 +77,12 @@ function dispose() {
 function addEnvPaths(dest: string) {
  // Add engine path to the PATH and LD_LIBRARY_PATH
  if (process.platform === 'win32') {
-    process.env.PATH = (process.env.PATH || '').concat(
+    process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
      path.delimiter,
      dest,
    )
    log(`[CORTEX] PATH: ${process.env.PATH}`)
  } else {
    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
      path.delimiter,
-      dest,
+      dest
    )
    log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
  }
 }
@ -105,4 +96,5 @@ export interface CortexProcessInfo {
 export default {
  run,
  dispose,
  engineVariant,
 }
--- a/extensions/inference-openai-extension/package.json
+++ b/extensions/inference-openai-extension/package.json
@ -1,7 +1,7 @@
 {
  "name": "@janhq/inference-openai-extension",
  "productName": "OpenAI Inference Engine",
-  "version": "1.0.3",
+  "version": "1.0.4",
  "description": "This extension enables OpenAI chat completion API calls",
  "main": "dist/index.js",
  "module": "dist/module.js",
--- a/extensions/inference-openai-extension/resources/models.json
+++ b/extensions/inference-openai-extension/resources/models.json
@ -97,11 +97,10 @@
    "format": "api",
    "settings": {},
    "parameters": {
-      "max_tokens": 4096,
+      "temperature": 1,
-      "temperature": 0.7,
+      "top_p": 1,
      "top_p": 0.95,
      "stream": true,
-      "stop": [],
+      "max_tokens": 32768,
      "frequency_penalty": 0,
      "presence_penalty": 0
    },
@ -125,11 +124,10 @@
    "format": "api",
    "settings": {},
    "parameters": {
-      "max_tokens": 4096,
+      "temperature": 1,
-      "temperature": 0.7,
+      "top_p": 1,
-      "top_p": 0.95,
+      "max_tokens": 65536,
      "stream": true,
      "stop": [],
      "frequency_penalty": 0,
      "presence_penalty": 0
    },
--- a/extensions/inference-openai-extension/src/index.ts
+++ b/extensions/inference-openai-extension/src/index.ts
@ -76,11 +76,10 @@ export default class JanInferenceOpenAIExtension extends RemoteOAIEngine {
  transformPayload = (payload: OpenAIPayloadType): OpenAIPayloadType => {
    // Transform the payload for preview models
    if (this.previewModels.includes(payload.model)) {
-      const { max_tokens, temperature, top_p, stop, ...params } = payload
+      const { max_tokens, stop, ...params } = payload
      return {
        ...params,
        max_completion_tokens: max_tokens,
        stream: false // o1 only support stream = false
      }
    }
    // Pass through for non-preview models
--- a/extensions/inference-openrouter-extension/resources/models.json
+++ b/extensions/inference-openrouter-extension/resources/models.json
@ -1,4 +1,4 @@
-  [
+[
  {
    "sources": [
      {
@ -13,7 +13,7 @@
    "format": "api",
    "settings": {},
    "parameters": {
-      "max_tokens": 1024,
+      "max_tokens": 128000,
      "temperature": 0.7,
      "top_p": 0.95,
      "frequency_penalty": 0,
--- a/extensions/inference-openrouter-extension/src/index.ts
+++ b/extensions/inference-openrouter-extension/src/index.ts
@ -83,6 +83,6 @@ export default class JanInferenceOpenRouterExtension extends RemoteOAIEngine {
  transformPayload = (payload: PayloadType) => ({
    ...payload,
-    model: this.model,
+    model: payload.model !== 'open-router-auto' ? payload.model : this.model,
  })
 }
--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@ -1,7 +1,7 @@
 {
  "name": "@janhq/model-extension",
  "productName": "Model Management",
-  "version": "1.0.34",
+  "version": "1.0.35",
  "description": "Model Management Extension provides model exploration and seamless downloads",
  "main": "dist/index.js",
  "author": "Jan <service@jan.ai>",
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@ -1,6 +1,6 @@
 import PQueue from 'p-queue'
 import ky from 'ky'
-import {  extractModelLoadParams, Model } from '@janhq/core'
+import { extractModelLoadParams, Model } from '@janhq/core'
 import { extractInferenceParams } from '@janhq/core'
 /**
 * cortex.cpp Model APIs interface
@ -18,6 +18,7 @@ interface ICortexAPI {
  deleteModel(model: string): Promise<void>
  updateModel(model: object): Promise<void>
  cancelModelPull(model: string): Promise<void>
  configs(body: { [key: string]: any }): Promise<void>
 }
 type ModelList = {
@ -52,7 +53,7 @@ export class CortexAPI implements ICortexAPI {
   */
  getModels(): Promise<Model[]> {
    return this.queue
-      .add(() => ky.get(`${API_URL}/models`).json<ModelList>())
+      .add(() => ky.get(`${API_URL}/v1/models`).json<ModelList>())
      .then((e) =>
        typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : []
      )
@ -104,7 +105,7 @@ export class CortexAPI implements ICortexAPI {
   */
  deleteModel(model: string): Promise<void> {
    return this.queue.add(() =>
-      ky.delete(`${API_URL}/models/${model}`).json().then()
+      ky.delete(`${API_URL}/v1/models/${model}`).json().then()
    )
  }
@ -130,7 +131,7 @@ export class CortexAPI implements ICortexAPI {
  cancelModelPull(model: string): Promise<void> {
    return this.queue.add(() =>
      ky
-        .delete(`${API_URL}/models/pull`, { json: { taskId: model } })
+        .delete(`${API_URL}/v1/models/pull`, { json: { taskId: model } })
        .json()
        .then()
    )
@ -142,7 +143,7 @@ export class CortexAPI implements ICortexAPI {
   */
  async getModelStatus(model: string): Promise<boolean> {
    return this.queue
-      .add(() => ky.get(`${API_URL}/models/status/${model}`))
+      .add(() => ky.get(`${API_URL}/v1/models/status/${model}`))
      .then((e) => true)
      .catch(() => false)
  }
@ -155,13 +156,24 @@ export class CortexAPI implements ICortexAPI {
    return ky
      .get(`${API_URL}/healthz`, {
        retry: {
-          limit: 10,
+          limit: 20,
          delay: () => 500,
          methods: ['get'],
        },
      })
      .then(() => {})
  }
  /**
   * Send configuration values to cortex.cpp.
   *
   * The request is scheduled on this instance's queue and issues a PATCH to
   * `/v1/configs` with `body` as the JSON payload.
   * @param body Key/value pairs to send as the request body.
   * @returns A promise that resolves (with no value) once the request succeeded.
   */
  configs(body: { [key: string]: any }): Promise<void> {
    return this.queue.add(async () => {
      // Response is intentionally discarded
      await ky.patch(`${API_URL}/v1/configs`, { json: body })
    })
  }
  /**
   * Transform model to the expected format (e.g. parameters, settings, metadata)
   * @param model
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@ -20,11 +20,8 @@ import { deleteModelFiles } from './legacy/delete'
 declare const SETTINGS: Array<any>
-/**
+export enum Settings {
- * Extension enum
+  huggingfaceToken = 'hugging-face-access-token',
 */
 enum ExtensionEnum {
  downloadedModels = 'downloadedModels',
 }
 /**
@ -40,15 +37,29 @@ export default class JanModelExtension extends ModelExtension {
  async onLoad() {
    this.registerSettings(SETTINGS)
-    // Try get models from cortex.cpp
+    // Configure huggingface token if available
-    this.getModels().then((models) => {
+    const huggingfaceToken = await this.getSetting<string>(
-      this.registerModels(models)
+      Settings.huggingfaceToken,
-    })
+      undefined
    )
    if (huggingfaceToken)
      this.cortexAPI.configs({ huggingface_token: huggingfaceToken })
    // Listen to app download events
    this.handleDesktopEvents()
  }
  /**
   * Subscribe to settings update and make change accordingly.
   *
   * Only the Hugging Face access token setting is handled: its new value is
   * forwarded to cortex.cpp through `cortexAPI.configs`. Other keys are ignored.
   * @param key Key of the setting that changed.
   * @param value New value of the setting.
   */
  onSettingUpdate<T>(key: string, value: T): void {
    if (key !== Settings.huggingfaceToken) return

    // Fire-and-forget: this method is synchronous, so a failed request must be
    // handled here or it would surface as an unhandled promise rejection.
    this.cortexAPI
      .configs({ huggingface_token: value })
      .catch((e) => console.debug(e))
  }
  /**
   * Called when the extension is unloaded.
   * @override
@ -127,55 +138,43 @@ export default class JanModelExtension extends ModelExtension {
   * @returns A Promise that resolves with an array of all models.
   */
  async getModels(): Promise<Model[]> {
    /**
     * In this action, if return empty array right away
     * it would reset app cache and app will not function properly
     * should compare and try import
     */
    let currentModels: Model[] = []
    /**
     * Legacy models should be supported
     */
    let legacyModels = await scanModelsFolder()
    try {
      if (!localStorage.getItem(ExtensionEnum.downloadedModels)) {
        // Updated from an older version than 0.5.5
        // Scan through the models folder and import them (Legacy flow)
        // Return models immediately
        currentModels = legacyModels
      } else {
        currentModels = JSON.parse(
          localStorage.getItem(ExtensionEnum.downloadedModels)
        ) as Model[]
      }
    } catch (e) {
      currentModels = []
      console.error(e)
    }
    /**
     * Here we are filtering out the models that are not imported
     * and are not using llama.cpp engine
     */
-    var toImportModels = currentModels.filter(
+    var toImportModels = legacyModels.filter(
      (e) => e.engine === InferenceEngine.nitro
    )
-    await this.cortexAPI.getModels().then((models) => {
+    /**
-      const existingIds = models.map((e) => e.id)
+     * Fetch models from cortex.cpp
-      toImportModels = toImportModels.filter(
+     */
-        (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model
+    var fetchedModels = await this.cortexAPI.getModels().catch(() => [])
    // Checking if there are models to import
    const existingIds = fetchedModels.map((e) => e.id)
    toImportModels = toImportModels.filter(
      (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model
    )
    /**
     * There is no model to import
     * just return fetched models
     */
    if (!toImportModels.length)
      return fetchedModels.concat(
        legacyModels.filter((e) => !fetchedModels.some((x) => x.id === e.id))
      )
    })
    console.log('To import models:', toImportModels.length)
    /**
     * There are models to import
-     * do not return models from cortex.cpp yet
+     */
     * otherwise it will reset the app cache
     * */
    if (toImportModels.length > 0) {
      // Import models
      await Promise.all(
@ -193,17 +192,19 @@ export default class JanModelExtension extends ModelExtension {
                ]) // Copied models
              : model.sources[0].url, // Symlink models,
            model.name
-          ).then((e) => {
+          )
-            this.updateModel({
+            .then((e) => {
-              id: model.id,
+              this.updateModel({
-              ...model.settings,
+                id: model.id,
-              ...model.parameters,
+                ...model.settings,
-            } as Partial<Model>)
+                ...model.parameters,
-          })
+              } as Partial<Model>)
            })
            .catch((e) => {
              console.debug(e)
            })
        })
      )
      return currentModels
    }
    /**
@ -252,6 +253,13 @@ export default class JanModelExtension extends ModelExtension {
    return this.cortexAPI.getModelStatus(model)
  }
  /**
   * Configure pull options such as proxy, headers, etc.
   *
   * Forwards `options` to `cortexAPI.configs`. A failed request is logged with
   * `console.debug` and does not reject the returned promise.
   * @param options Key/value pairs passed through unchanged.
   * @returns A promise resolving to `undefined` on success and on logged failure.
   */
  async configurePullOptions(options: { [key: string]: any }): Promise<any> {
    // Start the request outside the try block so that only its asynchronous
    // failure is swallowed, not an error thrown while issuing the call.
    const pending = this.cortexAPI.configs(options)
    try {
      return await pending
    } catch (e) {
      console.debug(e)
      return undefined
    }
  }
  /**
   * Handle download state from main app
   */
--- a/extensions/model-extension/src/legacy/delete.ts
+++ b/extensions/model-extension/src/legacy/delete.ts
@ -1,10 +1,12 @@
-import { fs, joinPath } from '@janhq/core'
+import { dirName, fs } from '@janhq/core'
 import { scanModelsFolder } from './model-json'
 export const deleteModelFiles = async (id: string) => {
  try {
-    const dirPath = await joinPath(['file://models', id])
+    const models = await scanModelsFolder()
    const dirPath = models.find((e) => e.id === id)?.file_path
    // remove model folder directory
-    await fs.rm(dirPath)
+    if (dirPath) await fs.rm(await dirName(dirPath))
  } catch (err) {
    console.error(err)
  }
--- a/extensions/model-extension/src/legacy/model-json.ts
+++ b/extensions/model-extension/src/legacy/model-json.ts
@ -12,7 +12,9 @@ const LocalEngines = [
 * Scan through models folder and return downloaded models
 * @returns
 */
-export const scanModelsFolder = async (): Promise<Model[]> => {
+export const scanModelsFolder = async (): Promise<
  (Model & { file_path?: string })[]
 > => {
  const _homeDir = 'file://models'
  try {
    if (!(await fs.existsSync(_homeDir))) {
@ -37,7 +39,7 @@ export const scanModelsFolder = async (): Promise<Model[]> => {
      const jsonPath = await getModelJsonPath(folderFullPath)
-      if (await fs.existsSync(jsonPath)) {
+      if (jsonPath && (await fs.existsSync(jsonPath))) {
        // if we have the model.json file, read it
        let model = await fs.readFileSync(jsonPath, 'utf-8')
@ -83,7 +85,10 @@ export const scanModelsFolder = async (): Promise<Model[]> => {
                  file.toLowerCase().endsWith('.gguf') || // GGUF
                  file.toLowerCase().endsWith('.engine') // Tensort-LLM
                )
-              })?.length >= (model.engine === InferenceEngine.nitro_tensorrt_llm ? 1 : (model.sources?.length ?? 1))
+              })?.length >=
                (model.engine === InferenceEngine.nitro_tensorrt_llm
                  ? 1
                  : (model.sources?.length ?? 1))
            )
          })
--- a/extensions/monitoring-extension/src/node/index.ts
+++ b/extensions/monitoring-extension/src/node/index.ts
@ -259,15 +259,15 @@ const updateGpuInfo = async () =>
            data.gpu_highest_vram = highestVramId
          } else {
            data.gpus = []
-            data.gpu_highest_vram = ''
+            data.gpu_highest_vram = undefined
          }
          if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
-            data.gpus_in_use = [data.gpu_highest_vram]
+            data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
          }
          data = await updateCudaExistence(data)
-          console.log(data)
+          console.log('[MONITORING]::Cuda info: ', data)
          writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
          log(`[APP]::${JSON.stringify(data)}`)
          resolve({})
@ -344,7 +344,7 @@ const updateCudaExistence = async (
            data.cuda.version = match[1]
          }
        }
-        console.log(data)
+        console.log('[MONITORING]::Finalized cuda info update: ', data)
        resolve()
      })
    })
--- a/web/containers/ErrorMessage/index.test.tsx
+++ b/web/containers/ErrorMessage/index.test.tsx
@ -63,9 +63,6 @@ describe('ErrorMessage Component', () => {
    render(<ErrorMessage message={message} />)
    expect(
      screen.getByText('Apologies, something’s amiss!')
    ).toBeInTheDocument()
    expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument()
  })
--- a/web/containers/ErrorMessage/index.tsx
+++ b/web/containers/ErrorMessage/index.tsx
@ -27,8 +27,6 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
  const getErrorTitle = () => {
    switch (message.error_code) {
      case ErrorCode.Unknown:
        return 'Apologies, something’s amiss!'
      case ErrorCode.InvalidApiKey:
      case ErrorCode.AuthenticationError:
      case ErrorCode.InvalidRequestError:
@ -55,17 +53,17 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
        )
      default:
        return (
-          <>
+          <p>
            {message.content[0]?.text?.value && (
              <AutoLink text={message.content[0].text.value} />
            )}
-          </>
+          </p>
        )
    }
  }
  return (
-    <div className="mt-10">
+    <div className="mx-auto mt-10 max-w-[700px]">
      {message.status === MessageStatus.Error && (
        <div
          key={message.id}
--- a/web/containers/ListContainer/index.tsx
+++ b/web/containers/ListContainer/index.tsx
@ -25,6 +25,11 @@ const ListContainer = ({ children }: Props) => {
        isUserManuallyScrollingUp.current = false
      }
    }
    if (isUserManuallyScrollingUp.current === true) {
      event.preventDefault()
      event.stopPropagation()
    }
    prevScrollTop.current = currentScrollTop
  }, [])
--- a/web/containers/Providers/DataLoader.tsx
+++ b/web/containers/Providers/DataLoader.tsx
@ -29,13 +29,21 @@ const DataLoader: React.FC<Props> = ({ children }) => {
  const setQuickAskEnabled = useSetAtom(quickAskEnabledAtom)
  const setJanDefaultDataFolder = useSetAtom(defaultJanDataFolderAtom)
  const setJanSettingScreen = useSetAtom(janSettingScreenAtom)
  const { loadDataModel, configurePullOptions } = useModels()
  useModels()
  useThreads()
  useAssistants()
  useGetSystemResources()
  useLoadTheme()
  // Bootstrap effect: the dependency array is intentionally empty so both
  // calls below are issued only when the component mounts, which is why the
  // exhaustive-deps lint rule is suppressed at the end of the body.
  useEffect(() => {
    // Load data once
    loadDataModel()
    // Configure pull options once
    configurePullOptions()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])
  useEffect(() => {
    window.core?.api
      ?.getAppConfigurations()
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@ -180,7 +180,11 @@ export default function EventHandler({ children }: { children: ReactNode }) {
          setIsGeneratingResponse(false)
        }
        return
-      } else if (message.status === MessageStatus.Error) {
+      } else if (
        message.status === MessageStatus.Error &&
        activeModelRef.current?.engine &&
        isLocalEngine(activeModelRef.current.engine)
      ) {
        ;(async () => {
          if (
            !(await extensionManager
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@ -112,8 +112,8 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
        state.downloadState = 'end'
        setDownloadState(state)
        removeDownloadingModel(state.modelId)
        events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
      }
      events.emit(ModelEvent.OnModelsUpdate, {})
    },
    [removeDownloadingModel, setDownloadState]
  )
--- a/web/containers/Providers/ModelImportListener.tsx
+++ b/web/containers/Providers/ModelImportListener.tsx
@ -43,7 +43,7 @@ const ModelImportListener = ({ children }: PropsWithChildren) => {
  const onImportModelSuccess = useCallback(
    (state: ImportingModel) => {
      if (!state.modelId) return
-      events.emit(ModelEvent.OnModelsUpdate, {})
+      events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
      setImportingModelSuccess(state.importId, state.modelId)
    },
    [setImportingModelSuccess]
--- a/web/containers/ServerLogs/index.tsx
+++ b/web/containers/ServerLogs/index.tsx
@ -1,8 +1,8 @@
 /* eslint-disable @typescript-eslint/naming-convention */
-import { memo, useCallback, useEffect, useState } from 'react'
+import { memo, useCallback, useEffect, useRef, useState } from 'react'
-import { Button, useClipboard } from '@janhq/joi'
+import { Button, ScrollArea, useClipboard } from '@janhq/joi'
 import { useAtomValue } from 'jotai'
 import { FolderIcon, CheckIcon, CopyIcon } from 'lucide-react'
@ -22,6 +22,9 @@ const ServerLogs = (props: ServerLogsProps) => {
  const { getLogs } = useLogs()
  const serverEnabled = useAtomValue(serverEnabledAtom)
  const [logs, setLogs] = useState<string[]>([])
  const listRef = useRef<HTMLDivElement>(null)
  const prevScrollTop = useRef(0)
  const isUserManuallyScrollingUp = useRef(false)
  const updateLogs = useCallback(
    () =>
@ -58,13 +61,45 @@ const ServerLogs = (props: ServerLogsProps) => {
  const clipboard = useClipboard({ timeout: 1000 })
  /**
   * Scroll handler that records whether the user has scrolled up.
   *
   * - A scroll position lower than the previously recorded one sets the
   *   "user is scrolling up" flag.
   * - Otherwise, once the viewport bottom reaches the end of the content,
   *   the flag is cleared.
   * While the flag is set, the event's default handling and propagation are
   * stopped. The latest scroll position is always stored for the next call.
   */
  const handleScroll = useCallback((event: React.UIEvent<HTMLElement>) => {
    const { scrollTop, scrollHeight, clientHeight } = event.currentTarget
    const hasReachedBottom = scrollTop + clientHeight >= scrollHeight

    if (scrollTop < prevScrollTop.current) {
      // Position decreased since the last event: treat as a manual scroll up.
      isUserManuallyScrollingUp.current = true
    } else if (hasReachedBottom) {
      isUserManuallyScrollingUp.current = false
    }

    if (isUserManuallyScrollingUp.current) {
      event.preventDefault()
      event.stopPropagation()
    }

    prevScrollTop.current = scrollTop
  }, [])
  // Keep the list scrolled to its end: jumps to the current scrollHeight
  // unless the "user is scrolling up" flag is set or the list element is not
  // mounted yet.
  useEffect(() => {
    // Bail out when the user has scrolled up or there is no element to scroll.
    if (isUserManuallyScrollingUp.current === true || !listRef.current) return
    const scrollHeight = listRef.current?.scrollHeight ?? 0
    // 'instant' jumps straight to the target offset instead of animating.
    listRef.current?.scrollTo({
      top: scrollHeight,
      behavior: 'instant',
    })
    // NOTE(review): the dependency list holds the ref object itself (not its
    // `.current` flag) and a value read from `listRef.current` during render;
    // presumably `logs` is what actually re-triggers this effect — confirm.
  }, [listRef.current?.scrollHeight, isUserManuallyScrollingUp, logs])
  return (
-    <div
+    <ScrollArea
      ref={listRef}
      className={twMerge(
-        'p-4 pb-0',
+        'h-[calc(100%-49px)] w-full p-4 py-0',
        !withCopy && 'max-w-[38vw] lg:max-w-[40vw] xl:max-w-[50vw]',
        logs.length === 0 && 'mx-auto'
      )}
      onScroll={handleScroll}
    >
      {withCopy && (
        <div className="absolute right-2 top-7">
@ -107,7 +142,7 @@ const ServerLogs = (props: ServerLogsProps) => {
      )}
      <div className="flex h-full w-full flex-col">
        {logs.length > 0 ? (
-          <code className="inline-block whitespace-break-spaces text-[13px]">
+          <code className="inline-block max-w-[38vw] whitespace-break-spaces text-[13px] lg:max-w-[40vw] xl:max-w-[50vw]">
            {logs.slice(-limit).map((log, i) => {
              return (
                <p key={i} className="my-2 leading-relaxed">
@ -256,7 +291,7 @@ const ServerLogs = (props: ServerLogsProps) => {
          </div>
        )}
      </div>
-    </div>
+    </ScrollArea>
  )
 }
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@ -26,15 +26,13 @@ export const stateModelAtom = atom<ModelState>({
  model: undefined,
 })
 const pendingModelLoadAtom = atom<boolean>(false)
 export function useActiveModel() {
  const [activeModel, setActiveModel] = useAtom(activeModelAtom)
  const activeThread = useAtomValue(activeThreadAtom)
  const [stateModel, setStateModel] = useAtom(stateModelAtom)
  const downloadedModels = useAtomValue(downloadedModelsAtom)
  const setLoadModelError = useSetAtom(loadModelErrorAtom)
-  const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom)
+  const pendingModelLoad = useRef(false)
  const isVulkanEnabled = useAtomValue(vulkanEnabledAtom)
  const downloadedModelsRef = useRef<Model[]>([])
@ -55,7 +53,7 @@ export function useActiveModel() {
    if (activeModel) {
      await stopModel(activeModel)
    }
-    setPendingModelLoad(true)
+    pendingModelLoad.current = true
    let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
@ -120,16 +118,16 @@ export function useActiveModel() {
          undefined,
        }))
-        if (!pendingModelLoad && abortable) {
+        if (!pendingModelLoad.current && abortable) {
          return Promise.reject(new Error('aborted'))
        }
        toaster({
          title: 'Failed!',
-          description: `Model ${model.id} failed to start.`,
+          description: `Model ${model.id} failed to start. ${error.message ?? ''}`,
          type: 'error',
        })
-        setLoadModelError(error)
+        setLoadModelError(error.message ?? error)
        return Promise.reject(error)
      })
  }
@ -147,16 +145,10 @@ export function useActiveModel() {
        .then(() => {
          setActiveModel(undefined)
          setStateModel({ state: 'start', loading: false, model: undefined })
-          setPendingModelLoad(false)
+          pendingModelLoad.current = false
        })
    },
-    [
+    [activeModel, setStateModel, setActiveModel, stateModel]
      activeModel,
      setStateModel,
      setActiveModel,
      setPendingModelLoad,
      stateModel,
    ]
  )
  const stopInference = useCallback(async () => {
--- a/web/hooks/useImportModel.ts
+++ b/web/hooks/useImportModel.ts
@ -9,7 +9,6 @@ import {
  OptionType,
  events,
  fs,
  baseName,
 } from '@janhq/core'
 import { atom, useAtomValue, useSetAtom } from 'jotai'
--- a/web/hooks/useModels.test.ts
+++ b/web/hooks/useModels.test.ts
@ -1,5 +1,5 @@
 // useModels.test.ts
-import { renderHook, act } from '@testing-library/react'
+import { renderHook, act, waitFor } from '@testing-library/react'
 import { events, ModelEvent, ModelManager } from '@janhq/core'
 import { extensionManager } from '@/extension'
@ -36,19 +36,98 @@ describe('useModels', () => {
        }),
        get: () => undefined,
        has: () => true,
        // set: () => {}
      },
    })
    jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
-    act(() => {
+    const { result } = renderHook(() => useModels())
-      renderHook(() => useModels())
+    await act(() => {
      result.current?.loadDataModel()
    })
    expect(mockModelExtension.getModels).toHaveBeenCalled()
  })
  it('should return empty on error', async () => {
    // Model extension whose getModels() always rejects.
    const mockModelExtension = {
      getModels: jest.fn().mockRejectedValue(new Error('Error')),
    } as any
    // Stub the ModelManager singleton with a minimal in-memory model registry.
    ;(ModelManager.instance as jest.Mock).mockReturnValue({
      models: {
        values: () => ({
          toArray: () => ({
            filter: () => models,
          }),
        }),
        get: () => undefined,
        has: () => true,
      },
    })

    jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)

    const { result } = renderHook(() => useModels())

    await act(() => {
      result.current?.loadDataModel()
    })

    // `.rejects` resolves asynchronously; without `await` the test could
    // finish before the assertion settles and a failure would go unnoticed.
    await expect(mockModelExtension.getModels()).rejects.toThrow()
  })
  // Covers the OnModelsUpdate path where the payload has `fetch: false`:
  // the hook is expected NOT to query the model extension.
  it('should update states on models update', async () => {
    const mockModelExtension = {
      getModels: jest.fn().mockResolvedValue(models),
    } as any
    // Stub the ModelManager singleton with a minimal in-memory model registry.
    ;(ModelManager.instance as jest.Mock).mockReturnValue({
      models: {
        values: () => ({
          toArray: () => ({
            filter: () => models,
          }),
        }),
        get: () => undefined,
        has: () => true,
      },
    })
    jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
    // Invoke the registered listener immediately, simulating an event whose
    // payload does not request a fetch.
    jest.spyOn(events, 'on').mockImplementationOnce((event, cb) => {
      cb({ fetch: false })
    })
    renderHook(() => useModels())
    expect(mockModelExtension.getModels).not.toHaveBeenCalled()
  })
  // Covers the OnModelsUpdate path where the payload has `fetch: true`:
  // the hook is expected to query the model extension again.
  // The title is distinct from the `fetch: false` case so that a failure
  // report identifies which scenario broke.
  it('should refetch models on models update with fetch flag', async () => {
    const mockModelExtension = {
      getModels: jest.fn().mockResolvedValue(models),
    } as any
    // Stub the ModelManager singleton with a minimal in-memory model registry.
    ;(ModelManager.instance as jest.Mock).mockReturnValue({
      models: {
        values: () => ({
          toArray: () => ({
            filter: () => models,
          }),
        }),
        get: () => undefined,
        has: () => true,
      },
    })

    jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
    // Invoke the registered listener immediately, simulating an event whose
    // payload requests a fetch.
    jest.spyOn(events, 'on').mockImplementationOnce((event, cb) => {
      cb({ fetch: true })
    })

    renderHook(() => useModels())

    expect(mockModelExtension.getModels).toHaveBeenCalled()
  })
  it('should remove event listener on unmount', async () => {
    const removeListenerSpy = jest.spyOn(events, 'off')
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@ -9,13 +9,18 @@ import {
  ModelManager,
 } from '@janhq/core'
-import { useSetAtom } from 'jotai'
+import { useSetAtom, useAtom, useAtomValue } from 'jotai'
 import { useDebouncedCallback } from 'use-debounce'
 import { isLocalEngine } from '@/utils/modelEngine'
 import { extensionManager } from '@/extension'
 import {
  ignoreSslAtom,
  proxyAtom,
  proxyEnabledAtom,
 } from '@/helpers/atoms/AppConfig.atom'
 import {
  configuredModelsAtom,
  downloadedModelsAtom,
@ -27,14 +32,17 @@ import {
 * and updates the atoms accordingly.
 */
 const useModels = () => {
-  const setDownloadedModels = useSetAtom(downloadedModelsAtom)
+  const [downloadedModels, setDownloadedModels] = useAtom(downloadedModelsAtom)
  const setExtensionModels = useSetAtom(configuredModelsAtom)
  const proxyEnabled = useAtomValue(proxyEnabledAtom)
  const proxyUrl = useAtomValue(proxyAtom)
  const proxyIgnoreSSL = useAtomValue(ignoreSslAtom)
  const getData = useCallback(() => {
    const getDownloadedModels = async () => {
      const localModels = (await getModels()).map((e) => ({
        ...e,
-        name: ModelManager.instance().models.get(e.id)?.name ?? e.id,
+        name: ModelManager.instance().models.get(e.id)?.name ?? e.name ?? e.id,
        metadata:
          ModelManager.instance().models.get(e.id)?.metadata ?? e.metadata,
      }))
@ -53,9 +61,11 @@ const useModels = () => {
      setDownloadedModels(toUpdate)
      let isUpdated = false
      toUpdate.forEach((model) => {
        if (!ModelManager.instance().models.has(model.id)) {
          ModelManager.instance().models.set(model.id, model)
          // eslint-disable-next-line react-hooks/exhaustive-deps
          isUpdated = true
        }
      })
@ -75,21 +85,56 @@ const useModels = () => {
  const reloadData = useDebouncedCallback(() => getData(), 300)
-  useEffect(() => {
+  const updateStates = useCallback(() => {
-    // Try get data on mount
+    const cachedModels = ModelManager.instance().models.values().toArray()
-    reloadData()
+    const toUpdate = [
      ...downloadedModels,
      ...cachedModels.filter(
        (e: Model) => !downloadedModels.some((g: Model) => g.id === e.id)
      ),
    ]
    setDownloadedModels(toUpdate)
  }, [downloadedModels, setDownloadedModels])
  /**
   * Reads the model list from the registered model extension.
   *
   * @returns the extension's models, or an empty array when no model
   * extension is registered or its `getModels()` call rejects.
   */
  const getModels = async (): Promise<Model[]> => {
    const modelExtension = extensionManager.get<ModelExtension>(
      ExtensionTypeEnum.Model
    )
    if (modelExtension == null) return []
    // A rejected lookup is deliberately reported as "no models".
    return modelExtension.getModels().catch(() => [])
  }
  useEffect(() => {
    // Listen for model updates
-    events.on(ModelEvent.OnModelsUpdate, async () => reloadData())
+    events.on(ModelEvent.OnModelsUpdate, async (data: { fetch?: boolean }) => {
      if (data.fetch) reloadData()
      else updateStates()
    })
    return () => {
      // Remove listener on unmount
      events.off(ModelEvent.OnModelsUpdate, async () => {})
    }
-  }, [getData, reloadData])
+  }, [reloadData, updateStates])
  /**
   * Pushes the current proxy configuration to the model extension.
   *
   * With the proxy enabled, the configured URL is sent and peer SSL
   * verification is the inverse of the "ignore SSL" setting. With the proxy
   * disabled, an empty URL is sent with peer SSL verification turned off.
   * Nothing happens when no model extension is registered.
   */
  const configurePullOptions = useCallback(() => {
    const pullOptions = proxyEnabled
      ? {
          proxy_url: proxyUrl,
          verify_peer_ssl: !proxyIgnoreSSL,
        }
      : {
          proxy_url: '',
          verify_peer_ssl: false,
        }

    const modelExtension = extensionManager.get<ModelExtension>(
      ExtensionTypeEnum.Model
    )
    modelExtension?.configurePullOptions(pullOptions)
  }, [proxyEnabled, proxyUrl, proxyIgnoreSSL])
  return {
    loadDataModel: getData,
    configurePullOptions,
  }
 }
 /**
  * Reads the model list from the registered model extension, falling back to
  * an empty array when no model extension is registered. There is no error
  * handling here: a rejection from `getModels()` reaches the caller.
  */
 const getModels = async (): Promise<Model[]> =>
  extensionManager.get<ModelExtension>(ExtensionTypeEnum.Model)?.getModels() ??
  []
 export default useModels
--- a/web/hooks/usePath.ts
+++ b/web/hooks/usePath.ts
@ -42,39 +42,6 @@ export const usePath = () => {
    openFileExplorer(fullPath)
  }
  /**
   * Opens the JSON file backing the given entity type in the file explorer.
   *
   * - 'Engine' / 'Thread': the active thread's `thread.json`
   * - 'Model': the selected model's `model.json`
   * - 'Assistant' / 'Tools': the active thread's first assistant's
   *   `assistant.json`
   * Any other type, or a missing thread/model/assistant, is a no-op.
   */
  const onViewJson = async (type: string) => {
    // TODO: this logic should be refactored.
    // Every type except 'Model' needs an active thread.
    if (type !== 'Model' && !activeThread) return

    const assistantId = activeThread?.assistants[0]?.assistant_id
    let relativePath = undefined

    if (type === 'Engine' || type === 'Thread') {
      relativePath = await joinPath([
        'threads',
        activeThread?.id ?? '',
        'thread.json',
      ])
    } else if (type === 'Model') {
      if (!selectedModel) return
      relativePath = await joinPath(['models', selectedModel.id, 'model.json'])
    } else if (type === 'Assistant' || type === 'Tools') {
      if (!assistantId) return
      relativePath = await joinPath([
        'assistants',
        assistantId,
        'assistant.json',
      ])
    }

    if (!relativePath) return

    // Resolve against the Jan data folder before handing off to the explorer.
    const fullPath = await joinPath([janDataFolderPath, relativePath])
    openFileExplorer(fullPath)
  }
  const onViewFile = async (id: string) => {
    if (!activeThread) return
@ -99,7 +66,6 @@ export const usePath = () => {
  return {
    onRevealInFinder,
    onViewJson,
    onViewFile,
    onViewFileContainer,
  }
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@ -27,7 +27,7 @@ import { MessageRequestBuilder } from '@/utils/messageRequestBuilder'
 import { ThreadMessageBuilder } from '@/utils/threadMessageBuilder'
-import { loadModelErrorAtom, useActiveModel } from './useActiveModel'
+import { useActiveModel } from './useActiveModel'
 import { extensionManager } from '@/extension/ExtensionManager'
 import {
@ -60,10 +60,8 @@ export default function useSendChatMessage() {
  const currentMessages = useAtomValue(getCurrentChatMessagesAtom)
  const selectedModel = useAtomValue(selectedModelAtom)
  const { activeModel, startModel } = useActiveModel()
  const loadModelFailed = useAtomValue(loadModelErrorAtom)
  const modelRef = useRef<Model | undefined>()
  const loadModelFailedRef = useRef<string | undefined>()
  const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
  const engineParamsUpdate = useAtomValue(engineParamsUpdateAtom)
@ -80,10 +78,6 @@ export default function useSendChatMessage() {
    modelRef.current = activeModel
  }, [activeModel])
  useEffect(() => {
    loadModelFailedRef.current = loadModelFailed
  }, [loadModelFailed])
  useEffect(() => {
    activeThreadRef.current = activeThread
  }, [activeThread])
--- a/web/hooks/useSettings.ts
+++ b/web/hooks/useSettings.ts
@ -53,7 +53,7 @@ export const useSettings = () => {
    const settings = await readSettings()
    if (runMode != null) settings.run_mode = runMode
    if (notify != null) settings.notify = notify
-    if (gpusInUse != null) settings.gpus_in_use = gpusInUse
+    if (gpusInUse != null) settings.gpus_in_use = gpusInUse.filter((e) => !!e)
    if (vulkan != null) {
      settings.vulkan = vulkan
      // GPU enabled, set run_mode to 'gpu'
--- a/web/jest.config.js
+++ b/web/jest.config.js
@ -13,6 +13,11 @@ const config = {
  moduleNameMapper: {
    // ...
    '^@/(.*)$': '<rootDir>/$1',
    'react-markdown': '<rootDir>/mock/empty-mock.tsx',
    'rehype-highlight': '<rootDir>/mock/empty-mock.tsx',
    'rehype-katex': '<rootDir>/mock/empty-mock.tsx',
    'rehype-raw': '<rootDir>/mock/empty-mock.tsx',
    'remark-math': '<rootDir>/mock/empty-mock.tsx',
  },
  // Add more setup options before each test is run
  // setupFilesAfterEnv: ['<rootDir>/jest.setup.ts'],
--- a/web/mock/empty-mock.tsx
+++ b/web/mock/empty-mock.tsx
@ -0,0 +1,2 @@
 // Empty placeholder object exported as this module's default export.
 // NOTE(review): presumably a stand-in for modules that should not be loaded
 // under test — confirm against the Jest moduleNameMapper configuration.
 const EmptyMock = {}
 export default EmptyMock
--- a/web/package.json
+++ b/web/package.json
@ -1,6 +1,6 @@
 {
  "name": "@janhq/web",
-  "version": "0.1.0",
+  "version": "0.5.9",
  "private": true,
  "homepage": "./",
  "scripts": {
@ -14,13 +14,10 @@
    "test": "jest"
  },
  "dependencies": {
    "@heroicons/react": "^2.0.18",
    "@hookform/resolvers": "^3.3.2",
    "@janhq/core": "link:./core",
    "@janhq/joi": "link:./joi",
    "autoprefixer": "10.4.16",
    "class-variance-authority": "^0.7.0",
    "csstype": "^3.0.10",
    "framer-motion": "^10.16.4",
    "highlight.js": "^11.9.0",
    "jotai": "^2.6.0",
@ -28,8 +25,6 @@
    "lodash": "^4.17.21",
    "lucide-react": "^0.291.0",
    "marked": "^9.1.2",
    "marked-highlight": "^2.0.6",
    "marked-katex-extension": "^5.0.2",
    "next": "14.2.3",
    "next-themes": "^0.2.1",
    "postcss": "8.4.31",
@ -39,22 +34,25 @@
    "react-circular-progressbar": "^2.1.0",
    "react-dom": "18.2.0",
    "react-dropzone": "14.2.3",
    "react-hook-form": "^7.47.0",
    "react-hot-toast": "^2.4.1",
    "react-icons": "^4.12.0",
-    "react-scroll-to-bottom": "^4.2.0",
+    "react-markdown": "^9.0.1",
    "react-toastify": "^9.1.3",
    "rehype-highlight": "^7.0.1",
    "rehype-highlight-code-lines": "^1.0.4",
    "rehype-katex": "^7.0.1",
    "rehype-raw": "^7.0.0",
    "remark-math": "^6.0.0",
    "sass": "^1.69.4",
    "slate": "latest",
    "slate-dom": "0.111.0",
    "slate-history": "0.110.3",
    "slate-react": "0.110.3",
    "tailwind-merge": "^2.0.0",
    "tailwindcss": "3.3.5",
    "ulidx": "^2.3.0",
    "use-debounce": "^10.0.0",
-    "uuid": "^9.0.1",
+    "uuid": "^9.0.1"
    "zod": "^3.22.4",
    "slate": "latest",
    "slate-dom": "0.111.0",
    "slate-react": "0.110.3",
    "slate-history": "0.110.3"
  },
  "devDependencies": {
    "@next/eslint-plugin-next": "^14.0.1",
@ -65,7 +63,7 @@
    "@types/react": "18.2.34",
    "@types/react-dom": "18.2.14",
    "@types/react-icons": "^3.0.0",
-    "@types/react-scroll-to-bottom": "^4.2.4",
+    "@types/react-syntax-highlighter": "^15.5.13",
    "@types/uuid": "^9.0.6",
    "@typescript-eslint/eslint-plugin": "^6.8.0",
    "@typescript-eslint/parser": "^6.8.0",
--- a/web/screens/LocalServer/LocalServerCenterPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerCenterPanel/index.tsx
@ -1,6 +1,6 @@
 import { useEffect, useState } from 'react'
-import { Button, ScrollArea } from '@janhq/joi'
+import { Button } from '@janhq/joi'
 import { CodeIcon, Paintbrush } from 'lucide-react'
 import { InfoIcon } from 'lucide-react'
@ -26,8 +26,8 @@ const LocalServerCenterPanel = () => {
  return (
    <CenterPanelContainer>
-      <div className="flex h-full w-full flex-col overflow-hidden">
+      <div className="flex h-full w-full flex-col">
-        <div className="sticky top-0 flex  items-center justify-between border-b border-[hsla(var(--app-border))] px-4 py-2">
+        <div className="sticky top-0 z-10  flex items-center justify-between border-b border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))] px-4 py-2">
          <h2 className="font-bold">Server Logs</h2>
          <div className="space-x-2">
            <Button
@ -72,9 +72,7 @@ const LocalServerCenterPanel = () => {
            </div>
          </div>
        ) : (
-          <ScrollArea className="h-full w-full">
+          <ServerLogs />
            <ServerLogs />
          </ScrollArea>
        )}
      </div>
    </CenterPanelContainer>
--- a/web/screens/LocalServer/LocalServerLeftPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerLeftPanel/index.tsx
@ -29,6 +29,7 @@ const LocalServerLeftPanel = () => {
  const [errorRangePort, setErrorRangePort] = useState(false)
  const [errorPrefix, setErrorPrefix] = useState(false)
  const [serverEnabled, setServerEnabled] = useAtom(serverEnabledAtom)
  const [isLoading, setIsLoading] = useState(false)
  const { startModel, stateModel } = useActiveModel()
  const selectedModel = useAtomValue(selectedModelAtom)
@ -66,6 +67,7 @@ const LocalServerLeftPanel = () => {
  const onStartServerClick = async () => {
    if (selectedModel == null) return
    try {
      setIsLoading(true)
      const isStarted = await window.core?.api?.startServer({
        host,
        port,
@ -79,8 +81,10 @@ const LocalServerLeftPanel = () => {
        setFirstTimeVisitAPIServer(false)
      }
      startModel(selectedModel.id, false).catch((e) => console.error(e))
      setIsLoading(false)
    } catch (e) {
      console.error(e)
      setIsLoading(false)
      toaster({
        title: `Failed to start server!`,
        description: 'Please check Server Logs for more details.',
@ -93,6 +97,7 @@ const LocalServerLeftPanel = () => {
    window.core?.api?.stopServer()
    setServerEnabled(false)
    setLoadModelError(undefined)
    setIsLoading(false)
  }
  const onToggleServer = async () => {
@ -117,6 +122,7 @@ const LocalServerLeftPanel = () => {
              block
              theme={serverEnabled ? 'destructive' : 'primary'}
              disabled={
                isLoading ||
                stateModel.loading ||
                errorRangePort ||
                errorPrefix ||
@ -124,7 +130,11 @@ const LocalServerLeftPanel = () => {
              }
              onClick={onToggleServer}
            >
-              {serverEnabled ? 'Stop' : 'Start'} Server
+              {isLoading
                ? 'Starting...'
                : serverEnabled
                  ? 'Stop Server'
                  : 'Start Server'}
            </Button>
            {serverEnabled && (
              <Button variant="soft" asChild className="whitespace-nowrap">
--- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
@ -19,8 +19,10 @@ import { getConfigurationsData } from '@/utils/componentSettings'
 import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'
 import { selectedModelAtom } from '@/helpers/atoms/Model.atom'
 import { getActiveThreadModelParamsAtom } from '@/helpers/atoms/Thread.atom'
 const LocalServerRightPanel = () => {
  const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
  const loadModelError = useAtomValue(loadModelErrorAtom)
  const serverEnabled = useAtomValue(serverEnabledAtom)
  const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
@ -48,8 +50,17 @@ const LocalServerRightPanel = () => {
    selectedModel
  )
  const modelEngineParams = extractModelLoadParams(
    {
      ...selectedModel?.settings,
      ...activeModelParams,
    },
    selectedModel?.settings
  )
  const componentDataEngineSetting = getConfigurationsData(
-    currentModelSettingParams
+    modelEngineParams,
    selectedModel
  )
  const engineSettings = useMemo(
@ -57,6 +68,7 @@ const LocalServerRightPanel = () => {
      componentDataEngineSetting.filter(
        (x) => x.key !== 'prompt_template' && x.key !== 'embedding'
      ),
    [componentDataEngineSetting]
  )
--- a/web/screens/Settings/Advanced/index.tsx
+++ b/web/screens/Settings/Advanced/index.tsx
@ -20,9 +20,12 @@ import { AlertTriangleIcon, AlertCircleIcon } from 'lucide-react'
 import { twMerge } from 'tailwind-merge'
 import { useDebouncedCallback } from 'use-debounce'
 import { snackbar, toaster } from '@/containers/Toast'
 import { useActiveModel } from '@/hooks/useActiveModel'
 import useModels from '@/hooks/useModels'
 import { useSettings } from '@/hooks/useSettings'
 import DataFolder from './DataFolder'
@ -65,6 +68,7 @@ const Advanced = () => {
  const [dropdownOptions, setDropdownOptions] = useState<HTMLDivElement | null>(
    null
  )
  const { configurePullOptions } = useModels()
  const [toggle, setToggle] = useState<HTMLDivElement | null>(null)
@ -78,6 +82,15 @@ const Advanced = () => {
      return y['name']
    })
  /**
   * Debounced (300 ms) wrapper around `configurePullOptions`.
   *
   * Two reasons for debouncing:
   * - a state update may not be synced yet, so a value read from other hooks
   *   immediately afterwards could be stale;
   * - the state is updated on every keystroke while the user types in the
   *   input, and each of those updates should not trigger a reconfiguration.
   */
  const updatePullOptions = useDebouncedCallback(
    () => configurePullOptions(),
    300
  )
  /**
   * Handle proxy change
   */
@ -90,8 +103,9 @@ const Advanced = () => {
      } else {
        setProxy('')
      }
      updatePullOptions()
    },
-    [setPartialProxy, setProxy]
+    [setPartialProxy, setProxy, updatePullOptions]
  )
  /**
@ -193,7 +207,12 @@ const Advanced = () => {
    let updatedGpusInUse = [...gpusInUse]
    if (updatedGpusInUse.includes(gpuId)) {
      updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId)
-      if (gpuEnabled && updatedGpusInUse.length === 0) {
+      if (
        gpuEnabled &&
        updatedGpusInUse.length === 0 &&
        gpuId &&
        gpuId.trim()
      ) {
        // Vulkan support only allow 1 active device at a time
        if (vulkanEnabled) {
          updatedGpusInUse = []
@ -205,11 +224,13 @@ const Advanced = () => {
      if (vulkanEnabled) {
        updatedGpusInUse = []
      }
-      updatedGpusInUse.push(gpuId)
+      if (gpuId && gpuId.trim()) updatedGpusInUse.push(gpuId)
    }
    setGpusInUse(updatedGpusInUse)
-    await saveSettings({ gpusInUse: updatedGpusInUse })
+    await saveSettings({ gpusInUse: updatedGpusInUse.filter((e) => !!e) })
-    window.core?.api?.relaunch()
+    // Reload window to apply changes
    // This will trigger engine servers to restart
    window.location.reload()
  }
  const gpuSelectionPlaceHolder =
@ -452,7 +473,10 @@ const Advanced = () => {
            <Switch
              data-testid="proxy-switch"
              checked={proxyEnabled}
-              onChange={() => setProxyEnabled(!proxyEnabled)}
+              onChange={() => {
                setProxyEnabled(!proxyEnabled)
                updatePullOptions()
              }}
            />
            <div className="w-full">
              <Input
@ -481,7 +505,10 @@ const Advanced = () => {
          <Switch
            data-testid="ignore-ssl-switch"
            checked={ignoreSSL}
-            onChange={(e) => setIgnoreSSL(e.target.checked)}
+            onChange={(e) => {
              setIgnoreSSL(e.target.checked)
              updatePullOptions()
            }}
          />
        </div>
--- a/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx
+++ b/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx
@ -123,7 +123,6 @@ const InputExtraActions: React.FC<InputActionProps> = ({
  return (
    <div className="flex flex-row space-x-2">
      {actions.map((action) => {
        console.log(action)
        switch (action) {
          case 'copy':
            return copied ? (
--- a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
@ -3,8 +3,8 @@ import { useCallback, useEffect, useRef, useState } from 'react'
 import { MessageStatus } from '@janhq/core'
 import hljs from 'highlight.js'
 import { useAtom, useAtomValue } from 'jotai'
 import { BaseEditor, createEditor, Editor, Transforms } from 'slate'
 import { withHistory } from 'slate-history' // Import withHistory
 import {
@ -270,7 +270,8 @@ const RichTextEditor = ({
      textareaRef.current.style.height = activeSettingInputBox
        ? '100px'
        : '40px'
-      textareaRef.current.style.height = textareaRef.current.scrollHeight + 'px'
+      textareaRef.current.style.height =
        textareaRef.current.scrollHeight + 2 + 'px'
      textareaRef.current?.scrollTo({
        top: textareaRef.current.scrollHeight,
        behavior: 'instant',
--- a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
@ -249,19 +249,11 @@ const ChatInput = () => {
                  <li
                    className={twMerge(
                      'text-[hsla(var(--text-secondary)] hover:bg-secondary flex w-full cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-[hsla(var(--dropdown-menu-hover-bg))]',
-                      activeThread?.assistants[0].model.settings?.text_model ===
+                      'cursor-pointer'
                        false
                        ? 'cursor-not-allowed opacity-50'
                        : 'cursor-pointer'
                    )}
                    onClick={() => {
-                      if (
+                      fileInputRef.current?.click()
-                        activeThread?.assistants[0].model.settings
+                      setShowAttacmentMenus(false)
                          ?.text_model !== false
                      ) {
                        fileInputRef.current?.click()
                        setShowAttacmentMenus(false)
                      }
                    }}
                  >
                    <FileTextIcon size={16} />
@ -270,22 +262,11 @@ const ChatInput = () => {
                }
                content={
                  (!activeThread?.assistants[0].tools ||
-                    !activeThread?.assistants[0].tools[0]?.enabled ||
+                    !activeThread?.assistants[0].tools[0]?.enabled) && (
-                    activeThread?.assistants[0].model.settings?.text_model ===
+                    <span>
-                      false) && (
+                      Turn on Retrieval in Assistant Settings to use this
-                    <>
+                      feature.
-                      {activeThread?.assistants[0].model.settings
+                    </span>
                        ?.text_model === false ? (
                        <span>
                          This model does not support text-based retrieval.
                        </span>
                      ) : (
                        <span>
                          Turn on Retrieval in Assistant Settings to use this
                          feature.
                        </span>
                      )}
                    </>
                  )
                }
              />
--- a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
@ -9,8 +9,6 @@ import { MainViewState } from '@/constants/screens'
 import { loadModelErrorAtom } from '@/hooks/useActiveModel'
 import { useSettings } from '@/hooks/useSettings'
 import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
@ -21,25 +19,9 @@ const LoadModelError = () => {
  const setMainState = useSetAtom(mainViewStateAtom)
  const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
  const activeThread = useAtomValue(activeThreadAtom)
  const { settings } = useSettings()
  const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'
  const ErrorMessage = () => {
-    if (loadModelError === PORT_NOT_AVAILABLE) {
+    if (
      return (
        <p>
          Port 3928 is currently unavailable. Check for conflicting apps, or
          access&nbsp;
          <span
            className="cursor-pointer text-[hsla(var(--app-link))]"
            onClick={() => setModalTroubleShooting(true)}
          >
            troubleshooting assistance
          </span>
        </p>
      )
    } else if (
      typeof loadModelError?.includes === 'function' &&
      loadModelError.includes('EXTENSION_IS_NOT_INSTALLED')
    ) {
@ -66,10 +48,10 @@ const LoadModelError = () => {
      )
    } else {
      return (
-        <div>
+        <div className="mx-6 flex flex-col items-center space-y-2 text-center font-medium text-[hsla(var(--text-secondary))]">
-          Apologies, {`Something's wrong.`}.&nbsp;
+          {loadModelError && <p>{loadModelError}</p>}
          <p>
-            Access&nbsp;
+            {`Something's wrong.`}&nbsp;Access&nbsp;
            <span
              className="cursor-pointer text-[hsla(var(--app-link))]"
              onClick={() => setModalTroubleShooting(true)}
--- a/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
@ -1,4 +1,9 @@
-import React, { useEffect, useRef, useState } from 'react'
+/* eslint-disable @typescript-eslint/no-explicit-any */
 /* eslint-disable react-hooks/exhaustive-deps */
 /* eslint-disable @typescript-eslint/naming-convention */
 import React, { useEffect, useState } from 'react'
 import Markdown from 'react-markdown'
 import {
  ChatCompletionRole,
@ -8,14 +13,15 @@ import {
 } from '@janhq/core'
 import { Tooltip } from '@janhq/joi'
 import hljs from 'highlight.js'
 import { useAtomValue } from 'jotai'
 import { FolderOpenIcon } from 'lucide-react'
-import { Marked, Renderer } from 'marked'
+import rehypeHighlight from 'rehype-highlight'
-import { markedHighlight } from 'marked-highlight'
+import rehypeHighlightCodeLines from 'rehype-highlight-code-lines'
-import markedKatex from 'marked-katex-extension'
+import rehypeKatex from 'rehype-katex'
-
+import rehypeRaw from 'rehype-raw'
 import remarkMath from 'remark-math'
 import 'katex/dist/katex.min.css'
 import { twMerge } from 'tailwind-merge'
 import LogoMark from '@/containers/Brand/Logo/Mark'
@ -23,6 +29,7 @@ import LogoMark from '@/containers/Brand/Logo/Mark'
 import { useClipboard } from '@/hooks/useClipboard'
 import { usePath } from '@/hooks/usePath'
 import { getLanguageFromExtension } from '@/utils/codeLanguageExtension'
 import { toGibibytes } from '@/utils/converter'
 import { displayDate } from '@/utils/datetime'
@ -53,88 +60,183 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
  const clipboard = useClipboard({ timeout: 1000 })
-  function escapeHtml(html: string): string {
+  function extractCodeLines(node: { children: { children: any[] }[] }) {
-    return html
+    const codeLines: any[] = []
-      .replace(/&/g, '&amp;')
+
-      .replace(/</g, '&lt;')
+    // Helper function to extract text recursively from children
-      .replace(/>/g, '&gt;')
+    function getTextFromNode(node: {
-      .replace(/"/g, '&quot;')
+      type: string
-      .replace(/'/g, '&#039;')
+      value: any
      children: any[]
    }): string {
      if (node.type === 'text') {
        return node.value
      } else if (node.children) {
        return node.children.map(getTextFromNode).join('')
      }
      return ''
    }
    // Traverse each line in the <code> block
    node.children[0].children.forEach(
      (lineNode: {
        type: string
        tagName: string
        value: any
        children: any[]
      }) => {
        if (lineNode.type === 'element' && lineNode.tagName === 'span') {
          const lineContent = getTextFromNode(lineNode)
          codeLines.push(lineContent)
        }
      }
    )
    // Join the lines with newline characters for proper formatting
    return codeLines.join('\n')
  }
  /**
   * Builds a tree transformer that decorates fenced code blocks with a header
   * bar (language label + "Copy" button).
   *
   * The returned function replaces `tree.children` with a mapped copy. Only
   * the direct children of `tree` are inspected; there is no recursive walk,
   * so `pre` elements nested deeper in the tree are left as they are.
   *
   * A child is wrapped when it is a `pre` element whose first child is a
   * `code` element and `extractCodeLines` yields a non-empty string for it.
   * Every other node is returned unchanged.
   */
  function wrapCodeBlocksWithoutVisit() {
    return (tree: { children: any[] }) => {
      tree.children = tree.children.map((node) => {
        if (node.tagName === 'pre' && node.children[0]?.tagName === 'code') {
          // The language is read from the SECOND class name on <code>, with
          // its `language-` prefix stripped. It is undefined when that class
          // is missing.
          // NOTE(review): presumably index 0 holds a class added by the
          // highlighter — confirm against the plugin order.
          const language = node.children[0].properties.className?.[1]?.replace(
            'language-',
            ''
          )
          // Nothing to copy (no line spans extracted): keep the plain <pre>.
          if (extractCodeLines(node) === '') {
            return node
          }
          // Resulting structure:
          //   div.code-block-wrapper
          //     div.code-block
          //       div.code-header  (language <span> + copy <button>)
          //       <original pre node>
          return {
            type: 'element',
            tagName: 'div',
            properties: {
              className: ['code-block-wrapper'],
            },
            children: [
              {
                type: 'element',
                tagName: 'div',
                properties: {
                  className: [
                    'code-block',
                    'group/item',
                    'relative',
                    'my-4',
                    'overflow-auto',
                  ],
                },
                children: [
                  {
                    type: 'element',
                    tagName: 'div',
                    properties: {
                      className:
                        'code-header bg-[hsla(var(--app-code-block))] flex justify-between items-center py-2 px-3 border-b border-[hsla(var(--app-border))] rounded-t-lg',
                    },
                    children: [
                      {
                        // Language label; empty text when no language was found.
                        type: 'element',
                        tagName: 'span',
                        properties: {
                          className: 'text-xs font-medium text-gray-300',
                        },
                        children: [
                          {
                            type: 'text',
                            value: language
                              ? `${getLanguageFromExtension(language)}`
                              : '',
                          },
                        ],
                      },
                      {
                        type: 'element',
                        tagName: 'button',
                        properties: {
                          className:
                            'copy-button ml-auto flex items-center gap-1 text-xs font-medium text-gray-400 hover:text-gray-600 focus:outline-none',
                          // Copies the block's text, then swaps the button
                          // markup to a check icon + "Copied" and restores
                          // the copy icon + "Copy" after 2000 ms. The swap is
                          // done directly through `innerHTML` on the clicked
                          // element.
                          onClick: (event: Event) => {
                            clipboard.copy(extractCodeLines(node))
                            const button = event.currentTarget as HTMLElement
                            button.innerHTML = `
                              <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-check pointer-events-none text-green-600"><path d="M20 6 9 17l-5-5"/></svg>
                              <span>Copied</span>
                            `
                            setTimeout(() => {
                              button.innerHTML = `
                                <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-copy pointer-events-none text-gray-400"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>
                                <span>Copy</span>
                              `
                            }, 2000)
                          },
                        },
                        // Initial button content: copy icon followed by "Copy".
                        children: [
                          {
                            type: 'element',
                            tagName: 'svg',
                            properties: {
                              xmlns: 'http://www.w3.org/2000/svg',
                              width: '16',
                              height: '16',
                              viewBox: '0 0 24 24',
                              fill: 'none',
                              stroke: 'currentColor',
                              strokeWidth: '2',
                              strokeLinecap: 'round',
                              strokeLinejoin: 'round',
                              className:
                                'lucide lucide-copy pointer-events-none text-gray-400',
                            },
                            children: [
                              {
                                type: 'element',
                                tagName: 'rect',
                                properties: {
                                  width: '14',
                                  height: '14',
                                  x: '8',
                                  y: '8',
                                  rx: '2',
                                  ry: '2',
                                },
                                children: [],
                              },
                              {
                                type: 'element',
                                tagName: 'path',
                                properties: {
                                  d: 'M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2',
                                },
                                children: [],
                              },
                            ],
                          },
                          { type: 'text', value: 'Copy' },
                        ],
                      },
                    ],
                  },
                  // The original <pre> is kept as-is beneath the header.
                  node,
                ],
              },
            ],
          }
        }
        return node
      })
    }
  }
  // Markdown parser instance: syntax highlighting through highlight.js plus a
  // custom renderer for raw HTML, links and fenced code blocks.
  const marked: Marked = new Marked(
    markedHighlight({
      langPrefix: 'hljs',
      highlight(code, lang) {
        // No language given: highlight as plain text.
        if (lang === undefined || lang === '') {
          return hljs.highlight(code, { language: 'plaintext' }).value
        }
        // Try the declared language; if highlight.js throws, fall back to
        // highlighting the code as JavaScript.
        try {
          return hljs.highlight(code, { language: lang }).value
        } catch (err) {
          return hljs.highlight(code, { language: 'javascript' }).value
        }
      },
    }),
    {
      renderer: {
        html: (html: string) => {
          return escapeHtml(html) // Escape any HTML
        },
        // Delegates to the stock link renderer, then string-replaces the
        // first `<a` in its output to add target='_blank'.
        link: (href, title, text) => {
          return Renderer.prototype.link
            ?.apply(this, [href, title, text])
            .replace('<a', "<a target='_blank'")
        },
        // Wraps a code block in a `.code-block` container with a
        // `.copy-action` button. The icon is chosen from `clipboard.copied`
        // at the moment this string is built. `lang` and `code` are
        // interpolated into the markup as received.
        code(code, lang) {
          return `
          <div class="relative code-block group/item overflow-auto">
            <button class='text-xs copy-action hidden group-hover/item:block p-2 rounded-lg absolute top-6 right-2'>
              ${
                clipboard.copied
                  ? `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-check pointer-events-none text-green-600"><path d="M20 6 9 17l-5-5"/></svg>`
                  : `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-copy pointer-events-none text-gray-400"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`
              }
            </button>
            <pre class="hljs">
              <code class="language-${lang ?? ''}">${code}</code>
            </pre>
          </div>
          `
        },
      },
    }
  )
  // Register the KaTeX extension; `throwOnError: false` is passed through to it.
  marked.use(markedKatex({ throwOnError: false }))
  const { onViewFile, onViewFileContainer } = usePath()
  // Result of running the message text through the `marked` parser.
  const parsedText = marked.parse(text)
  const [tokenCount, setTokenCount] = useState(0)
  const [lastTimestamp, setLastTimestamp] = useState<number | undefined>()
  const [tokenSpeed, setTokenSpeed] = useState(0)
  const messages = useAtomValue(getCurrentChatMessagesAtom)
  // Click handler kept in a ref so the same function instance is used for
  // both addEventListener and removeEventListener below.
  const codeBlockCopyEvent = useRef((e: Event) => {
    const target: HTMLElement = e.target as HTMLElement
    // Ignore targets whose `className` is not a plain string.
    if (typeof target.className !== 'string') return null
    const isCopyActionClassName = target?.className.includes('copy-action')
    if (isCopyActionClassName) {
      // Copy the text of the <code> element found under the clicked
      // element's parent; falls back to an empty string when none is found.
      const content = target?.parentNode?.querySelector('code')?.innerText ?? ''
      clipboard.copy(content)
    }
  })
  // Attach the handler to `document` once (empty dependency list) and detach
  // it in the effect cleanup.
  useEffect(() => {
    document.addEventListener('click', codeBlockCopyEvent.current)
    return () => {
      // eslint-disable-next-line react-hooks/exhaustive-deps
      document.removeEventListener('click', codeBlockCopyEvent.current)
    }
  }, [])
  useEffect(() => {
    if (props.status !== MessageStatus.Pending) {
      return
@ -283,10 +385,24 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
          ) : (
            <div
              className={twMerge(
-                'message max-width-[100%] flex flex-col gap-y-2 overflow-auto break-all leading-relaxed	'
+                'message max-width-[100%] flex flex-col gap-y-2 overflow-auto leading-relaxed'
              )}
-              dangerouslySetInnerHTML={{ __html: parsedText }}
+              dir="ltr"
-            />
+            >
              <Markdown
                remarkPlugins={[remarkMath]}
                rehypePlugins={[
                  [rehypeKatex, { throwOnError: false }],
                  rehypeRaw,
                  rehypeHighlight,
                  [rehypeHighlightCodeLines, { showLineNumbers: true }],
                  wrapCodeBlocksWithoutVisit,
                ]}
                skipHtml={true}
              >
                {text}
              </Markdown>
            </div>
          )}
        </>
      </div>
--- a/web/styles/components/code-block.scss
+++ b/web/styles/components/code-block.scss
@ -55,22 +55,25 @@
 .hljs {
  overflow: auto;
  display: block;
  width: auto;
  background: hsla(var(--app-code-block));
  color: #f8f8f2;
  padding: 16px;
  font-size: 14px;
-  word-wrap: normal;
+  border-bottom-left-radius: 0.4rem;
  border-bottom-right-radius: 0.4rem;
  color: #f8f8f2;
 }
 pre {
  background: hsla(var(--app-code-block));
  overflow: auto;
  border-radius: 0.4rem;
  margin-top: 1rem;
  margin-bottom: 1rem;
  white-space: normal;
 }
 pre > code {
  display: block;
  text-indent: 0;
  white-space: pre;
-  max-width: 10vw;
+  font-size: 14px;
  overflow: auto;
  color: #f8f8f2;
  display: block;
  padding: 16px;
 }
 .hljs-emphasis {
@ -81,6 +84,14 @@ pre > code {
  font-weight: bold;
 }
 .code-block {
  pre {
    padding: 0;
    border-top-left-radius: 0;
    border-top-right-radius: 0;
  }
 }
@media screen and (-ms-high-contrast: active) {
  .hljs-addition,
  .hljs-attribute,
@ -105,3 +116,51 @@ pre > code {
    font-weight: bold;
  }
 }
 // Outer wrapper around a code block: never wrap its content.
 .code-block-wrapper {
  white-space: nowrap;
 }
 // A single rendered line of code. The negative horizontal margins pull the
 // line outwards by 12px on each side; the transparent left border occupies
 // the same 4px that `.highlighted-code-line` colours.
 .code-line {
  // padding-left: 12px;
  padding-right: 12px;
  margin-left: -12px;
  margin-right: -12px;
  border-left: 4px solid transparent;
 }
 // Empty line rendered as a <div>: give it an explicit height.
 div.code-line:empty {
  height: 21.5938px;
 }
 // Line rendered as a <span>: preserve whitespace and lay it out as an
 // inline block capped at 10vw wide.
 span.code-line {
  // min-width: 100%;
  white-space: pre;
  display: inline-block;
  max-width: 10vw;
 }
 // Diff-style line backgrounds, driven by CSS custom properties.
 .code-line.inserted {
  background-color: var(--color-inserted-line);
 }
 .code-line.deleted {
  background-color: var(--color-deleted-line);
 }
 // Highlighted line: tinted background plus a visible left indicator.
 .highlighted-code-line {
  background-color: var(--color-highlighted-line);
  border-left: 4px solid var(--color-highlighted-line-indicator);
 }
 // Line-number gutter: prints the element's `data-line-number` attribute in
 // a right-aligned inline block before the line content.
 .numbered-code-line::before {
  content: attr(data-line-number);
  margin-left: -4px;
  margin-right: 16px;
  width: 1.2rem;
  font-size: 12px;
  color: hsla(var(--text-tertiary));
  text-align: right;
  display: inline-block;
 }
--- a/web/styles/components/message.scss
+++ b/web/styles/components/message.scss
@ -27,11 +27,3 @@
    @apply inline-flex flex-col border-s-4 border-[hsla(var(--primary-bg))] bg-[hsla(var(--primary-bg-soft))] px-4 py-2;
  }
 }
 .code-block {
  white-space: normal;
 }
 pre {
  max-width: 95vw;
 }
--- a/web/utils/codeLanguageExtension.ts
+++ b/web/utils/codeLanguageExtension.ts
@ -0,0 +1,34 @@
 // Lookup table from lower-case file extension to language name.
 const LANGUAGE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ['ts', 'typescript'],
  ['tsx', 'typescript'],
  ['js', 'javascript'],
  ['jsx', 'javascript'],
  ['py', 'python'],
  ['java', 'java'],
  ['rb', 'ruby'],
  ['cs', 'csharp'],
  ['md', 'markdown'],
  ['yaml', 'yaml'],
  ['yml', 'yaml'],
  ['sh', 'bash'],
  ['rs', 'rust'],
  ['kt', 'kotlin'],
  ['swift', 'swift'],
 ])

 /**
  * Maps a file extension to the name of the language it denotes.
  *
  * The lookup is case-insensitive. When the extension is not in the table the
  * argument is returned exactly as it was passed in (original casing kept).
  *
  * @param extension - Extension or language tag, without a leading dot.
  * @returns The mapped language name, or `extension` itself when unmapped.
  */
 export function getLanguageFromExtension(extension: string): string {
  const mapped = LANGUAGE_BY_EXTENSION.get(extension.toLowerCase())
  return mapped ?? extension
 }
		`@ -0,0 +1,2 @@`
							`const EmptyMock = {}`
							`export default EmptyMock`