Merge pull request #4022 from janhq/feat/cortex-cpp-engine-variants

feat: support cortex.cpp engine variants
2024-11-18 16:35:13 +07:00 · 2024-11-18 16:35:13 +07:00 · 1eb600f881
commit 1eb600f881
parent 888da28e91 f75dc662ee
12 changed files with 145 additions and 122 deletions
--- a/.gitignore
+++ b/.gitignore
@ -47,3 +47,4 @@ coverage
 .yarnrc
 test_results.html
 *.tsbuildinfo
 electron/shared/**
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@ -1 +1 @@
-1.0.2
+1.0.3-rc1
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@ -2,23 +2,24 @@
 set BIN_PATH=./bin
 set SHARED_PATH=./../../electron/shared
 set /p CORTEX_VERSION=<./bin/version.txt
 set ENGINE_VERSION=0.1.39
@REM Download cortex.llamacpp binaries (release tag and archive name both derive from ENGINE_VERSION)
-set VERSION=v0.1.35
+set VERSION=v%ENGINE_VERSION%
-set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64
 set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
 set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
-call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
+call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-11-7/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-12-0/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-11-7/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx512/v%ENGINE_VERSION%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-vulkan/v%ENGINE_VERSION%
 call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
 call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
@ -28,12 +29,12 @@ del %BIN_PATH%\cortex.exe
@REM Loop through each engine variant folder (windows-amd64-<variant>\v<ENGINE_VERSION>) and move DLLs (excluding engine.dll)
 for %%F in (%SUBFOLDERS%) do (
-    echo Processing folder: %BIN_PATH%\%%F
+    echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\windows-amd64-%%F\v%ENGINE_VERSION%
    @REM Move all .dll files except engine.dll
-    for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
+    for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\windows-amd64-%%F\v%ENGINE_VERSION%\*.dll) do (
        if /I not "%%~nxD"=="engine.dll" (
-            move "%%D" "%BIN_PATH%"
+            move "%%D" "%SHARED_PATH%"
        )
    )
 )
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@ -2,9 +2,11 @@
 # Read CORTEX_VERSION
 CORTEX_VERSION=$(cat ./bin/version.txt)
-CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
+ENGINE_VERSION=0.1.39
-ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
+CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download"
-CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
+ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}"
 CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}"
 SHARED_PATH="../../electron/shared"
 # Detect platform
 OS_TYPE=$(uname)
@ -17,17 +19,17 @@ if [ "$OS_TYPE" == "Linux" ]; then
    chmod +x "./bin/cortex-server"
    # Download engines for Linux
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx512/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-12-0/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-11-7/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-12-0/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-11-7/v${ENGINE_VERSION}" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1
-    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
-    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
 elif [ "$OS_TYPE" == "Darwin" ]; then
    # macOS downloads
@ -38,8 +40,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
    chmod +x "./bin/cortex-server"
    # Download engines for macOS
-    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-arm64/v${ENGINE_VERSION}"
-    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-amd64/v${ENGINE_VERSION}"
 else
    echo "Unsupported operating system: $OS_TYPE"
--- a/extensions/inference-cortex-extension/rollup.config.ts
+++ b/extensions/inference-cortex-extension/rollup.config.ts
@ -120,6 +120,7 @@ export default [
        DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
        CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
        CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
        CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.39'),
      }),
      // Allow json resolution
      json(),
--- a/extensions/inference-cortex-extension/src/@types/global.d.ts
+++ b/extensions/inference-cortex-extension/src/@types/global.d.ts
@ -1,6 +1,7 @@
 declare const NODE: string
 declare const CORTEX_API_URL: string
 declare const CORTEX_SOCKET_URL: string
 declare const CORTEX_ENGINE_VERSION: string
 declare const DEFAULT_SETTINGS: Array<any>
 declare const MODELS: Array<any>
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@ -18,6 +18,7 @@ import {
  fs,
  events,
  ModelEvent,
  SystemInformation,
 } from '@janhq/core'
 import PQueue from 'p-queue'
 import ky from 'ky'
@ -67,13 +68,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    super.onLoad()
    this.queue.add(() => this.healthz())
    this.queue.add(() => this.setDefaultEngine(systemInfo))
    // Run the process watchdog
    const systemInfo = await systemInformation()
    await this.clean()
    await executeOnMain(NODE, 'run', systemInfo)
    this.queue.add(() => this.healthz())
    this.subscribeToEvents()
    window.addEventListener('beforeunload', () => {
@ -153,7 +153,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   * Do health check on cortex.cpp
   * @returns
   */
-  healthz(): Promise<void> {
+  private healthz(): Promise<void> {
    return ky
      .get(`${CORTEX_API_URL}/healthz`, {
        retry: {
@ -164,11 +164,24 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
      .then(() => {})
  }
  /**
   * Tell the cortex.cpp server which llama.cpp engine variant to use by default.
   *
   * The variant name is obtained from the node module's `engineVariant`
   * handler, which receives the GPU settings from `systemInfo`. It is then
   * POSTed, together with CORTEX_ENGINE_VERSION, to the engine's `default`
   * endpoint.
   *
   * @param systemInfo System information whose `gpuSetting` drives the variant choice.
   * @returns A promise that resolves with no value once the request completes;
   *          failures from either call propagate to the caller.
   */
  private async setDefaultEngine(systemInfo: SystemInformation) {
    const variant = await executeOnMain(
      NODE,
      'engineVariant',
      systemInfo.gpuSetting
    )
    const endpoint = `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`
    // The response body is not needed; only completion matters.
    await ky.post(endpoint, { json: {} })
  }
  /**
   * Clean cortex processes
   * @returns
   */
-  clean(): Promise<any> {
+  private clean(): Promise<any> {
    return ky
      .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
        timeout: 2000, // maximum 2 seconds
@ -181,7 +194,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
  /**
   * Subscribe to cortex.cpp websocket events
   */
-  subscribeToEvents() {
+  private subscribeToEvents() {
    this.queue.add(
      () =>
        new Promise<void>((resolve) => {
@ -235,7 +248,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 }
 /// Legacy
-export const getModelFilePath = async (
+const getModelFilePath = async (
  model: Model,
  file: string
 ): Promise<string> => {
--- a/extensions/inference-cortex-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@ -1,6 +1,6 @@
 import { describe, expect, it } from '@jest/globals'
-import { executableCortexFile } from './execute'
+import { engineVariant, executableCortexFile } from './execute'
-import { GpuSetting } from '@janhq/core'
+import { GpuSetting } from '@janhq/core/node'
 import { cpuInfo } from 'cpu-instructions'
 let testSettings: GpuSetting = {
@ -30,6 +30,11 @@ jest.mock('cpu-instructions', () => ({
 let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
 mockCpuInfo.mockReturnValue([])
 jest.mock('@janhq/core/node', () => ({
  appResourcePath: () => ".",
  log: jest.fn()
 }))
 describe('test executable cortex file', () => {
  afterAll(function () {
    Object.defineProperty(process, 'platform', {
@ -46,8 +51,7 @@ describe('test executable cortex file', () => {
    })
    expect(executableCortexFile(testSettings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`arm64`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath:
          originalPlatform === 'darwin'
            ? expect.stringContaining(`cortex-server`)
@ -56,13 +60,13 @@ describe('test executable cortex file', () => {
        vkVisibleDevices: '',
      })
    )
    expect(engineVariant(testSettings)).toEqual('mac-arm64')
    Object.defineProperty(process, 'arch', {
      value: 'x64',
    })
    expect(executableCortexFile(testSettings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`x64`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath:
          originalPlatform === 'darwin'
            ? expect.stringContaining(`cortex-server`)
@ -71,6 +75,7 @@ describe('test executable cortex file', () => {
        vkVisibleDevices: '',
      })
    )
    expect(engineVariant(testSettings)).toEqual('mac-amd64')
  })
  it('executes on Windows CPU', () => {
@ -84,13 +89,13 @@ describe('test executable cortex file', () => {
    mockCpuInfo.mockReturnValue(['avx'])
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`avx`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath: expect.stringContaining(`cortex-server.exe`),
        cudaVisibleDevices: '',
        vkVisibleDevices: '',
      })
    )
    expect(engineVariant()).toEqual('windows-amd64-avx')
  })
  it('executes on Windows Cuda 11', () => {
@ -120,13 +125,13 @@ describe('test executable cortex file', () => {
    mockCpuInfo.mockReturnValue(['avx2'])
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`avx2-cuda-11-7`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath: expect.stringContaining(`cortex-server.exe`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
    )
    expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-11-7')
  })
  it('executes on Windows Cuda 12', () => {
@ -156,13 +161,15 @@ describe('test executable cortex file', () => {
    mockCpuInfo.mockReturnValue(['noavx'])
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`noavx-cuda-12-0`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath: expect.stringContaining(`cortex-server.exe`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
    )
    expect(engineVariant(settings)).toEqual('windows-amd64-noavx-cuda-12-0')
    mockCpuInfo.mockReturnValue(['avx512'])
    expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-12-0')
  })
  it('executes on Linux CPU', () => {
@ -176,12 +183,13 @@ describe('test executable cortex file', () => {
    mockCpuInfo.mockReturnValue(['noavx'])
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`noavx`),
+        enginePath: expect.stringContaining("shared"),
        executablePath: expect.stringContaining(`cortex-server`),
        cudaVisibleDevices: '',
        vkVisibleDevices: '',
      })
    )
    expect(engineVariant()).toEqual('linux-amd64-noavx')
  })
  it('executes on Linux Cuda 11', () => {
@ -208,15 +216,16 @@ describe('test executable cortex file', () => {
        },
      ],
    }
    mockCpuInfo.mockReturnValue(['avx512'])
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`cuda-11-7`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath: expect.stringContaining(`cortex-server`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
    )
    expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-11-7')
  })
  it('executes on Linux Cuda 12', () => {
@ -245,13 +254,13 @@ describe('test executable cortex file', () => {
    }
    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`cuda-12-0`),
+        enginePath: expect.stringContaining("shared"),
        binPath: expect.stringContaining(`bin`),
        executablePath: expect.stringContaining(`cortex-server`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
    )
    expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-12-0')
  })
  // Generate test for different cpu instructions on Linux
@ -270,14 +279,14 @@ describe('test executable cortex file', () => {
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(instruction),
+          enginePath: expect.stringContaining('shared'),
          binPath: expect.stringContaining(`bin`),
          executablePath: expect.stringContaining(`cortex-server`),
          cudaVisibleDevices: '',
          vkVisibleDevices: '',
        })
      )
      expect(engineVariant(settings)).toEqual(`linux-amd64-${instruction}`)
    })
  })
  // Generate test for different cpu instructions on Windows
@ -294,13 +303,13 @@ describe('test executable cortex file', () => {
      mockCpuInfo.mockReturnValue([instruction])
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(instruction),
+          enginePath: expect.stringContaining('shared'),
          binPath: expect.stringContaining(`bin`),
          executablePath: expect.stringContaining(`cortex-server.exe`),
          cudaVisibleDevices: '',
          vkVisibleDevices: '',
        })
      )
      expect(engineVariant(settings)).toEqual(`windows-amd64-${instruction}`)
    })
  })
@ -334,13 +343,15 @@ describe('test executable cortex file', () => {
      mockCpuInfo.mockReturnValue([instruction])
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(`cuda-12-0`),
+          enginePath: expect.stringContaining("shared"),
          binPath: expect.stringContaining(`bin`),
          executablePath: expect.stringContaining(`cortex-server.exe`),
          cudaVisibleDevices: '0',
          vkVisibleDevices: '0',
        })
      )
      expect(engineVariant(settings)).toEqual(
        `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
      )
    })
  })
@ -374,13 +385,15 @@ describe('test executable cortex file', () => {
      mockCpuInfo.mockReturnValue([instruction])
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(`cuda-12-0`),
+          enginePath: expect.stringContaining("shared"),
          binPath: expect.stringContaining(`bin`),
          executablePath: expect.stringContaining(`cortex-server`),
          cudaVisibleDevices: '0',
          vkVisibleDevices: '0',
        })
      )
      expect(engineVariant(settings)).toEqual(
        `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
      )
    })
  })
@ -415,13 +428,13 @@ describe('test executable cortex file', () => {
      mockCpuInfo.mockReturnValue([instruction])
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(`vulkan`),
+          enginePath: expect.stringContaining("shared"),
          binPath: expect.stringContaining(`bin`),
          executablePath: expect.stringContaining(`cortex-server`),
          cudaVisibleDevices: '0',
          vkVisibleDevices: '0',
        })
      )
      expect(engineVariant(settings)).toEqual(`linux-amd64-vulkan`)
    })
  })
@ -442,8 +455,7 @@ describe('test executable cortex file', () => {
      mockCpuInfo.mockReturnValue([])
      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
-          enginePath: expect.stringContaining(`x64`),
+          enginePath: expect.stringContaining("shared"),
          binPath: expect.stringContaining(`bin`),
          executablePath:
            originalPlatform === 'darwin'
              ? expect.stringContaining(`cortex-server`)
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@ -1,10 +1,9 @@
 import { GpuSetting } from '@janhq/core'
 import * as path from 'path'
 import { cpuInfo } from 'cpu-instructions'
 import { GpuSetting, appResourcePath, log } from '@janhq/core/node'
 export interface CortexExecutableOptions {
  enginePath: string
  binPath: string
  executablePath: string
  cudaVisibleDevices: string
  vkVisibleDevices: string
@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => {
  if (!settings) return ''
-  return settings.vulkan === true
+  return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
    ? 'vulkan'
    : settings.run_mode === 'cpu'
      ? ''
      : 'cuda'
 }
 /**
@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => {
 */
 const os = (): string => {
  return process.platform === 'win32'
-    ? 'win'
+    ? 'windows-amd64'
    : process.platform === 'darwin'
      ? process.arch === 'arm64'
-        ? 'arm64'
+        ? 'mac-arm64'
-        : 'x64'
+        : 'mac-amd64'
-      : 'linux'
+      : 'linux-amd64'
 }
 /**
@ -57,7 +52,7 @@ const extension = (): '.exe' | '' => {
 */
 const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
  const isUsingCuda =
-    settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac'
+    settings?.vulkan !== true && settings?.run_mode === 'gpu' && !os().includes('mac')
  if (!isUsingCuda) return undefined
  return settings?.cuda?.version === '11' ? '11-7' : '12-0'
@ -79,36 +74,45 @@ const cpuInstructions = (): string => {
 }
 /**
- * Find which executable file to run based on the current platform.
+ * The executable options for the cortex.cpp extension.
 * @returns The paths and GPU visibility settings used to launch the cortex-server executable.
 */
 export const executableCortexFile = (
  gpuSetting?: GpuSetting
 ): CortexExecutableOptions => {
  // CUDA and Vulkan both receive the same comma-separated list of selected
  // GPU ids; it is an empty string when no GPU settings are supplied.
  const visibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
  const binaryName = `cortex-server${extension()}`
  // The server binary sits in the extension's own bin folder, while engines
  // are resolved from the app-level shared resources folder.
  const binPath = path.join(__dirname, '..', 'bin')
  return {
    enginePath: path.join(appResourcePath(), 'shared'),
    executablePath: path.join(binPath, binaryName),
    // binPath is a required field of CortexExecutableOptions; leaving it out
    // hands consumers `undefined` instead of the bin directory.
    binPath,
    cudaVisibleDevices: visibleDevices,
    vkVisibleDevices: visibleDevices,
  }
 }
 /**
 * Find which variant to run based on the current platform.
 */
 export const engineVariant = (gpuSetting?: GpuSetting): string => {
  const cpuInstruction = cpuInstructions()
-  let engineFolder = gpuSetting?.vulkan
+  let engineVariant = [
    os(),
    gpuSetting?.vulkan
      ? 'vulkan'
-    : process.platform === 'darwin'
+      : gpuRunMode(gpuSetting) !== 'cuda'
-      ? os()
+        ? // CPU mode - support all variants
-      : [
+          cpuInstruction
-        gpuRunMode(gpuSetting) !== 'cuda' ||
+        : // GPU mode - packaged CUDA variants of avx2 and noavx
          cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
-          ? cpuInstruction
+          ? 'avx2'
          : 'noavx',
    gpuRunMode(gpuSetting),
    cudaVersion(gpuSetting),
  ]
    .filter((e) => !!e)
    .join('-')
-  let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
+
-  let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
+  log(`[CORTEX]: Engine variant: ${engineVariant}`)
-  let binaryName = `cortex-server${extension()}`
+  return engineVariant
  const binPath = path.join(__dirname, '..', 'bin')
  return {
    enginePath: path.join(binPath, engineFolder),
    executablePath: path.join(binPath, binaryName),
    binPath: binPath,
    cudaVisibleDevices,
    vkVisibleDevices,
  }
 }
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@ -1,8 +1,7 @@
 import path from 'path'
 import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
-import { executableCortexFile } from './execute'
+import { engineVariant, executableCortexFile } from './execute'
 import { ProcessWatchdog } from './watchdog'
 import { appResourcePath } from '@janhq/core/node'
 // The local port used by the cortex.cpp server subprocess
 const LOCAL_PORT = '39291'
@ -30,16 +29,13 @@ function run(systemInfo?: SystemInformation): Promise<any> {
    log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
    log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
    addEnvPaths(path.join(appResourcePath(), 'shared'))
    addEnvPaths(executableOptions.binPath)
    addEnvPaths(executableOptions.enginePath)
    // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH
    // This is required for the cortex engine to run for now since dlls are not moved to the root
    addEnvPaths(
      path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp')
    )
    const dataFolderPath = getJanDataFolderPath()
    if (watchdog) {
      watchdog.terminate()
    }
    watchdog = new ProcessWatchdog(
      executableOptions.executablePath,
      [
@ -81,17 +77,12 @@ function dispose() {
 function addEnvPaths(dest: string) {
  // Add engine path to the PATH and LD_LIBRARY_PATH
  if (process.platform === 'win32') {
-    process.env.PATH = (process.env.PATH || '').concat(
+    process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
      path.delimiter,
      dest,
    )
    log(`[CORTEX] PATH: ${process.env.PATH}`)
  } else {
    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
      path.delimiter,
-      dest,
+      dest
    )
    log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
  }
 }
@ -105,4 +96,5 @@ export interface CortexProcessInfo {
 export default {
  run,
  dispose,
  engineVariant,
 }
--- a/web/hooks/useImportModel.ts
+++ b/web/hooks/useImportModel.ts
@ -9,7 +9,6 @@ import {
  OptionType,
  events,
  fs,
  baseName,
 } from '@janhq/core'
 import { atom, useAtomValue, useSetAtom } from 'jotai'
--- a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
@ -9,8 +9,6 @@ import { MainViewState } from '@/constants/screens'
 import { loadModelErrorAtom } from '@/hooks/useActiveModel'
 import { useSettings } from '@/hooks/useSettings'
 import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
@ -21,7 +19,6 @@ const LoadModelError = () => {
  const setMainState = useSetAtom(mainViewStateAtom)
  const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
  const activeThread = useAtomValue(activeThreadAtom)
  const { settings } = useSettings()
  const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'