From a15d92dbdc0ab5748de8130060b6c156d2579554 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 14 Nov 2024 21:09:44 +0700
Subject: [PATCH 1/6] feat: integrates cortex.cpp engine variants

---
 .../inference-cortex-extension/download.sh    |  4 +-
 .../rollup.config.ts                          |  1 +
 .../src/@types/global.d.ts                    |  1 +
 .../inference-cortex-extension/src/index.ts   | 23 +++++--
 .../src/node/execute.test.ts                  | 63 ++++++++++---------
 .../src/node/execute.ts                       | 60 +++++++++---------
 .../src/node/index.ts                         | 17 +++--
 7 files changed, 96 insertions(+), 73 deletions(-)
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index 902a31e51..8c13a13ef 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -38,8 +38,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
     chmod +x "./bin/cortex-server"
 
     # Download engines for macOS
-    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
-    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.35"
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.35"
 
 else
     echo "Unsupported operating system: $OS_TYPE"
diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts
index 34ad9295d..882ed1921 100644
--- a/extensions/inference-cortex-extension/rollup.config.ts
+++ b/extensions/inference-cortex-extension/rollup.config.ts
@@ -120,6 +120,7 @@ export default [
         DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
         CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
         CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
+        CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.35'),
       }),
       // Allow json resolution
       json(),
diff --git a/extensions/inference-cortex-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts
index 48dbcd780..381a80f5e 100644
--- a/extensions/inference-cortex-extension/src/@types/global.d.ts
+++ b/extensions/inference-cortex-extension/src/@types/global.d.ts
@@ -1,6 +1,7 @@
 declare const NODE: string
 declare const CORTEX_API_URL: string
 declare const CORTEX_SOCKET_URL: string
+declare const CORTEX_ENGINE_VERSION: string
 declare const DEFAULT_SETTINGS: Array<any>
 declare const MODELS: Array<any>
 
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index e83a17561..0331a4d17 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -18,6 +18,7 @@ import {
   fs,
   events,
   ModelEvent,
+  SystemInformation,
 } from '@janhq/core'
 import PQueue from 'p-queue'
 import ky from 'ky'
@@ -74,6 +75,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     this.queue.add(() => this.healthz())
 
+    this.queue.add(() => this.setDefaultEngine(systemInfo))
     this.subscribeToEvents()
 
     window.addEventListener('beforeunload', () => {
@@ -153,7 +155,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    * Do health check on cortex.cpp
    * @returns
    */
-  healthz(): Promise<void> {
+  private healthz(): Promise<void> {
     return ky
       .get(`${CORTEX_API_URL}/healthz`, {
         retry: {
@@ -164,11 +166,24 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       .then(() => {})
   }
 
+  /**
+   * Set default engine variant on launch; the variant is resolved on the main process from the GPU settings (`systemInfo.gpuSetting`)
+   */
+  private async setDefaultEngine(systemInfo: SystemInformation) {
+    const variant = await executeOnMain(NODE, 'engineVariant', systemInfo.gpuSetting)
+    return ky
+      .post(
+        `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`,
+        { json: {} }
+      )
+      .then(() => {})
+  }
+
   /**
    * Clean cortex processes
    * @returns
    */
-  clean(): Promise<any> {
+  private clean(): Promise<any> {
     return ky
       .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
         timeout: 2000, // maximum 2 seconds
@@ -181,7 +196,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   /**
    * Subscribe to cortex.cpp websocket events
    */
-  subscribeToEvents() {
+  private subscribeToEvents() {
     this.queue.add(
       () =>
         new Promise<void>((resolve) => {
@@ -235,7 +250,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 }
 
 /// Legacy
-export const getModelFilePath = async (
+const getModelFilePath = async (
   model: Model,
   file: string
 ): Promise<string> => {
diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts
index 622eb38af..b0a7ece9e 100644
--- a/extensions/inference-cortex-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, it } from '@jest/globals'
-import { executableCortexFile } from './execute'
+import { engineVariant, executableCortexFile } from './execute'
 import { GpuSetting } from '@janhq/core'
 import { cpuInfo } from 'cpu-instructions'
 
@@ -46,8 +46,7 @@ describe('test executable cortex file', () => {
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`arm64`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath:
           originalPlatform === 'darwin'
             ? expect.stringContaining(`cortex-server`)
@@ -56,13 +55,13 @@ describe('test executable cortex file', () => {
         vkVisibleDevices: '',
       })
     )
+    expect(engineVariant(testSettings)).toEqual('mac-arm64')
     Object.defineProperty(process, 'arch', {
       value: 'x64',
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`x64`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath:
           originalPlatform === 'darwin'
             ? expect.stringContaining(`cortex-server`)
@@ -71,6 +70,7 @@ describe('test executable cortex file', () => {
         vkVisibleDevices: '',
       })
     )
+    expect(engineVariant(testSettings)).toEqual('mac-amd64')
   })
 
   it('executes on Windows CPU', () => {
@@ -84,13 +84,13 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`avx`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
     )
+    expect(engineVariant()).toEqual('windows-amd64-avx')
   })
 
   it('executes on Windows Cuda 11', () => {
@@ -120,13 +120,13 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx2'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`avx2-cuda-11-7`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
+    expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-11-7')
   })
 
   it('executes on Windows Cuda 12', () => {
@@ -156,13 +156,15 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['noavx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`noavx-cuda-12-0`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
+    expect(engineVariant(settings)).toEqual('windows-amd64-noavx-cuda-12-0')
+    mockCpuInfo.mockReturnValue(['avx512'])
+    expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-12-0')
   })
 
   it('executes on Linux CPU', () => {
@@ -176,12 +178,13 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['noavx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`noavx`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
     )
+    expect(engineVariant()).toEqual('linux-amd64-noavx')
   })
 
   it('executes on Linux Cuda 11', () => {
@@ -208,15 +211,16 @@ describe('test executable cortex file', () => {
         },
       ],
     }
+    mockCpuInfo.mockReturnValue(['avx512'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`cuda-11-7`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
+    expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-11-7')
   })
 
   it('executes on Linux Cuda 12', () => {
@@ -245,13 +249,13 @@ describe('test executable cortex file', () => {
     }
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`cuda-12-0`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
+    expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-12-0')
   })
 
   // Generate test for different cpu instructions on Linux
@@ -270,14 +274,14 @@ describe('test executable cortex file', () => {
 
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(instruction),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining('bin'),
           executablePath: expect.stringContaining(`cortex-server`),
 
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
         })
       )
+      expect(engineVariant(settings)).toEqual(`linux-amd64-${instruction}`)
     })
   })
   // Generate test for different cpu instructions on Windows
@@ -294,13 +298,13 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(instruction),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining('bin'),
           executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
         })
       )
+      expect(engineVariant(settings)).toEqual(`windows-amd64-${instruction}`)
     })
   })
 
@@ -334,13 +338,15 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`cuda-12-0`),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining(`bin`),
           executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
       )
+      expect(engineVariant(settings)).toEqual(
+        `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
+      )
     })
   })
 
@@ -374,13 +380,15 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`cuda-12-0`),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining(`bin`),
           executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
       )
+      expect(engineVariant(settings)).toEqual(
+        `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
+      )
     })
   })
 
@@ -415,13 +423,13 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`vulkan`),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining(`bin`),
           executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
       )
+      expect(engineVariant(settings)).toEqual(`linux-amd64-vulkan`)
     })
   })
 
@@ -442,8 +450,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`x64`),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining(`bin`),
           executablePath:
             originalPlatform === 'darwin'
               ? expect.stringContaining(`cortex-server`)
diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts
index 74ffb48c6..48a407e31 100644
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@@ -4,7 +4,6 @@ import { cpuInfo } from 'cpu-instructions'
 
 export interface CortexExecutableOptions {
   enginePath: string
-  binPath: string
   executablePath: string
   cudaVisibleDevices: string
   vkVisibleDevices: string
@@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => {
 
   if (!settings) return ''
 
-  return settings.vulkan === true
-    ? 'vulkan'
-    : settings.run_mode === 'cpu'
-      ? ''
-      : 'cuda'
+  return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
 }
 
 /**
@@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => {
  */
 const os = (): string => {
   return process.platform === 'win32'
-    ? 'win'
+    ? 'windows-amd64'
     : process.platform === 'darwin'
       ? process.arch === 'arm64'
-        ? 'arm64'
-        : 'x64'
-      : 'linux'
+        ? 'mac-arm64'
+        : 'mac-amd64'
+      : 'linux-amd64'
 }
 
 /**
@@ -79,36 +74,43 @@ const cpuInstructions = (): string => {
 }
 
 /**
- * Find which executable file to run based on the current platform.
- * @returns The name of the executable file to run.
+ * The executable options for the cortex.cpp extension.
  */
 export const executableCortexFile = (
   gpuSetting?: GpuSetting
 ): CortexExecutableOptions => {
-  const cpuInstruction = cpuInstructions()
-  let engineFolder = gpuSetting?.vulkan
-    ? 'vulkan'
-    : process.platform === 'darwin'
-      ? os()
-      : [
-        gpuRunMode(gpuSetting) !== 'cuda' ||
-          cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
-          ? cpuInstruction
-          : 'noavx',
-        gpuRunMode(gpuSetting),
-        cudaVersion(gpuSetting),
-      ]
-        .filter((e) => !!e)
-        .join('-')
   let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let binaryName = `cortex-server${extension()}`
   const binPath = path.join(__dirname, '..', 'bin')
   return {
-    enginePath: path.join(binPath, engineFolder),
+    enginePath: binPath,
     executablePath: path.join(binPath, binaryName),
-    binPath: binPath,
     cudaVisibleDevices,
     vkVisibleDevices,
   }
 }
+
+/**
+ * Find which variant to run based on the current platform.
+ */
+export const engineVariant = (gpuSetting?: GpuSetting): string => {
+  const cpuInstruction = cpuInstructions()
+  let engineVariant = [
+    os(),
+    gpuSetting?.vulkan
+      ? 'vulkan'
+      : gpuRunMode(gpuSetting) !== 'cuda'
+        ? // CPU mode - support all variants
+          cpuInstruction
+        : // GPU mode - packaged CUDA variants of avx2 and noavx
+          cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
+          ? 'avx2'
+          : 'noavx',
+    gpuRunMode(gpuSetting),
+    cudaVersion(gpuSetting),
+  ]
+    .filter((e) => !!e)
+    .join('-')
+  return engineVariant
+}
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index 3816605d2..cf2af045b 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -1,6 +1,6 @@
 import path from 'path'
 import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
-import { executableCortexFile } from './execute'
+import { engineVariant, executableCortexFile } from './execute'
 import { ProcessWatchdog } from './watchdog'
 import { appResourcePath } from '@janhq/core/node'
 
@@ -20,9 +20,9 @@ function run(systemInfo?: SystemInformation): Promise<any> {
       // If ngl is not set or equal to 0, run on CPU with correct instructions
       systemInfo?.gpuSetting
         ? {
-          ...systemInfo.gpuSetting,
-          run_mode: systemInfo.gpuSetting.run_mode,
-        }
+            ...systemInfo.gpuSetting,
+            run_mode: systemInfo.gpuSetting.run_mode,
+          }
         : undefined
     )
 
@@ -31,7 +31,6 @@ function run(systemInfo?: SystemInformation): Promise<any> {
     log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
 
     addEnvPaths(path.join(appResourcePath(), 'shared'))
-    addEnvPaths(executableOptions.binPath)
     addEnvPaths(executableOptions.enginePath)
     // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH
     // This is required for the cortex engine to run for now since dlls are not moved to the root
@@ -81,15 +80,12 @@ function dispose() {
 function addEnvPaths(dest: string) {
   // Add engine path to the PATH and LD_LIBRARY_PATH
   if (process.platform === 'win32') {
-    process.env.PATH = (process.env.PATH || '').concat(
-      path.delimiter,
-      dest,
-    )
+    process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
     log(`[CORTEX] PATH: ${process.env.PATH}`)
   } else {
     process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
       path.delimiter,
-      dest,
+      dest
     )
     log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
   }
@@ -105,4 +101,5 @@ export interface CortexProcessInfo {
 export default {
   run,
   dispose,
+  engineVariant,
 }

From a38715f18abe788d862e144f04ca6027cb1b0c2a Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 14 Nov 2024 23:51:51 +0700
Subject: [PATCH 2/6] fix: queue health check and default engine setup
 before starting model

---
 extensions/inference-cortex-extension/src/index.ts | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 0331a4d17..e88608d57 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -68,14 +68,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     super.onLoad()
 
+    this.queue.add(() => this.healthz())
+    this.queue.add(() => this.setDefaultEngine(systemInfo))
     // Run the process watchdog
     const systemInfo = await systemInformation()
     await this.clean()
     await executeOnMain(NODE, 'run', systemInfo)
-
-    this.queue.add(() => this.healthz())
-
-    this.queue.add(() => this.setDefaultEngine(systemInfo))
     this.subscribeToEvents()
 
     window.addEventListener('beforeunload', () => {

From 6f066357ed2fb137c8ff7d577c8e166ab1baf74a Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Sat, 16 Nov 2024 16:05:49 +0700
Subject: [PATCH 3/6] chore: bump new engine version 0.1.39 and get rid of
 dangling process

---
 extensions/inference-cortex-extension/download.bat        | 4 ++--
 extensions/inference-cortex-extension/download.sh         | 8 ++++----
 extensions/inference-cortex-extension/rollup.config.ts    | 2 +-
 extensions/inference-cortex-extension/src/node/index.ts   | 4 ++++
 web/hooks/useImportModel.ts                               | 1 -
 .../Thread/ThreadCenterPanel/LoadModelError/index.tsx     | 3 ---
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index ecff683c3..e89d42f23 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -4,8 +4,8 @@ set SHARED_PATH=./../../electron/shared
 set /p CORTEX_VERSION=<./bin/version.txt
 
 @REM Download cortex.llamacpp binaries
-set VERSION=v0.1.35
-set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
+set VERSION=v0.1.39
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.39-windows-amd64
 set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
 set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
 
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index 8c13a13ef..b6b181987 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -3,8 +3,8 @@
 # Read CORTEX_VERSION
 CORTEX_VERSION=$(cat ./bin/version.txt)
 CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
-ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
-CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
+ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39/cortex.llamacpp-0.1.39"
+CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39"
 # Detect platform
 OS_TYPE=$(uname)
 
@@ -38,8 +38,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
     chmod +x "./bin/cortex-server"
 
     # Download engines for macOS
-    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.35"
-    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.35"
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.39"
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.39"
 
 else
     echo "Unsupported operating system: $OS_TYPE"
diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts
index 882ed1921..00fae78ba 100644
--- a/extensions/inference-cortex-extension/rollup.config.ts
+++ b/extensions/inference-cortex-extension/rollup.config.ts
@@ -120,7 +120,7 @@ export default [
         DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
         CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
         CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
-        CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.35'),
+        CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.39'),
       }),
       // Allow json resolution
       json(),
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index cf2af045b..4c6d96292 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -39,6 +39,10 @@ function run(systemInfo?: SystemInformation): Promise<any> {
     )
 
     const dataFolderPath = getJanDataFolderPath()
+    if (watchdog) {
+      watchdog.terminate()
+    }
+
     watchdog = new ProcessWatchdog(
       executableOptions.executablePath,
       [
diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts
index c49ddb964..84c6a5126 100644
--- a/web/hooks/useImportModel.ts
+++ b/web/hooks/useImportModel.ts
@@ -9,7 +9,6 @@ import {
   OptionType,
   events,
   fs,
-  baseName,
 } from '@janhq/core'
 
 import { atom, useAtomValue, useSetAtom } from 'jotai'
diff --git a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
index 0420b7d51..16a0024e8 100644
--- a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
@@ -9,8 +9,6 @@ import { MainViewState } from '@/constants/screens'
 
 import { loadModelErrorAtom } from '@/hooks/useActiveModel'
 
-import { useSettings } from '@/hooks/useSettings'
-
 import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
@@ -21,7 +19,6 @@ const LoadModelError = () => {
   const setMainState = useSetAtom(mainViewStateAtom)
   const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
   const activeThread = useAtomValue(activeThreadAtom)
-  const { settings } = useSettings()
 
   const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'
 

From 6e9c34baf7b7352f5052c969975a69dfe8f0a43d Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Sat, 16 Nov 2024 16:23:06 +0700
Subject: [PATCH 4/6] chore: cortex.cpp version bump

---
 extensions/inference-cortex-extension/bin/version.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index e6d5cb833..89f843d1d 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.2
\ No newline at end of file
+1.0.3-rc1
\ No newline at end of file

From 5243e4a095d6151de0404633c5a50d7bc137367f Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 18 Nov 2024 14:22:10 +0700
Subject: [PATCH 5/6] fix: correct cortex repo url

---
 extensions/inference-cortex-extension/download.bat | 2 +-
 extensions/inference-cortex-extension/download.sh  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index e89d42f23..1f4102b97 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -9,7 +9,7 @@ set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VER
 set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
 set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
 
-call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
+call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index b6b181987..6a2809f0c 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -2,7 +2,7 @@
 
 # Read CORTEX_VERSION
 CORTEX_VERSION=$(cat ./bin/version.txt)
-CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
+CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download"
 ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39/cortex.llamacpp-0.1.39"
 CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39"
 # Detect platform

From f75dc662ee74542d53b6e42405f4881325c17764 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 18 Nov 2024 15:54:26 +0700
Subject: [PATCH 6/6] chore: reduce app launch time

---
 .gitignore                                    |  1 +
 .../inference-cortex-extension/download.bat   | 25 ++++++-------
 .../inference-cortex-extension/download.sh    | 32 +++++++++--------
 .../inference-cortex-extension/src/index.ts   |  2 +-
 .../src/node/execute.test.ts                  | 35 +++++++++++--------
 .../src/node/execute.ts                       |  8 +++--
 .../src/node/index.ts                         |  9 -----
 7 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/.gitignore b/.gitignore
index f28d152d9..ab815678a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,3 +47,4 @@ coverage
 .yarnrc
 test_results.html
 *.tsbuildinfo
+electron/shared/**
diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index 1f4102b97..25527eb36 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -2,6 +2,7 @@
 set BIN_PATH=./bin
 set SHARED_PATH=./../../electron/shared
 set /p CORTEX_VERSION=<./bin/version.txt
+set ENGINE_VERSION=0.1.39
 
 @REM Download cortex.llamacpp binaries
 set VERSION=v0.1.39
@@ -10,15 +11,15 @@ set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download
 set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
 
 call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-11-7/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-12-0/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-11-7/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx512/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-vulkan/v%ENGINE_VERSION%
 call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
 call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
 
@@ -28,12 +29,12 @@ del %BIN_PATH%\cortex.exe
 
 @REM Loop through each folder and move DLLs (excluding engine.dll)
 for %%F in (%SUBFOLDERS%) do (
-    echo Processing folder: %BIN_PATH%\%%F
+    echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\windows-amd64-%%F\v%ENGINE_VERSION%
 
     @REM Move all .dll files except engine.dll
-    for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
+    for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\windows-amd64-%%F\v%ENGINE_VERSION%\*.dll) do (
         if /I not "%%~nxD"=="engine.dll" (
-            move "%%D" "%BIN_PATH%"
+            move "%%D" "%SHARED_PATH%"
         )
     )
 )
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index 6a2809f0c..9c0ebbe64 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -2,9 +2,11 @@
 
 # Read CORTEX_VERSION
 CORTEX_VERSION=$(cat ./bin/version.txt)
+ENGINE_VERSION=0.1.39
 CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download"
-ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39/cortex.llamacpp-0.1.39"
-CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39"
+ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}"
+CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}"
+SHARED_PATH="../../electron/shared"
 # Detect platform
 OS_TYPE=$(uname)
 
@@ -17,17 +19,17 @@ if [ "$OS_TYPE" == "Linux" ]; then
     chmod +x "./bin/cortex-server"
 
     # Download engines for Linux
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
-    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
-    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx512/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-12-0/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-11-7/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-12-0/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-11-7/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
 
 elif [ "$OS_TYPE" == "Darwin" ]; then
     # macOS downloads
@@ -38,8 +40,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
     chmod +x "./bin/cortex-server"
 
     # Download engines for macOS
-    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.39"
-    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.39"
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-arm64/v${ENGINE_VERSION}"
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-amd64/v${ENGINE_VERSION}"
 
 else
     echo "Unsupported operating system: $OS_TYPE"
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index e88608d57..6bd3c468e 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -168,7 +168,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    * Set default engine variant on launch
    */
   private async setDefaultEngine(systemInfo: SystemInformation) {
-    const variant = await executeOnMain(NODE, 'engineVariant', systemInfo)
+    const variant = await executeOnMain(NODE, 'engineVariant', systemInfo.gpuSetting)
     return ky
       .post(
         `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`,
diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts
index b0a7ece9e..73f114ce1 100644
--- a/extensions/inference-cortex-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from '@jest/globals'
 import { engineVariant, executableCortexFile } from './execute'
-import { GpuSetting } from '@janhq/core'
+import { GpuSetting } from '@janhq/core/node'
 import { cpuInfo } from 'cpu-instructions'
 
 let testSettings: GpuSetting = {
@@ -30,6 +30,11 @@ jest.mock('cpu-instructions', () => ({
 let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
 mockCpuInfo.mockReturnValue([])
 
+jest.mock('@janhq/core/node', () => ({
+  appResourcePath: () => ".",
+  log: jest.fn()
+}))
+
 describe('test executable cortex file', () => {
   afterAll(function () {
     Object.defineProperty(process, 'platform', {
@@ -46,7 +51,7 @@ describe('test executable cortex file', () => {
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath:
           originalPlatform === 'darwin'
             ? expect.stringContaining(`cortex-server`)
@@ -61,7 +66,7 @@ describe('test executable cortex file', () => {
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath:
           originalPlatform === 'darwin'
             ? expect.stringContaining(`cortex-server`)
@@ -84,7 +89,7 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
@@ -120,7 +125,7 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx2'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -156,7 +161,7 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['noavx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -178,7 +183,7 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['noavx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
@@ -214,7 +219,7 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx512'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -249,7 +254,7 @@ describe('test executable cortex file', () => {
     }
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -274,7 +279,7 @@ describe('test executable cortex file', () => {
 
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining('bin'),
+          enginePath: expect.stringContaining('shared'),
           executablePath: expect.stringContaining(`cortex-server`),
 
           cudaVisibleDevices: '',
@@ -298,7 +303,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining('bin'),
+          enginePath: expect.stringContaining('shared'),
           executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
@@ -338,7 +343,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining("shared"),
           executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
@@ -380,7 +385,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining("shared"),
           executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
@@ -423,7 +428,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining("shared"),
           executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
@@ -450,7 +455,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining("shared"),
           executablePath:
             originalPlatform === 'darwin'
               ? expect.stringContaining(`cortex-server`)
diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts
index 48a407e31..44b85d515 100644
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@@ -1,6 +1,6 @@
-import { GpuSetting } from '@janhq/core'
 import * as path from 'path'
 import { cpuInfo } from 'cpu-instructions'
+import { GpuSetting, appResourcePath, log } from '@janhq/core/node'
 
 export interface CortexExecutableOptions {
   enginePath: string
@@ -52,7 +52,7 @@ const extension = (): '.exe' | '' => {
  */
 const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
   const isUsingCuda =
-    settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac'
+    settings?.vulkan !== true && settings?.run_mode === 'gpu' && !os().includes('mac')
 
   if (!isUsingCuda) return undefined
   return settings?.cuda?.version === '11' ? '11-7' : '12-0'
@@ -84,7 +84,7 @@ export const executableCortexFile = (
   let binaryName = `cortex-server${extension()}`
   const binPath = path.join(__dirname, '..', 'bin')
   return {
-    enginePath: binPath,
+    enginePath: path.join(appResourcePath(), 'shared'),
     executablePath: path.join(binPath, binaryName),
     cudaVisibleDevices,
     vkVisibleDevices,
@@ -112,5 +112,7 @@ export const engineVariant = (gpuSetting?: GpuSetting): string => {
   ]
     .filter((e) => !!e)
     .join('-')
+
+  log(`[CORTEX]: Engine variant: ${engineVariant}`)
   return engineVariant
 }
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index 4c6d96292..a13bf6028 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -2,7 +2,6 @@ import path from 'path'
 import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
 import { engineVariant, executableCortexFile } from './execute'
 import { ProcessWatchdog } from './watchdog'
-import { appResourcePath } from '@janhq/core/node'
 
 // The HOST address to use for the Nitro subprocess
 const LOCAL_PORT = '39291'
@@ -30,13 +29,7 @@ function run(systemInfo?: SystemInformation): Promise<any> {
     log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
     log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
 
-    addEnvPaths(path.join(appResourcePath(), 'shared'))
     addEnvPaths(executableOptions.enginePath)
-    // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH
-    // This is required for the cortex engine to run for now since dlls are not moved to the root
-    addEnvPaths(
-      path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp')
-    )
 
     const dataFolderPath = getJanDataFolderPath()
     if (watchdog) {
@@ -85,13 +78,11 @@ function addEnvPaths(dest: string) {
   // Add engine path to the PATH and LD_LIBRARY_PATH
   if (process.platform === 'win32') {
     process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
-    log(`[CORTEX] PATH: ${process.env.PATH}`)
   } else {
     process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
       path.delimiter,
       dest
     )
-    log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
   }
 }