From a15d92dbdc0ab5748de8130060b6c156d2579554 Mon Sep 17 00:00:00 2001 From: Louis Date: Thu, 14 Nov 2024 21:09:44 +0700 Subject: [PATCH 1/6] feat: integrates cortex.cpp engine variants --- .../inference-cortex-extension/download.sh | 4 +- .../rollup.config.ts | 1 + .../src/@types/global.d.ts | 1 + .../inference-cortex-extension/src/index.ts | 23 +++++-- .../src/node/execute.test.ts | 63 ++++++++++--------- .../src/node/execute.ts | 60 +++++++++--------- .../src/node/index.ts | 17 +++-- 7 files changed, 96 insertions(+), 73 deletions(-) diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index 902a31e51..8c13a13ef 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -38,8 +38,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then chmod +x "./bin/cortex-server" # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp - download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.35" + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.35" else echo "Unsupported operating system: $OS_TYPE" diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index 34ad9295d..882ed1921 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -120,6 +120,7 @@ export default [ DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.35'), }), // Allow json resolution json(), diff --git a/extensions/inference-cortex-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts index 48dbcd780..381a80f5e 100644 --- a/extensions/inference-cortex-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -1,6 +1,7 @@ declare const NODE: string declare const CORTEX_API_URL: string declare const CORTEX_SOCKET_URL: string +declare const CORTEX_ENGINE_VERSION: string declare const DEFAULT_SETTINGS: Array declare const MODELS: Array diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index e83a17561..0331a4d17 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -18,6 +18,7 @@ import { fs, events, ModelEvent, + SystemInformation, } from '@janhq/core' import PQueue from 'p-queue' import ky from 'ky' @@ -74,6 +75,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { this.queue.add(() => this.healthz()) + this.queue.add(() => this.setDefaultEngine(systemInfo)) this.subscribeToEvents() window.addEventListener('beforeunload', () => { @@ -153,7 +155,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { * Do health check on cortex.cpp * @returns */ - healthz(): Promise { + private healthz(): Promise { return ky .get(`${CORTEX_API_URL}/healthz`, { retry: { @@ -164,11 +166,24 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { .then(() => {}) } + /** + * Set default engine variant on launch + */ + private async setDefaultEngine(systemInfo: SystemInformation) { + const variant = await executeOnMain(NODE, 'engineVariant', systemInfo) + return ky + .post( + `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`, + { json: {} } + ) + .then(() => {}) + } + /** * Clean cortex processes * @returns */ - clean(): Promise { + private clean(): Promise { return ky .delete(`${CORTEX_API_URL}/processmanager/destroy`, { timeout: 2000, // maximum 2 seconds @@ -181,7 +196,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { /** * Subscribe to cortex.cpp websocket events */ - subscribeToEvents() { + private subscribeToEvents() { this.queue.add( () => new Promise((resolve) => { @@ -235,7 +250,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { } /// Legacy -export const getModelFilePath = async ( +const getModelFilePath = async ( model: Model, file: string ): Promise => { diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts index 622eb38af..b0a7ece9e 100644 --- a/extensions/inference-cortex-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from '@jest/globals' -import { executableCortexFile } from './execute' +import { engineVariant, executableCortexFile } from './execute' import { GpuSetting } from '@janhq/core' import { cpuInfo } from 'cpu-instructions' @@ -46,8 +46,7 @@ describe('test executable cortex file', () => { }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`arm64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) @@ -56,13 +55,13 @@ describe('test executable cortex file', () => { vkVisibleDevices: '', }) ) + expect(engineVariant(testSettings)).toEqual('mac-arm64') Object.defineProperty(process, 'arch', { value: 'x64', }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) @@ -71,6 +70,7 @@ describe('test executable cortex file', () => { vkVisibleDevices: '', }) ) + expect(engineVariant(testSettings)).toEqual('mac-amd64') }) it('executes on Windows CPU', () => { @@ -84,13 +84,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['avx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`avx`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant()).toEqual('windows-amd64-avx') }) it('executes on Windows Cuda 11', () => { @@ -120,13 +120,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['avx2']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`avx2-cuda-11-7`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-11-7') }) it('executes on Windows Cuda 12', () => { @@ -156,13 +156,15 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['noavx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`noavx-cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('windows-amd64-noavx-cuda-12-0') + mockCpuInfo.mockReturnValue(['avx512']) + expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-12-0') }) it('executes on Linux CPU', () => { @@ -176,12 +178,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['noavx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`noavx`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant()).toEqual('linux-amd64-noavx') }) it('executes on Linux Cuda 11', () => { @@ -208,15 +211,16 @@ describe('test executable cortex file', () => { }, ], } + mockCpuInfo.mockReturnValue(['avx512']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-11-7`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-11-7') }) it('executes on Linux Cuda 12', () => { @@ -245,13 +249,13 @@ describe('test executable cortex file', () => { } expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-12-0') }) // Generate test for different cpu instructions on Linux @@ -270,14 +274,14 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('bin'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant(settings)).toEqual(`linux-amd64-${instruction}`) }) }) // Generate test for different cpu instructions on Windows @@ -294,13 +298,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('bin'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant(settings)).toEqual(`windows-amd64-${instruction}`) }) }) @@ -334,13 +338,15 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual( + `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` + ) }) }) @@ -374,13 +380,15 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual( + `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` + ) }) }) @@ -415,13 +423,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`vulkan`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual(`linux-amd64-vulkan`) }) }) @@ -442,8 +450,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts index 74ffb48c6..48a407e31 100644 --- a/extensions/inference-cortex-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -4,7 +4,6 @@ import { cpuInfo } from 'cpu-instructions' export interface CortexExecutableOptions { enginePath: string - binPath: string executablePath: string cudaVisibleDevices: string vkVisibleDevices: string @@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => { if (!settings) return '' - return settings.vulkan === true - ? 'vulkan' - : settings.run_mode === 'cpu' - ? '' - : 'cuda' + return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda' } /** @@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => { */ const os = (): string => { return process.platform === 'win32' - ? 'win' + ? 'windows-amd64' : process.platform === 'darwin' ? process.arch === 'arm64' - ? 'arm64' - : 'x64' - : 'linux' + ? 'mac-arm64' + : 'mac-amd64' + : 'linux-amd64' } /** @@ -79,36 +74,43 @@ const cpuInstructions = (): string => { } /** - * Find which executable file to run based on the current platform. - * @returns The name of the executable file to run. + * The executable options for the cortex.cpp extension. */ export const executableCortexFile = ( gpuSetting?: GpuSetting ): CortexExecutableOptions => { - const cpuInstruction = cpuInstructions() - let engineFolder = gpuSetting?.vulkan - ? 'vulkan' - : process.platform === 'darwin' - ? os() - : [ - gpuRunMode(gpuSetting) !== 'cuda' || - cpuInstruction === 'avx2' || cpuInstruction === 'avx512' - ? cpuInstruction - : 'noavx', - gpuRunMode(gpuSetting), - cudaVersion(gpuSetting), - ] - .filter((e) => !!e) - .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let binaryName = `cortex-server${extension()}` const binPath = path.join(__dirname, '..', 'bin') return { - enginePath: path.join(binPath, engineFolder), + enginePath: binPath, executablePath: path.join(binPath, binaryName), - binPath: binPath, cudaVisibleDevices, vkVisibleDevices, } } + +/** + * Find which variant to run based on the current platform. + */ +export const engineVariant = (gpuSetting?: GpuSetting): string => { + const cpuInstruction = cpuInstructions() + let engineVariant = [ + os(), + gpuSetting?.vulkan + ? 'vulkan' + : gpuRunMode(gpuSetting) !== 'cuda' + ? // CPU mode - support all variants + cpuInstruction + : // GPU mode - packaged CUDA variants of avx2 and noavx + cpuInstruction === 'avx2' || cpuInstruction === 'avx512' + ? 'avx2' + : 'noavx', + gpuRunMode(gpuSetting), + cudaVersion(gpuSetting), + ] + .filter((e) => !!e) + .join('-') + return engineVariant +} diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts index 3816605d2..cf2af045b 100644 --- a/extensions/inference-cortex-extension/src/node/index.ts +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -1,6 +1,6 @@ import path from 'path' import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node' -import { executableCortexFile } from './execute' +import { engineVariant, executableCortexFile } from './execute' import { ProcessWatchdog } from './watchdog' import { appResourcePath } from '@janhq/core/node' @@ -20,9 +20,9 @@ function run(systemInfo?: SystemInformation): Promise { // If ngl is not set or equal to 0, run on CPU with correct instructions systemInfo?.gpuSetting ? { - ...systemInfo.gpuSetting, - run_mode: systemInfo.gpuSetting.run_mode, - } + ...systemInfo.gpuSetting, + run_mode: systemInfo.gpuSetting.run_mode, + } : undefined ) @@ -31,7 +31,6 @@ function run(systemInfo?: SystemInformation): Promise { log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`) addEnvPaths(path.join(appResourcePath(), 'shared')) - addEnvPaths(executableOptions.binPath) addEnvPaths(executableOptions.enginePath) // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH // This is required for the cortex engine to run for now since dlls are not moved to the root @@ -81,15 +80,12 @@ function dispose() { function addEnvPaths(dest: string) { // Add engine path to the PATH and LD_LIBRARY_PATH if (process.platform === 'win32') { - process.env.PATH = (process.env.PATH || '').concat( - path.delimiter, - dest, - ) + process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest) log(`[CORTEX] PATH: ${process.env.PATH}`) } else { process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( path.delimiter, - dest, + dest ) log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`) } @@ -105,4 +101,5 @@ export interface CortexProcessInfo { export default { run, dispose, + engineVariant, } From a38715f18abe788d862e144f04ca6027cb1b0c2a Mon Sep 17 00:00:00 2001 From: Louis Date: Thu, 14 Nov 2024 23:51:51 +0700 Subject: [PATCH 2/6] fix: should queue health check and default engine set before starting model --- extensions/inference-cortex-extension/src/index.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 0331a4d17..e88608d57 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -68,14 +68,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { super.onLoad() + this.queue.add(() => this.healthz()) + this.queue.add(() => this.setDefaultEngine(systemInfo)) // Run the process watchdog const systemInfo = await systemInformation() await this.clean() await executeOnMain(NODE, 'run', systemInfo) - - this.queue.add(() => this.healthz()) - - this.queue.add(() => this.setDefaultEngine(systemInfo)) this.subscribeToEvents() window.addEventListener('beforeunload', () => { From 6f066357ed2fb137c8ff7d577c8e166ab1baf74a Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 16 Nov 2024 16:05:49 +0700 Subject: [PATCH 3/6] chore: bump new engine version 0.1.39 and get rid of dangling process --- extensions/inference-cortex-extension/download.bat | 4 ++-- extensions/inference-cortex-extension/download.sh | 8 ++++---- extensions/inference-cortex-extension/rollup.config.ts | 2 +- extensions/inference-cortex-extension/src/node/index.ts | 4 ++++ web/hooks/useImportModel.ts | 1 - .../Thread/ThreadCenterPanel/LoadModelError/index.tsx | 3 --- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index ecff683c3..e89d42f23 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -4,8 +4,8 @@ set SHARED_PATH=./../../electron/shared set /p CORTEX_VERSION=<./bin/version.txt @REM Download cortex.llamacpp binaries -set VERSION=v0.1.35 -set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64 +set VERSION=v0.1.39 +set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.39-windows-amd64 set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION% set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index 8c13a13ef..b6b181987 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -3,8 +3,8 @@ # Read CORTEX_VERSION CORTEX_VERSION=$(cat ./bin/version.txt) CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" -ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35" -CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35" +ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39/cortex.llamacpp-0.1.39" +CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39" # Detect platform OS_TYPE=$(uname) @@ -38,8 +38,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then chmod +x "./bin/cortex-server" # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.35" - download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.35" + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.39" + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.39" else echo "Unsupported operating system: $OS_TYPE" diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index 882ed1921..00fae78ba 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -120,7 +120,7 @@ export default [ DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), - CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.35'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.39'), }), // Allow json resolution json(), diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts index cf2af045b..4c6d96292 100644 --- a/extensions/inference-cortex-extension/src/node/index.ts +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -39,6 +39,10 @@ function run(systemInfo?: SystemInformation): Promise { ) const dataFolderPath = getJanDataFolderPath() + if (watchdog) { + watchdog.terminate() + } + watchdog = new ProcessWatchdog( executableOptions.executablePath, [ diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts index c49ddb964..84c6a5126 100644 --- a/web/hooks/useImportModel.ts +++ b/web/hooks/useImportModel.ts @@ -9,7 +9,6 @@ import { OptionType, events, fs, - baseName, } from '@janhq/core' import { atom, useAtomValue, useSetAtom } from 'jotai' diff --git a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx index 0420b7d51..16a0024e8 100644 --- a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx @@ -9,8 +9,6 @@ import { MainViewState } from '@/constants/screens' import { loadModelErrorAtom } from '@/hooks/useActiveModel' -import { useSettings } from '@/hooks/useSettings' - import { mainViewStateAtom } from '@/helpers/atoms/App.atom' import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' @@ -21,7 +19,6 @@ const LoadModelError = () => { const setMainState = useSetAtom(mainViewStateAtom) const setSelectedSettingScreen = useSetAtom(selectedSettingAtom) const activeThread = useAtomValue(activeThreadAtom) - const { settings } = useSettings() const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE' From 6e9c34baf7b7352f5052c969975a69dfe8f0a43d Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 16 Nov 2024 16:23:06 +0700 Subject: [PATCH 4/6] chore: cortex.cpp version bump --- extensions/inference-cortex-extension/bin/version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt index e6d5cb833..89f843d1d 100644 --- a/extensions/inference-cortex-extension/bin/version.txt +++ b/extensions/inference-cortex-extension/bin/version.txt @@ -1 +1 @@ -1.0.2 \ No newline at end of file +1.0.3-rc1 \ No newline at end of file From 5243e4a095d6151de0404633c5a50d7bc137367f Mon Sep 17 00:00:00 2001 From: Louis Date: Mon, 18 Nov 2024 14:22:10 +0700 Subject: [PATCH 5/6] fix: correct cortex repo url --- extensions/inference-cortex-extension/download.bat | 2 +- extensions/inference-cortex-extension/download.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index e89d42f23..1f4102b97 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -9,7 +9,7 @@ set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VER set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION% set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan -call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz +call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index b6b181987..6a2809f0c 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -2,7 +2,7 @@ # Read CORTEX_VERSION CORTEX_VERSION=$(cat ./bin/version.txt) -CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" +CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download" ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39/cortex.llamacpp-0.1.39" CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39" # Detect platform From f75dc662ee74542d53b6e42405f4881325c17764 Mon Sep 17 00:00:00 2001 From: Louis Date: Mon, 18 Nov 2024 15:54:26 +0700 Subject: [PATCH 6/6] chore: reduce app launch time --- .gitignore | 1 + .../inference-cortex-extension/download.bat | 25 ++++++------- .../inference-cortex-extension/download.sh | 32 +++++++++-------- .../inference-cortex-extension/src/index.ts | 2 +- .../src/node/execute.test.ts | 35 +++++++++++-------- .../src/node/execute.ts | 8 +++-- .../src/node/index.ts | 9 ----- 7 files changed, 57 insertions(+), 55 deletions(-) diff --git a/.gitignore b/.gitignore index f28d152d9..ab815678a 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ coverage .yarnrc test_results.html *.tsbuildinfo +electron/shared/** diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index 1f4102b97..25527eb36 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -2,6 +2,7 @@ set BIN_PATH=./bin set SHARED_PATH=./../../electron/shared set /p CORTEX_VERSION=<./bin/version.txt +set ENGINE_VERSION=0.1.39 @REM Download cortex.llamacpp binaries set VERSION=v0.1.39 @@ -10,15 +11,15 @@ set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-11-7/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-12-0/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-11-7/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx512/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-vulkan/v%ENGINE_VERSION% call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% @@ -28,12 +29,12 @@ del %BIN_PATH%\cortex.exe @REM Loop through each folder and move DLLs (excluding engine.dll) for %%F in (%SUBFOLDERS%) do ( - echo Processing folder: %BIN_PATH%\%%F + echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\%%F @REM Move all .dll files except engine.dll - for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do ( + for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\%%F\*.dll) do ( if /I not "%%~nxD"=="engine.dll" ( - move "%%D" "%BIN_PATH%" + move "%%D" "%SHARED_PATH%" ) ) ) diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index 6a2809f0c..9c0ebbe64 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -2,9 +2,11 @@ # Read CORTEX_VERSION CORTEX_VERSION=$(cat ./bin/version.txt) +ENGINE_VERSION=0.1.39 CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download" -ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39/cortex.llamacpp-0.1.39" -CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39" +ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}" +CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}" +SHARED_PATH="../../electron/shared" # Detect platform OS_TYPE=$(uname) @@ -17,17 +19,17 @@ if [ "$OS_TYPE" == "Linux" ]; then chmod +x "./bin/cortex-server" # Download engines for Linux - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1 - download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 - download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx512/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-12-0/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-11-7/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-12-0/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-11-7/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1 elif [ "$OS_TYPE" == "Darwin" ]; then # macOS downloads @@ -38,8 +40,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then chmod +x "./bin/cortex-server" # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.39" - download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.39" + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-arm64/v0.1.39" + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-amd64/v0.1.39" else echo "Unsupported operating system: $OS_TYPE" diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index e88608d57..6bd3c468e 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -168,7 +168,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { * Set default engine variant on launch */ private async setDefaultEngine(systemInfo: SystemInformation) { - const variant = await executeOnMain(NODE, 'engineVariant', systemInfo) + const variant = await executeOnMain(NODE, 'engineVariant', systemInfo.gpuSetting) return ky .post( `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`, diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts index b0a7ece9e..73f114ce1 100644 --- a/extensions/inference-cortex-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from '@jest/globals' import { engineVariant, executableCortexFile } from './execute' -import { GpuSetting } from '@janhq/core' +import { GpuSetting } from '@janhq/core/node' import { cpuInfo } from 'cpu-instructions' let testSettings: GpuSetting = { @@ -30,6 +30,11 @@ jest.mock('cpu-instructions', () => ({ let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock mockCpuInfo.mockReturnValue([]) +jest.mock('@janhq/core/node', () => ({ + appResourcePath: () => ".", + log: jest.fn() +})) + describe('test executable cortex file', () => { afterAll(function () { Object.defineProperty(process, 'platform', { @@ -46,7 +51,7 @@ describe('test executable cortex file', () => { }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) @@ -61,7 +66,7 @@ describe('test executable cortex file', () => { }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) @@ -84,7 +89,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['avx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -120,7 +125,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['avx2']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -156,7 +161,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['noavx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -178,7 +183,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['noavx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -214,7 +219,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['avx512']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -249,7 +254,7 @@ describe('test executable cortex file', () => { } expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -274,7 +279,7 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining('bin'), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', @@ -298,7 +303,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining('bin'), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -338,7 +343,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -380,7 +385,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -423,7 +428,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -450,7 +455,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts index 48a407e31..44b85d515 100644 --- a/extensions/inference-cortex-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -1,6 +1,6 @@ -import { GpuSetting } from '@janhq/core' import * as path from 'path' import { cpuInfo } from 'cpu-instructions' +import { GpuSetting, appResourcePath, log } from '@janhq/core/node' export interface CortexExecutableOptions { enginePath: string @@ -52,7 +52,7 @@ const extension = (): '.exe' | '' => { */ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { const isUsingCuda = - settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac' + settings?.vulkan !== true && settings?.run_mode === 'gpu' && !os().includes('mac') if (!isUsingCuda) return undefined return settings?.cuda?.version === '11' ? '11-7' : '12-0' @@ -84,7 +84,7 @@ export const executableCortexFile = ( let binaryName = `cortex-server${extension()}` const binPath = path.join(__dirname, '..', 'bin') return { - enginePath: binPath, + enginePath: path.join(appResourcePath(), 'shared'), executablePath: path.join(binPath, binaryName), cudaVisibleDevices, vkVisibleDevices, @@ -112,5 +112,7 @@ export const engineVariant = (gpuSetting?: GpuSetting): string => { ] .filter((e) => !!e) .join('-') + + log(`[CORTEX]: Engine variant: ${engineVariant}`) return engineVariant } diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts index 4c6d96292..a13bf6028 100644 --- a/extensions/inference-cortex-extension/src/node/index.ts +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -2,7 +2,6 @@ import path from 'path' import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node' import { engineVariant, executableCortexFile } from './execute' import { ProcessWatchdog } from './watchdog' -import { appResourcePath } from '@janhq/core/node' // The HOST address to use for the Nitro subprocess const LOCAL_PORT = '39291' @@ -30,13 +29,7 @@ function run(systemInfo?: SystemInformation): Promise { log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`) log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`) - addEnvPaths(path.join(appResourcePath(), 'shared')) addEnvPaths(executableOptions.enginePath) - // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH - // This is required for the cortex engine to run for now since dlls are not moved to the root - addEnvPaths( - path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp') - ) const dataFolderPath = getJanDataFolderPath() if (watchdog) { @@ -85,13 +78,11 @@ function addEnvPaths(dest: string) { // Add engine path to the PATH and LD_LIBRARY_PATH if (process.platform === 'win32') { process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest) - log(`[CORTEX] PATH: ${process.env.PATH}`) } else { process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( path.delimiter, dest ) - log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`) } }