diff --git a/.gitignore b/.gitignore index f28d152d9..ab815678a 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ coverage .yarnrc test_results.html *.tsbuildinfo +electron/shared/** diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt index e6d5cb833..89f843d1d 100644 --- a/extensions/inference-cortex-extension/bin/version.txt +++ b/extensions/inference-cortex-extension/bin/version.txt @@ -1 +1 @@ -1.0.2 \ No newline at end of file +1.0.3-rc1 \ No newline at end of file diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index ecff683c3..25527eb36 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -2,23 +2,24 @@ set BIN_PATH=./bin set SHARED_PATH=./../../electron/shared set /p CORTEX_VERSION=<./bin/version.txt +set ENGINE_VERSION=0.1.39 @REM Download cortex.llamacpp binaries -set VERSION=v0.1.35 -set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64 +set VERSION=v0.1.39 +set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.39-windows-amd64 set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION% set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan -call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp +call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-11-7/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-12-0/v%ENGINE_VERSION% +call .\node_modules\.bin\download 
%DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-11-7/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx512/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-vulkan/v%ENGINE_VERSION% call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% @@ -28,12 +29,12 @@ del %BIN_PATH%\cortex.exe @REM Loop through each folder and move DLLs (excluding engine.dll) for %%F in (%SUBFOLDERS%) do ( - echo Processing folder: %BIN_PATH%\%%F + echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\%%F @REM Move all .dll files except engine.dll - for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do ( + for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\%%F\*.dll) do ( if /I not "%%~nxD"=="engine.dll" ( - move "%%D" "%BIN_PATH%" + move "%%D" "%SHARED_PATH%" ) ) ) diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index 902a31e51..9c0ebbe64 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -2,9 +2,11 @@ # Read CORTEX_VERSION CORTEX_VERSION=$(cat ./bin/version.txt) -CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" -ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35" -CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35" +ENGINE_VERSION=0.1.39 +CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download" +ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}" +CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}" +SHARED_PATH="../../electron/shared" # Detect platform OS_TYPE=$(uname) @@ -17,17 +19,17 @@ if [ "$OS_TYPE" == "Linux" ]; then chmod +x "./bin/cortex-server" # Download engines for Linux - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1 - download 
"${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1 - download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 - download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx512/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-12-0/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-11-7/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-12-0/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-11-7/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1 elif [ "$OS_TYPE" == "Darwin" ]; then # macOS downloads @@ -38,8 +40,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then chmod +x "./bin/cortex-server" # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp - download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-arm64/v0.1.39" + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-amd64/v0.1.39" else echo "Unsupported operating system: $OS_TYPE" diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index 34ad9295d..00fae78ba 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -120,6 +120,7 @@ export default [ DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.39'), }), // Allow json resolution json(), diff --git 
a/extensions/inference-cortex-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts index 48dbcd780..381a80f5e 100644 --- a/extensions/inference-cortex-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -1,6 +1,7 @@ declare const NODE: string declare const CORTEX_API_URL: string declare const CORTEX_SOCKET_URL: string +declare const CORTEX_ENGINE_VERSION: string declare const DEFAULT_SETTINGS: Array declare const MODELS: Array diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index e83a17561..6bd3c468e 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -18,6 +18,7 @@ import { fs, events, ModelEvent, + SystemInformation, } from '@janhq/core' import PQueue from 'p-queue' import ky from 'ky' @@ -67,13 +68,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { super.onLoad() + this.queue.add(() => this.healthz()) + this.queue.add(() => this.setDefaultEngine(systemInfo)) // Run the process watchdog const systemInfo = await systemInformation() await this.clean() await executeOnMain(NODE, 'run', systemInfo) - - this.queue.add(() => this.healthz()) - this.subscribeToEvents() window.addEventListener('beforeunload', () => { @@ -153,7 +153,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { * Do health check on cortex.cpp * @returns */ - healthz(): Promise { + private healthz(): Promise { return ky .get(`${CORTEX_API_URL}/healthz`, { retry: { @@ -164,11 +164,24 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { .then(() => {}) } + /** + * Set default engine variant on launch + */ + private async setDefaultEngine(systemInfo: SystemInformation) { + const variant = await executeOnMain(NODE, 'engineVariant', systemInfo.gpuSetting) + return ky + .post( + `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`, + { json: {} } + ) + .then(() => {}) + } + /** * Clean cortex processes * @returns */ - clean(): Promise { + private clean(): Promise { return ky .delete(`${CORTEX_API_URL}/processmanager/destroy`, { timeout: 2000, // maximum 2 seconds @@ -181,7 +194,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { /** * Subscribe to cortex.cpp websocket events */ - subscribeToEvents() { + private subscribeToEvents() { this.queue.add( () => new Promise((resolve) => { @@ -235,7 +248,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { } /// Legacy -export const getModelFilePath = async ( +const getModelFilePath = async ( model: Model, file: string ): Promise => { diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts index 622eb38af..73f114ce1 100644 --- a/extensions/inference-cortex-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from '@jest/globals' -import { executableCortexFile } from './execute' -import { GpuSetting } from '@janhq/core' +import { engineVariant, executableCortexFile } from './execute' +import { GpuSetting } from '@janhq/core/node' import { cpuInfo } from 'cpu-instructions' let testSettings: GpuSetting = { @@ -30,6 +30,11 @@ jest.mock('cpu-instructions', () => ({ let mockCpuInfo = 
cpuInfo.cpuInfo as jest.Mock mockCpuInfo.mockReturnValue([]) +jest.mock('@janhq/core/node', () => ({ + appResourcePath: () => ".", + log: jest.fn() +})) + describe('test executable cortex file', () => { afterAll(function () { Object.defineProperty(process, 'platform', { @@ -46,8 +51,7 @@ describe('test executable cortex file', () => { }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`arm64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) @@ -56,13 +60,13 @@ describe('test executable cortex file', () => { vkVisibleDevices: '', }) ) + expect(engineVariant(testSettings)).toEqual('mac-arm64') Object.defineProperty(process, 'arch', { value: 'x64', }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) @@ -71,6 +75,7 @@ describe('test executable cortex file', () => { vkVisibleDevices: '', }) ) + expect(engineVariant(testSettings)).toEqual('mac-amd64') }) it('executes on Windows CPU', () => { @@ -84,13 +89,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['avx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`avx`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant()).toEqual('windows-amd64-avx') }) it('executes on Windows Cuda 11', () => { @@ -120,13 +125,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['avx2']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`avx2-cuda-11-7`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-11-7') }) it('executes on Windows Cuda 12', () => { @@ -156,13 +161,15 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['noavx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`noavx-cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('windows-amd64-noavx-cuda-12-0') + mockCpuInfo.mockReturnValue(['avx512']) + expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-12-0') }) it('executes on Linux CPU', () => { @@ -176,12 +183,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['noavx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`noavx`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + 
expect(engineVariant()).toEqual('linux-amd64-noavx') }) it('executes on Linux Cuda 11', () => { @@ -208,15 +216,16 @@ describe('test executable cortex file', () => { }, ], } + mockCpuInfo.mockReturnValue(['avx512']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-11-7`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-11-7') }) it('executes on Linux Cuda 12', () => { @@ -245,13 +254,13 @@ describe('test executable cortex file', () => { } expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-12-0') }) // Generate test for different cpu instructions on Linux @@ -270,14 +279,14 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant(settings)).toEqual(`linux-amd64-${instruction}`) }) }) // Generate test for different cpu instructions on Windows @@ -294,13 +303,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant(settings)).toEqual(`windows-amd64-${instruction}`) }) }) @@ -334,13 +343,15 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual( + `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` + ) }) }) @@ -374,13 +385,15 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual( + `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 
'avx2' : 'noavx'}-cuda-12-0` + ) }) }) @@ -415,13 +428,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`vulkan`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual(`linux-amd64-vulkan`) }) }) @@ -442,8 +455,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining("shared"), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts index 74ffb48c6..44b85d515 100644 --- a/extensions/inference-cortex-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -1,10 +1,9 @@ -import { GpuSetting } from '@janhq/core' import * as path from 'path' import { cpuInfo } from 'cpu-instructions' +import { GpuSetting, appResourcePath, log } from '@janhq/core/node' export interface CortexExecutableOptions { enginePath: string - binPath: string executablePath: string cudaVisibleDevices: string vkVisibleDevices: string @@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => { if (!settings) return '' - return settings.vulkan === true - ? 'vulkan' - : settings.run_mode === 'cpu' - ? '' - : 'cuda' + return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda' } /** @@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => { */ const os = (): string => { return process.platform === 'win32' - ? 'win' + ? 'windows-amd64' : process.platform === 'darwin' ? process.arch === 'arm64' - ? 'arm64' - : 'x64' - : 'linux' + ? 'mac-arm64' + : 'mac-amd64' + : 'linux-amd64' } /** @@ -57,7 +52,7 @@ const extension = (): '.exe' | '' => { */ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { const isUsingCuda = - settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac' + settings?.vulkan !== true && settings?.run_mode === 'gpu' && !os().includes('mac') if (!isUsingCuda) return undefined return settings?.cuda?.version === '11' ? '11-7' : '12-0' @@ -79,36 +74,45 @@ const cpuInstructions = (): string => { } /** - * Find which executable file to run based on the current platform. - * @returns The name of the executable file to run. + * The executable options for the cortex.cpp extension. */ export const executableCortexFile = ( gpuSetting?: GpuSetting ): CortexExecutableOptions => { - const cpuInstruction = cpuInstructions() - let engineFolder = gpuSetting?.vulkan - ? 'vulkan' - : process.platform === 'darwin' - ? os() - : [ - gpuRunMode(gpuSetting) !== 'cuda' || - cpuInstruction === 'avx2' || cpuInstruction === 'avx512' - ? cpuInstruction - : 'noavx', - gpuRunMode(gpuSetting), - cudaVersion(gpuSetting), - ] - .filter((e) => !!e) - .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? 
'' let binaryName = `cortex-server${extension()}` const binPath = path.join(__dirname, '..', 'bin') return { - enginePath: path.join(binPath, engineFolder), + enginePath: path.join(appResourcePath(), 'shared'), executablePath: path.join(binPath, binaryName), - binPath: binPath, cudaVisibleDevices, vkVisibleDevices, } } + +/** + * Find which variant to run based on the current platform. + */ +export const engineVariant = (gpuSetting?: GpuSetting): string => { + const cpuInstruction = cpuInstructions() + let engineVariant = [ + os(), + gpuSetting?.vulkan + ? 'vulkan' + : gpuRunMode(gpuSetting) !== 'cuda' + ? // CPU mode - support all variants + cpuInstruction + : // GPU mode - packaged CUDA variants of avx2 and noavx + cpuInstruction === 'avx2' || cpuInstruction === 'avx512' + ? 'avx2' + : 'noavx', + gpuRunMode(gpuSetting), + cudaVersion(gpuSetting), + ] + .filter((e) => !!e) + .join('-') + + log(`[CORTEX]: Engine variant: ${engineVariant}`) + return engineVariant +} diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts index 3816605d2..a13bf6028 100644 --- a/extensions/inference-cortex-extension/src/node/index.ts +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -1,8 +1,7 @@ import path from 'path' import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node' -import { executableCortexFile } from './execute' +import { engineVariant, executableCortexFile } from './execute' import { ProcessWatchdog } from './watchdog' -import { appResourcePath } from '@janhq/core/node' // The HOST address to use for the Nitro subprocess const LOCAL_PORT = '39291' @@ -20,9 +19,9 @@ function run(systemInfo?: SystemInformation): Promise { // If ngl is not set or equal to 0, run on CPU with correct instructions systemInfo?.gpuSetting ? 
{ - ...systemInfo.gpuSetting, - run_mode: systemInfo.gpuSetting.run_mode, - } + ...systemInfo.gpuSetting, + run_mode: systemInfo.gpuSetting.run_mode, + } : undefined ) @@ -30,16 +29,13 @@ function run(systemInfo?: SystemInformation): Promise { log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`) log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`) - addEnvPaths(path.join(appResourcePath(), 'shared')) - addEnvPaths(executableOptions.binPath) addEnvPaths(executableOptions.enginePath) - // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH - // This is required for the cortex engine to run for now since dlls are not moved to the root - addEnvPaths( - path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp') - ) const dataFolderPath = getJanDataFolderPath() + if (watchdog) { + watchdog.terminate() + } + watchdog = new ProcessWatchdog( executableOptions.executablePath, [ @@ -81,17 +77,12 @@ function dispose() { function addEnvPaths(dest: string) { // Add engine path to the PATH and LD_LIBRARY_PATH if (process.platform === 'win32') { - process.env.PATH = (process.env.PATH || '').concat( - path.delimiter, - dest, - ) - log(`[CORTEX] PATH: ${process.env.PATH}`) + process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest) } else { process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( path.delimiter, - dest, + dest ) - log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`) } } @@ -105,4 +96,5 @@ export interface CortexProcessInfo { export default { run, dispose, + engineVariant, } diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts index c49ddb964..84c6a5126 100644 --- a/web/hooks/useImportModel.ts +++ b/web/hooks/useImportModel.ts @@ -9,7 +9,6 @@ import { OptionType, events, fs, - baseName, } from '@janhq/core' import { atom, useAtomValue, useSetAtom } from 'jotai' diff --git a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx index 0420b7d51..16a0024e8 100644 --- a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx @@ -9,8 +9,6 @@ import { MainViewState } from '@/constants/screens' import { loadModelErrorAtom } from '@/hooks/useActiveModel' -import { useSettings } from '@/hooks/useSettings' - import { mainViewStateAtom } from '@/helpers/atoms/App.atom' import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' @@ -21,7 +19,6 @@ const LoadModelError = () => { const setMainState = useSetAtom(mainViewStateAtom) const setSelectedSettingScreen = useSetAtom(selectedSettingAtom) const activeThread = useAtomValue(activeThreadAtom) - const { settings } = useSettings() const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'
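
Review note (not part of the patch): a minimal sketch of how the pieces added above are meant to interact at launch — engineVariant() picks the cortex.llamacpp variant for the host, and setDefaultEngine() pins it through the new default-engine endpoint. It assumes only identifiers this diff introduces or references (engineVariant, CORTEX_API_URL, CORTEX_ENGINE_VERSION, InferenceEngine.cortex_llamacpp, GpuSetting); the GpuSetting literal is abbreviated for illustration, and in the real extension engineVariant() runs on the main process via executeOnMain(NODE, 'engineVariant', systemInfo.gpuSetting) rather than being imported directly.

// sketch.ts — illustrative only; mirrors the calls added in this diff.
import ky from 'ky'
import { InferenceEngine } from '@janhq/core'
import { GpuSetting } from '@janhq/core/node'
import { engineVariant } from './node/execute'

const CORTEX_API_URL = 'http://127.0.0.1:39291' // injected via rollup replace() in the real build
const CORTEX_ENGINE_VERSION = 'v0.1.39'         // injected via rollup replace() in the real build

async function pinDefaultEngine() {
  // Abbreviated example setting: one CUDA 12 GPU in use, Vulkan off, GPU run mode.
  const gpuSetting = {
    run_mode: 'gpu',
    vulkan: false,
    cuda: { exist: true, version: '12' },
    gpus_in_use: ['0'],
  } as unknown as GpuSetting

  // engineVariant() composes '<platform>-<instructions>[-cuda-<version>]',
  // e.g. 'windows-amd64-avx2-cuda-12-0', 'linux-amd64-noavx', or 'mac-arm64'.
  const variant = engineVariant(gpuSetting)

  // Same request the extension now issues from setDefaultEngine() on load.
  await ky.post(
    `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`,
    { json: {} }
  )
}

pinDefaultEngine().catch(console.error)

The net effect of the layout change: all llama.cpp engine builds now land under electron/shared/engines/cortex.llamacpp/<variant>/v<ENGINE_VERSION> instead of per-extension bin/<variant>/engines/cortex.llamacpp folders, and cortex.cpp is told at startup which variant/version to treat as default rather than the extension resolving an engine folder path itself.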