From a15d92dbdc0ab5748de8130060b6c156d2579554 Mon Sep 17 00:00:00 2001 From: Louis Date: Thu, 14 Nov 2024 21:09:44 +0700 Subject: [PATCH] feat: integrates cortex.cpp engine variants --- .../inference-cortex-extension/download.sh | 4 +- .../rollup.config.ts | 1 + .../src/@types/global.d.ts | 1 + .../inference-cortex-extension/src/index.ts | 23 +++++-- .../src/node/execute.test.ts | 63 ++++++++++--------- .../src/node/execute.ts | 60 +++++++++--------- .../src/node/index.ts | 17 +++-- 7 files changed, 96 insertions(+), 73 deletions(-) diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index 902a31e51..8c13a13ef 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -38,8 +38,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then chmod +x "./bin/cortex-server" # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp - download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.35" + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.35" else echo "Unsupported operating system: $OS_TYPE" diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index 34ad9295d..882ed1921 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -120,6 +120,7 @@ export default [ DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.35'), }), // Allow json resolution json(), diff --git a/extensions/inference-cortex-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts index 48dbcd780..381a80f5e 100644 --- a/extensions/inference-cortex-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -1,6 +1,7 @@ declare const NODE: string declare const CORTEX_API_URL: string declare const CORTEX_SOCKET_URL: string +declare const CORTEX_ENGINE_VERSION: string declare const DEFAULT_SETTINGS: Array declare const MODELS: Array diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index e83a17561..0331a4d17 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -18,6 +18,7 @@ import { fs, events, ModelEvent, + SystemInformation, } from '@janhq/core' import PQueue from 'p-queue' import ky from 'ky' @@ -74,6 +75,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { this.queue.add(() => this.healthz()) + this.queue.add(() => this.setDefaultEngine(systemInfo)) this.subscribeToEvents() window.addEventListener('beforeunload', () => { @@ -153,7 +155,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { * Do health check on cortex.cpp * @returns */ - healthz(): Promise { + private healthz(): Promise { return ky .get(`${CORTEX_API_URL}/healthz`, { retry: { @@ -164,11 +166,24 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { .then(() => {}) } + /** + * Set default engine variant on launch + */ + private async setDefaultEngine(systemInfo: SystemInformation) { + const variant = await executeOnMain(NODE, 'engineVariant', systemInfo) + return ky + .post( + `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`, + { json: {} } + ) + .then(() => {}) + } + /** * Clean cortex processes * @returns */ - clean(): Promise { + private clean(): Promise { return ky .delete(`${CORTEX_API_URL}/processmanager/destroy`, { timeout: 2000, // maximum 2 seconds @@ -181,7 +196,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { /** * Subscribe to cortex.cpp websocket events */ - subscribeToEvents() { + private subscribeToEvents() { this.queue.add( () => new Promise((resolve) => { @@ -235,7 +250,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { } /// Legacy -export const getModelFilePath = async ( +const getModelFilePath = async ( model: Model, file: string ): Promise => { diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts index 622eb38af..b0a7ece9e 100644 --- a/extensions/inference-cortex-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from '@jest/globals' -import { executableCortexFile } from './execute' +import { engineVariant, executableCortexFile } from './execute' import { GpuSetting } from '@janhq/core' import { cpuInfo } from 'cpu-instructions' @@ -46,8 +46,7 @@ describe('test executable cortex file', () => { }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`arm64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) @@ -56,13 +55,13 @@ describe('test executable cortex file', () => { vkVisibleDevices: '', }) ) + expect(engineVariant(testSettings)).toEqual('mac-arm64') Object.defineProperty(process, 'arch', { value: 'x64', }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) @@ -71,6 +70,7 @@ describe('test executable cortex file', () => { vkVisibleDevices: '', }) ) + expect(engineVariant(testSettings)).toEqual('mac-amd64') }) it('executes on Windows CPU', () => { @@ -84,13 +84,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['avx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`avx`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant()).toEqual('windows-amd64-avx') }) it('executes on Windows Cuda 11', () => { @@ -120,13 +120,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['avx2']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`avx2-cuda-11-7`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-11-7') }) it('executes on Windows Cuda 12', () => { @@ -156,13 +156,15 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['noavx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`noavx-cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('windows-amd64-noavx-cuda-12-0') + mockCpuInfo.mockReturnValue(['avx512']) + expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-12-0') }) it('executes on Linux CPU', () => { @@ -176,12 +178,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['noavx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`noavx`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant()).toEqual('linux-amd64-noavx') }) it('executes on Linux Cuda 11', () => { @@ -208,15 +211,16 @@ describe('test executable cortex file', () => { }, ], } + mockCpuInfo.mockReturnValue(['avx512']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-11-7`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-11-7') }) it('executes on Linux Cuda 12', () => { @@ -245,13 +249,13 @@ describe('test executable cortex file', () => { } expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-12-0') }) // Generate test for different cpu instructions on Linux @@ -270,14 +274,14 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('bin'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant(settings)).toEqual(`linux-amd64-${instruction}`) }) }) // Generate test for different cpu instructions on Windows @@ -294,13 +298,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('bin'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant(settings)).toEqual(`windows-amd64-${instruction}`) }) }) @@ -334,13 +338,15 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual( + `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` + ) }) }) @@ -374,13 +380,15 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual( + `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` + ) }) }) @@ -415,13 +423,13 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`vulkan`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).toEqual(`linux-amd64-vulkan`) }) }) @@ -442,8 +450,7 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue([]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining(`bin`), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts index 74ffb48c6..48a407e31 100644 --- a/extensions/inference-cortex-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -4,7 +4,6 @@ import { cpuInfo } from 'cpu-instructions' export interface CortexExecutableOptions { enginePath: string - binPath: string executablePath: string cudaVisibleDevices: string vkVisibleDevices: string @@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => { if (!settings) return '' - return settings.vulkan === true - ? 'vulkan' - : settings.run_mode === 'cpu' - ? '' - : 'cuda' + return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda' } /** @@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => { */ const os = (): string => { return process.platform === 'win32' - ? 'win' + ? 'windows-amd64' : process.platform === 'darwin' ? process.arch === 'arm64' - ? 'arm64' - : 'x64' - : 'linux' + ? 'mac-arm64' + : 'mac-amd64' + : 'linux-amd64' } /** @@ -79,36 +74,43 @@ const cpuInstructions = (): string => { } /** - * Find which executable file to run based on the current platform. - * @returns The name of the executable file to run. + * The executable options for the cortex.cpp extension. */ export const executableCortexFile = ( gpuSetting?: GpuSetting ): CortexExecutableOptions => { - const cpuInstruction = cpuInstructions() - let engineFolder = gpuSetting?.vulkan - ? 'vulkan' - : process.platform === 'darwin' - ? os() - : [ - gpuRunMode(gpuSetting) !== 'cuda' || - cpuInstruction === 'avx2' || cpuInstruction === 'avx512' - ? cpuInstruction - : 'noavx', - gpuRunMode(gpuSetting), - cudaVersion(gpuSetting), - ] - .filter((e) => !!e) - .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let binaryName = `cortex-server${extension()}` const binPath = path.join(__dirname, '..', 'bin') return { - enginePath: path.join(binPath, engineFolder), + enginePath: binPath, executablePath: path.join(binPath, binaryName), - binPath: binPath, cudaVisibleDevices, vkVisibleDevices, } } + +/** + * Find which variant to run based on the current platform. + */ +export const engineVariant = (gpuSetting?: GpuSetting): string => { + const cpuInstruction = cpuInstructions() + let engineVariant = [ + os(), + gpuSetting?.vulkan + ? 'vulkan' + : gpuRunMode(gpuSetting) !== 'cuda' + ? // CPU mode - support all variants + cpuInstruction + : // GPU mode - packaged CUDA variants of avx2 and noavx + cpuInstruction === 'avx2' || cpuInstruction === 'avx512' + ? 'avx2' + : 'noavx', + gpuRunMode(gpuSetting), + cudaVersion(gpuSetting), + ] + .filter((e) => !!e) + .join('-') + return engineVariant +} diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts index 3816605d2..cf2af045b 100644 --- a/extensions/inference-cortex-extension/src/node/index.ts +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -1,6 +1,6 @@ import path from 'path' import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node' -import { executableCortexFile } from './execute' +import { engineVariant, executableCortexFile } from './execute' import { ProcessWatchdog } from './watchdog' import { appResourcePath } from '@janhq/core/node' @@ -20,9 +20,9 @@ function run(systemInfo?: SystemInformation): Promise { // If ngl is not set or equal to 0, run on CPU with correct instructions systemInfo?.gpuSetting ? { - ...systemInfo.gpuSetting, - run_mode: systemInfo.gpuSetting.run_mode, - } + ...systemInfo.gpuSetting, + run_mode: systemInfo.gpuSetting.run_mode, + } : undefined ) @@ -31,7 +31,6 @@ function run(systemInfo?: SystemInformation): Promise { log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`) addEnvPaths(path.join(appResourcePath(), 'shared')) - addEnvPaths(executableOptions.binPath) addEnvPaths(executableOptions.enginePath) // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH // This is required for the cortex engine to run for now since dlls are not moved to the root @@ -81,15 +80,12 @@ function dispose() { function addEnvPaths(dest: string) { // Add engine path to the PATH and LD_LIBRARY_PATH if (process.platform === 'win32') { - process.env.PATH = (process.env.PATH || '').concat( - path.delimiter, - dest, - ) + process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest) log(`[CORTEX] PATH: ${process.env.PATH}`) } else { process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( path.delimiter, - dest, + dest ) log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`) } @@ -105,4 +101,5 @@ export interface CortexProcessInfo { export default { run, dispose, + engineVariant, }