feat: integrates cortex.cpp engine variants

This commit is contained in:
Louis 2024-11-14 21:09:44 +07:00
parent 3b48cb5816
commit a15d92dbdc
No known key found for this signature in database
GPG Key ID: 44FA9F4D33C37DE2
7 changed files with 96 additions and 73 deletions

View File

@ -38,8 +38,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
chmod +x "./bin/cortex-server" chmod +x "./bin/cortex-server"
# Download engines for macOS # Download engines for macOS
download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.35"
download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.35"
else else
echo "Unsupported operating system: $OS_TYPE" echo "Unsupported operating system: $OS_TYPE"

View File

@ -120,6 +120,7 @@ export default [
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.35'),
}), }),
// Allow json resolution // Allow json resolution
json(), json(),

View File

@ -1,6 +1,7 @@
declare const NODE: string declare const NODE: string
declare const CORTEX_API_URL: string declare const CORTEX_API_URL: string
declare const CORTEX_SOCKET_URL: string declare const CORTEX_SOCKET_URL: string
declare const CORTEX_ENGINE_VERSION: string
declare const DEFAULT_SETTINGS: Array<any> declare const DEFAULT_SETTINGS: Array<any>
declare const MODELS: Array<any> declare const MODELS: Array<any>

View File

@ -18,6 +18,7 @@ import {
fs, fs,
events, events,
ModelEvent, ModelEvent,
SystemInformation,
} from '@janhq/core' } from '@janhq/core'
import PQueue from 'p-queue' import PQueue from 'p-queue'
import ky from 'ky' import ky from 'ky'
@ -74,6 +75,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
this.queue.add(() => this.healthz()) this.queue.add(() => this.healthz())
this.queue.add(() => this.setDefaultEngine(systemInfo))
this.subscribeToEvents() this.subscribeToEvents()
window.addEventListener('beforeunload', () => { window.addEventListener('beforeunload', () => {
@ -153,7 +155,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
* Do health check on cortex.cpp * Do health check on cortex.cpp
* @returns * @returns
*/ */
healthz(): Promise<void> { private healthz(): Promise<void> {
return ky return ky
.get(`${CORTEX_API_URL}/healthz`, { .get(`${CORTEX_API_URL}/healthz`, {
retry: { retry: {
@ -164,11 +166,24 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
.then(() => {}) .then(() => {})
} }
/**
 * Set the default cortex.llamacpp engine variant on launch.
 *
 * Asks the node side (`engineVariant`) which variant matches the given
 * system information, then posts that variant together with
 * CORTEX_ENGINE_VERSION to the cortex.cpp "default engine" endpoint.
 *
 * @param systemInfo System information forwarded to the node module to pick the variant.
 * @returns A promise that resolves once the request has completed; it rejects
 *          if resolving the variant or the HTTP request fails.
 */
private async setDefaultEngine(systemInfo: SystemInformation): Promise<void> {
  const variant = await executeOnMain(NODE, 'engineVariant', systemInfo)
  // Encode the query values so an unexpected character in the version or
  // variant cannot corrupt the request URL.
  const query = `version=${encodeURIComponent(CORTEX_ENGINE_VERSION)}&variant=${encodeURIComponent(variant)}`
  await ky.post(
    `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?${query}`,
    { json: {} }
  )
}
/** /**
* Clean cortex processes * Clean cortex processes
* @returns * @returns
*/ */
clean(): Promise<any> { private clean(): Promise<any> {
return ky return ky
.delete(`${CORTEX_API_URL}/processmanager/destroy`, { .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
timeout: 2000, // maximum 2 seconds timeout: 2000, // maximum 2 seconds
@ -181,7 +196,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
/** /**
* Subscribe to cortex.cpp websocket events * Subscribe to cortex.cpp websocket events
*/ */
subscribeToEvents() { private subscribeToEvents() {
this.queue.add( this.queue.add(
() => () =>
new Promise<void>((resolve) => { new Promise<void>((resolve) => {
@ -235,7 +250,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
} }
/// Legacy /// Legacy
export const getModelFilePath = async ( const getModelFilePath = async (
model: Model, model: Model,
file: string file: string
): Promise<string> => { ): Promise<string> => {

View File

@ -1,5 +1,5 @@
import { describe, expect, it } from '@jest/globals' import { describe, expect, it } from '@jest/globals'
import { executableCortexFile } from './execute' import { engineVariant, executableCortexFile } from './execute'
import { GpuSetting } from '@janhq/core' import { GpuSetting } from '@janhq/core'
import { cpuInfo } from 'cpu-instructions' import { cpuInfo } from 'cpu-instructions'
@ -46,8 +46,7 @@ describe('test executable cortex file', () => {
}) })
expect(executableCortexFile(testSettings)).toEqual( expect(executableCortexFile(testSettings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`arm64`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: executablePath:
originalPlatform === 'darwin' originalPlatform === 'darwin'
? expect.stringContaining(`cortex-server`) ? expect.stringContaining(`cortex-server`)
@ -56,13 +55,13 @@ describe('test executable cortex file', () => {
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
) )
expect(engineVariant(testSettings)).toEqual('mac-arm64')
Object.defineProperty(process, 'arch', { Object.defineProperty(process, 'arch', {
value: 'x64', value: 'x64',
}) })
expect(executableCortexFile(testSettings)).toEqual( expect(executableCortexFile(testSettings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`x64`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: executablePath:
originalPlatform === 'darwin' originalPlatform === 'darwin'
? expect.stringContaining(`cortex-server`) ? expect.stringContaining(`cortex-server`)
@ -71,6 +70,7 @@ describe('test executable cortex file', () => {
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
) )
expect(engineVariant(testSettings)).toEqual('mac-amd64')
}) })
it('executes on Windows CPU', () => { it('executes on Windows CPU', () => {
@ -84,13 +84,13 @@ describe('test executable cortex file', () => {
mockCpuInfo.mockReturnValue(['avx']) mockCpuInfo.mockReturnValue(['avx'])
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`avx`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server.exe`), executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
) )
expect(engineVariant()).toEqual('windows-amd64-avx')
}) })
it('executes on Windows Cuda 11', () => { it('executes on Windows Cuda 11', () => {
@ -120,13 +120,13 @@ describe('test executable cortex file', () => {
mockCpuInfo.mockReturnValue(['avx2']) mockCpuInfo.mockReturnValue(['avx2'])
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`avx2-cuda-11-7`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server.exe`), executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
) )
expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-11-7')
}) })
it('executes on Windows Cuda 12', () => { it('executes on Windows Cuda 12', () => {
@ -156,13 +156,15 @@ describe('test executable cortex file', () => {
mockCpuInfo.mockReturnValue(['noavx']) mockCpuInfo.mockReturnValue(['noavx'])
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`noavx-cuda-12-0`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server.exe`), executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
) )
expect(engineVariant(settings)).toEqual('windows-amd64-noavx-cuda-12-0')
mockCpuInfo.mockReturnValue(['avx512'])
expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-12-0')
}) })
it('executes on Linux CPU', () => { it('executes on Linux CPU', () => {
@ -176,12 +178,13 @@ describe('test executable cortex file', () => {
mockCpuInfo.mockReturnValue(['noavx']) mockCpuInfo.mockReturnValue(['noavx'])
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`noavx`), enginePath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`), executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
) )
expect(engineVariant()).toEqual('linux-amd64-noavx')
}) })
it('executes on Linux Cuda 11', () => { it('executes on Linux Cuda 11', () => {
@ -208,15 +211,16 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockCpuInfo.mockReturnValue(['avx512'])
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`cuda-11-7`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`), executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
) )
expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-11-7')
}) })
it('executes on Linux Cuda 12', () => { it('executes on Linux Cuda 12', () => {
@ -245,13 +249,13 @@ describe('test executable cortex file', () => {
} }
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`cuda-12-0`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`), executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
) )
expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-12-0')
}) })
// Generate test for different cpu instructions on Linux // Generate test for different cpu instructions on Linux
@ -270,14 +274,14 @@ describe('test executable cortex file', () => {
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(instruction), enginePath: expect.stringContaining('bin'),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`), executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
) )
expect(engineVariant(settings)).toEqual(`linux-amd64-${instruction}`)
}) })
}) })
// Generate test for different cpu instructions on Windows // Generate test for different cpu instructions on Windows
@ -294,13 +298,13 @@ describe('test executable cortex file', () => {
mockCpuInfo.mockReturnValue([instruction]) mockCpuInfo.mockReturnValue([instruction])
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(instruction), enginePath: expect.stringContaining('bin'),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server.exe`), executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
) )
expect(engineVariant(settings)).toEqual(`windows-amd64-${instruction}`)
}) })
}) })
@ -334,13 +338,15 @@ describe('test executable cortex file', () => {
mockCpuInfo.mockReturnValue([instruction]) mockCpuInfo.mockReturnValue([instruction])
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`cuda-12-0`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server.exe`), executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
) )
expect(engineVariant(settings)).toEqual(
`windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
)
}) })
}) })
@ -374,13 +380,15 @@ describe('test executable cortex file', () => {
mockCpuInfo.mockReturnValue([instruction]) mockCpuInfo.mockReturnValue([instruction])
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`cuda-12-0`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`), executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
) )
expect(engineVariant(settings)).toEqual(
`linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
)
}) })
}) })
@ -415,13 +423,13 @@ describe('test executable cortex file', () => {
mockCpuInfo.mockReturnValue([instruction]) mockCpuInfo.mockReturnValue([instruction])
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`vulkan`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`), executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
) )
expect(engineVariant(settings)).toEqual(`linux-amd64-vulkan`)
}) })
}) })
@ -442,8 +450,7 @@ describe('test executable cortex file', () => {
mockCpuInfo.mockReturnValue([]) mockCpuInfo.mockReturnValue([])
expect(executableCortexFile(settings)).toEqual( expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
enginePath: expect.stringContaining(`x64`), enginePath: expect.stringContaining(`bin`),
binPath: expect.stringContaining(`bin`),
executablePath: executablePath:
originalPlatform === 'darwin' originalPlatform === 'darwin'
? expect.stringContaining(`cortex-server`) ? expect.stringContaining(`cortex-server`)

View File

@ -4,7 +4,6 @@ import { cpuInfo } from 'cpu-instructions'
export interface CortexExecutableOptions { export interface CortexExecutableOptions {
enginePath: string enginePath: string
binPath: string
executablePath: string executablePath: string
cudaVisibleDevices: string cudaVisibleDevices: string
vkVisibleDevices: string vkVisibleDevices: string
@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => {
if (!settings) return '' if (!settings) return ''
return settings.vulkan === true return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
? 'vulkan'
: settings.run_mode === 'cpu'
? ''
: 'cuda'
} }
/** /**
@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => {
*/ */
const os = (): string => { const os = (): string => {
return process.platform === 'win32' return process.platform === 'win32'
? 'win' ? 'windows-amd64'
: process.platform === 'darwin' : process.platform === 'darwin'
? process.arch === 'arm64' ? process.arch === 'arm64'
? 'arm64' ? 'mac-arm64'
: 'x64' : 'mac-amd64'
: 'linux' : 'linux-amd64'
} }
/** /**
@ -79,36 +74,43 @@ const cpuInstructions = (): string => {
} }
/** /**
* Find which executable file to run based on the current platform. * The executable options for the cortex.cpp extension.
* @returns The name of the executable file to run.
*/ */
export const executableCortexFile = ( export const executableCortexFile = (
gpuSetting?: GpuSetting gpuSetting?: GpuSetting
): CortexExecutableOptions => { ): CortexExecutableOptions => {
const cpuInstruction = cpuInstructions()
let engineFolder = gpuSetting?.vulkan
? 'vulkan'
: process.platform === 'darwin'
? os()
: [
gpuRunMode(gpuSetting) !== 'cuda' ||
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
? cpuInstruction
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]
.filter((e) => !!e)
.join('-')
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${extension()}` let binaryName = `cortex-server${extension()}`
const binPath = path.join(__dirname, '..', 'bin') const binPath = path.join(__dirname, '..', 'bin')
return { return {
enginePath: path.join(binPath, engineFolder), enginePath: binPath,
executablePath: path.join(binPath, binaryName), executablePath: path.join(binPath, binaryName),
binPath: binPath,
cudaVisibleDevices, cudaVisibleDevices,
vkVisibleDevices, vkVisibleDevices,
} }
} }
/**
 * Build the engine variant name for the current platform and GPU settings.
 *
 * The name is a dash-joined list of: the platform prefix from `os()`, a
 * backend / CPU-instruction segment, the GPU run mode, and the value of
 * `cudaVersion(gpuSetting)`. Empty segments are dropped.
 *
 * @param gpuSetting Optional GPU settings; when omitted the CPU branch is taken.
 * @returns The variant identifier, e.g. `linux-amd64-avx2-cuda-12-0`.
 */
export const engineVariant = (gpuSetting?: GpuSetting): string => {
  const instruction = cpuInstructions()
  const runMode = gpuRunMode(gpuSetting)

  let backendSegment: string
  if (gpuSetting?.vulkan) {
    backendSegment = 'vulkan'
  } else if (runMode !== 'cuda') {
    // CPU mode - support all variants
    backendSegment = instruction
  } else if (instruction === 'avx2' || instruction === 'avx512') {
    // GPU mode - packaged CUDA variants of avx2 and noavx
    backendSegment = 'avx2'
  } else {
    backendSegment = 'noavx'
  }

  const segments = [os(), backendSegment, runMode, cudaVersion(gpuSetting)]
  return segments.filter((segment) => !!segment).join('-')
}

View File

@ -1,6 +1,6 @@
import path from 'path' import path from 'path'
import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node' import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
import { executableCortexFile } from './execute' import { engineVariant, executableCortexFile } from './execute'
import { ProcessWatchdog } from './watchdog' import { ProcessWatchdog } from './watchdog'
import { appResourcePath } from '@janhq/core/node' import { appResourcePath } from '@janhq/core/node'
@ -20,9 +20,9 @@ function run(systemInfo?: SystemInformation): Promise<any> {
// If ngl is not set or equal to 0, run on CPU with correct instructions // If ngl is not set or equal to 0, run on CPU with correct instructions
systemInfo?.gpuSetting systemInfo?.gpuSetting
? { ? {
...systemInfo.gpuSetting, ...systemInfo.gpuSetting,
run_mode: systemInfo.gpuSetting.run_mode, run_mode: systemInfo.gpuSetting.run_mode,
} }
: undefined : undefined
) )
@ -31,7 +31,6 @@ function run(systemInfo?: SystemInformation): Promise<any> {
log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`) log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
addEnvPaths(path.join(appResourcePath(), 'shared')) addEnvPaths(path.join(appResourcePath(), 'shared'))
addEnvPaths(executableOptions.binPath)
addEnvPaths(executableOptions.enginePath) addEnvPaths(executableOptions.enginePath)
// Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH
// This is required for the cortex engine to run for now since dlls are not moved to the root // This is required for the cortex engine to run for now since dlls are not moved to the root
@ -81,15 +80,12 @@ function dispose() {
function addEnvPaths(dest: string) { function addEnvPaths(dest: string) {
// Add engine path to the PATH and LD_LIBRARY_PATH // Add engine path to the PATH and LD_LIBRARY_PATH
if (process.platform === 'win32') { if (process.platform === 'win32') {
process.env.PATH = (process.env.PATH || '').concat( process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
path.delimiter,
dest,
)
log(`[CORTEX] PATH: ${process.env.PATH}`) log(`[CORTEX] PATH: ${process.env.PATH}`)
} else { } else {
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter, path.delimiter,
dest, dest
) )
log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`) log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
} }
@ -105,4 +101,5 @@ export interface CortexProcessInfo {
export default { export default {
run, run,
dispose, dispose,
engineVariant,
} }