Merge pull request #4022 from janhq/feat/cortex-cpp-engine-variants
feat: support cortex.cpp engine variants
This commit is contained in:
commit
1eb600f881
1
.gitignore
vendored
1
.gitignore
vendored
@ -47,3 +47,4 @@ coverage
|
||||
.yarnrc
|
||||
test_results.html
|
||||
*.tsbuildinfo
|
||||
electron/shared/**
|
||||
|
||||
@ -1 +1 @@
|
||||
1.0.2
|
||||
1.0.3-rc1
|
||||
@ -2,23 +2,24 @@
|
||||
set BIN_PATH=./bin
|
||||
set SHARED_PATH=./../../electron/shared
|
||||
set /p CORTEX_VERSION=<./bin/version.txt
|
||||
set ENGINE_VERSION=0.1.39
|
||||
|
||||
@REM Download cortex.llamacpp binaries
|
||||
set VERSION=v0.1.35
|
||||
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
|
||||
set VERSION=v0.1.39
|
||||
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.39-windows-amd64
|
||||
set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
|
||||
set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
|
||||
|
||||
call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION%
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-11-7/v%ENGINE_VERSION%
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-12-0/v%ENGINE_VERSION%
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-11-7/v%ENGINE_VERSION%
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx/v%ENGINE_VERSION%
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx/v%ENGINE_VERSION%
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2/v%ENGINE_VERSION%
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx512/v%ENGINE_VERSION%
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-vulkan/v%ENGINE_VERSION%
|
||||
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
|
||||
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
|
||||
|
||||
@ -28,12 +29,12 @@ del %BIN_PATH%\cortex.exe
|
||||
|
||||
@REM Loop through each folder and move DLLs (excluding engine.dll)
|
||||
for %%F in (%SUBFOLDERS%) do (
|
||||
echo Processing folder: %BIN_PATH%\%%F
|
||||
echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\%%F
|
||||
|
||||
@REM Move all .dll files except engine.dll
|
||||
for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
|
||||
for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\%%F\*.dll) do (
|
||||
if /I not "%%~nxD"=="engine.dll" (
|
||||
move "%%D" "%BIN_PATH%"
|
||||
move "%%D" "%SHARED_PATH%"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@ -2,9 +2,11 @@
|
||||
|
||||
# Read CORTEX_VERSION
|
||||
CORTEX_VERSION=$(cat ./bin/version.txt)
|
||||
CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
|
||||
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
|
||||
CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
|
||||
ENGINE_VERSION=0.1.39
|
||||
CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download"
|
||||
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}"
|
||||
CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}"
|
||||
SHARED_PATH="../../electron/shared"
|
||||
# Detect platform
|
||||
OS_TYPE=$(uname)
|
||||
|
||||
@ -17,17 +19,17 @@ if [ "$OS_TYPE" == "Linux" ]; then
|
||||
chmod +x "./bin/cortex-server"
|
||||
|
||||
# Download engines for Linux
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
|
||||
download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
|
||||
download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx/v${ENGINE_VERSION}" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx/v${ENGINE_VERSION}" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2/v${ENGINE_VERSION}" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx512/v${ENGINE_VERSION}" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-12-0/v${ENGINE_VERSION}" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-11-7/v${ENGINE_VERSION}" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-12-0/v${ENGINE_VERSION}" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-11-7/v${ENGINE_VERSION}" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1
|
||||
download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
|
||||
download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
|
||||
|
||||
elif [ "$OS_TYPE" == "Darwin" ]; then
|
||||
# macOS downloads
|
||||
@ -38,8 +40,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
|
||||
chmod +x "./bin/cortex-server"
|
||||
|
||||
# Download engines for macOS
|
||||
download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
|
||||
download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp
|
||||
download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-arm64/v0.1.39"
|
||||
download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-amd64/v0.1.39"
|
||||
|
||||
else
|
||||
echo "Unsupported operating system: $OS_TYPE"
|
||||
|
||||
@ -120,6 +120,7 @@ export default [
|
||||
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
||||
CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
|
||||
CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
|
||||
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.39'),
|
||||
}),
|
||||
// Allow json resolution
|
||||
json(),
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
declare const NODE: string
|
||||
declare const CORTEX_API_URL: string
|
||||
declare const CORTEX_SOCKET_URL: string
|
||||
declare const CORTEX_ENGINE_VERSION: string
|
||||
declare const DEFAULT_SETTINGS: Array<any>
|
||||
declare const MODELS: Array<any>
|
||||
|
||||
|
||||
@ -18,6 +18,7 @@ import {
|
||||
fs,
|
||||
events,
|
||||
ModelEvent,
|
||||
SystemInformation,
|
||||
} from '@janhq/core'
|
||||
import PQueue from 'p-queue'
|
||||
import ky from 'ky'
|
||||
@ -67,13 +68,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
|
||||
super.onLoad()
|
||||
|
||||
this.queue.add(() => this.healthz())
|
||||
this.queue.add(() => this.setDefaultEngine(systemInfo))
|
||||
// Run the process watchdog
|
||||
const systemInfo = await systemInformation()
|
||||
await this.clean()
|
||||
await executeOnMain(NODE, 'run', systemInfo)
|
||||
|
||||
this.queue.add(() => this.healthz())
|
||||
|
||||
this.subscribeToEvents()
|
||||
|
||||
window.addEventListener('beforeunload', () => {
|
||||
@ -153,7 +153,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
* Do health check on cortex.cpp
|
||||
* @returns
|
||||
*/
|
||||
healthz(): Promise<void> {
|
||||
private healthz(): Promise<void> {
|
||||
return ky
|
||||
.get(`${CORTEX_API_URL}/healthz`, {
|
||||
retry: {
|
||||
@ -164,11 +164,24 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
.then(() => {})
|
||||
}
|
||||
|
||||
/**
|
||||
* Set default engine variant on launch
|
||||
*/
|
||||
private async setDefaultEngine(systemInfo: SystemInformation) {
|
||||
const variant = await executeOnMain(NODE, 'engineVariant', systemInfo.gpuSetting)
|
||||
return ky
|
||||
.post(
|
||||
`${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`,
|
||||
{ json: {} }
|
||||
)
|
||||
.then(() => {})
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean cortex processes
|
||||
* @returns
|
||||
*/
|
||||
clean(): Promise<any> {
|
||||
private clean(): Promise<any> {
|
||||
return ky
|
||||
.delete(`${CORTEX_API_URL}/processmanager/destroy`, {
|
||||
timeout: 2000, // maximum 2 seconds
|
||||
@ -181,7 +194,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
/**
|
||||
* Subscribe to cortex.cpp websocket events
|
||||
*/
|
||||
subscribeToEvents() {
|
||||
private subscribeToEvents() {
|
||||
this.queue.add(
|
||||
() =>
|
||||
new Promise<void>((resolve) => {
|
||||
@ -235,7 +248,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
}
|
||||
|
||||
/// Legacy
|
||||
export const getModelFilePath = async (
|
||||
const getModelFilePath = async (
|
||||
model: Model,
|
||||
file: string
|
||||
): Promise<string> => {
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { describe, expect, it } from '@jest/globals'
|
||||
import { executableCortexFile } from './execute'
|
||||
import { GpuSetting } from '@janhq/core'
|
||||
import { engineVariant, executableCortexFile } from './execute'
|
||||
import { GpuSetting } from '@janhq/core/node'
|
||||
import { cpuInfo } from 'cpu-instructions'
|
||||
|
||||
let testSettings: GpuSetting = {
|
||||
@ -30,6 +30,11 @@ jest.mock('cpu-instructions', () => ({
|
||||
let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
|
||||
mockCpuInfo.mockReturnValue([])
|
||||
|
||||
jest.mock('@janhq/core/node', () => ({
|
||||
appResourcePath: () => ".",
|
||||
log: jest.fn()
|
||||
}))
|
||||
|
||||
describe('test executable cortex file', () => {
|
||||
afterAll(function () {
|
||||
Object.defineProperty(process, 'platform', {
|
||||
@ -46,8 +51,7 @@ describe('test executable cortex file', () => {
|
||||
})
|
||||
expect(executableCortexFile(testSettings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`arm64`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath:
|
||||
originalPlatform === 'darwin'
|
||||
? expect.stringContaining(`cortex-server`)
|
||||
@ -56,13 +60,13 @@ describe('test executable cortex file', () => {
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(testSettings)).toEqual('mac-arm64')
|
||||
Object.defineProperty(process, 'arch', {
|
||||
value: 'x64',
|
||||
})
|
||||
expect(executableCortexFile(testSettings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`x64`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath:
|
||||
originalPlatform === 'darwin'
|
||||
? expect.stringContaining(`cortex-server`)
|
||||
@ -71,6 +75,7 @@ describe('test executable cortex file', () => {
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(testSettings)).toEqual('mac-amd64')
|
||||
})
|
||||
|
||||
it('executes on Windows CPU', () => {
|
||||
@ -84,13 +89,13 @@ describe('test executable cortex file', () => {
|
||||
mockCpuInfo.mockReturnValue(['avx'])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`avx`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath: expect.stringContaining(`cortex-server.exe`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
)
|
||||
expect(engineVariant()).toEqual('windows-amd64-avx')
|
||||
})
|
||||
|
||||
it('executes on Windows Cuda 11', () => {
|
||||
@ -120,13 +125,13 @@ describe('test executable cortex file', () => {
|
||||
mockCpuInfo.mockReturnValue(['avx2'])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`avx2-cuda-11-7`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath: expect.stringContaining(`cortex-server.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-11-7')
|
||||
})
|
||||
|
||||
it('executes on Windows Cuda 12', () => {
|
||||
@ -156,13 +161,15 @@ describe('test executable cortex file', () => {
|
||||
mockCpuInfo.mockReturnValue(['noavx'])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`noavx-cuda-12-0`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath: expect.stringContaining(`cortex-server.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(settings)).toEqual('windows-amd64-noavx-cuda-12-0')
|
||||
mockCpuInfo.mockReturnValue(['avx512'])
|
||||
expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-12-0')
|
||||
})
|
||||
|
||||
it('executes on Linux CPU', () => {
|
||||
@ -176,12 +183,13 @@ describe('test executable cortex file', () => {
|
||||
mockCpuInfo.mockReturnValue(['noavx'])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`noavx`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
)
|
||||
expect(engineVariant()).toEqual('linux-amd64-noavx')
|
||||
})
|
||||
|
||||
it('executes on Linux Cuda 11', () => {
|
||||
@ -208,15 +216,16 @@ describe('test executable cortex file', () => {
|
||||
},
|
||||
],
|
||||
}
|
||||
mockCpuInfo.mockReturnValue(['avx512'])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`cuda-11-7`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-11-7')
|
||||
})
|
||||
|
||||
it('executes on Linux Cuda 12', () => {
|
||||
@ -245,13 +254,13 @@ describe('test executable cortex file', () => {
|
||||
}
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`cuda-12-0`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-12-0')
|
||||
})
|
||||
|
||||
// Generate test for different cpu instructions on Linux
|
||||
@ -270,14 +279,14 @@ describe('test executable cortex file', () => {
|
||||
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(instruction),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining('shared'),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(settings)).toEqual(`linux-amd64-${instruction}`)
|
||||
})
|
||||
})
|
||||
// Generate test for different cpu instructions on Windows
|
||||
@ -294,13 +303,13 @@ describe('test executable cortex file', () => {
|
||||
mockCpuInfo.mockReturnValue([instruction])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(instruction),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining('shared'),
|
||||
executablePath: expect.stringContaining(`cortex-server.exe`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(settings)).toEqual(`windows-amd64-${instruction}`)
|
||||
})
|
||||
})
|
||||
|
||||
@ -334,13 +343,15 @@ describe('test executable cortex file', () => {
|
||||
mockCpuInfo.mockReturnValue([instruction])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`cuda-12-0`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath: expect.stringContaining(`cortex-server.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(settings)).toEqual(
|
||||
`windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
@ -374,13 +385,15 @@ describe('test executable cortex file', () => {
|
||||
mockCpuInfo.mockReturnValue([instruction])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`cuda-12-0`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(settings)).toEqual(
|
||||
`linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
@ -415,13 +428,13 @@ describe('test executable cortex file', () => {
|
||||
mockCpuInfo.mockReturnValue([instruction])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`vulkan`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
)
|
||||
expect(engineVariant(settings)).toEqual(`linux-amd64-vulkan`)
|
||||
})
|
||||
})
|
||||
|
||||
@ -442,8 +455,7 @@ describe('test executable cortex file', () => {
|
||||
mockCpuInfo.mockReturnValue([])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`x64`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
enginePath: expect.stringContaining("shared"),
|
||||
executablePath:
|
||||
originalPlatform === 'darwin'
|
||||
? expect.stringContaining(`cortex-server`)
|
||||
|
||||
@ -1,10 +1,9 @@
|
||||
import { GpuSetting } from '@janhq/core'
|
||||
import * as path from 'path'
|
||||
import { cpuInfo } from 'cpu-instructions'
|
||||
import { GpuSetting, appResourcePath, log } from '@janhq/core/node'
|
||||
|
||||
export interface CortexExecutableOptions {
|
||||
enginePath: string
|
||||
binPath: string
|
||||
executablePath: string
|
||||
cudaVisibleDevices: string
|
||||
vkVisibleDevices: string
|
||||
@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => {
|
||||
|
||||
if (!settings) return ''
|
||||
|
||||
return settings.vulkan === true
|
||||
? 'vulkan'
|
||||
: settings.run_mode === 'cpu'
|
||||
? ''
|
||||
: 'cuda'
|
||||
return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
|
||||
}
|
||||
|
||||
/**
|
||||
@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => {
|
||||
*/
|
||||
const os = (): string => {
|
||||
return process.platform === 'win32'
|
||||
? 'win'
|
||||
? 'windows-amd64'
|
||||
: process.platform === 'darwin'
|
||||
? process.arch === 'arm64'
|
||||
? 'arm64'
|
||||
: 'x64'
|
||||
: 'linux'
|
||||
? 'mac-arm64'
|
||||
: 'mac-amd64'
|
||||
: 'linux-amd64'
|
||||
}
|
||||
|
||||
/**
|
||||
@ -57,7 +52,7 @@ const extension = (): '.exe' | '' => {
|
||||
*/
|
||||
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
|
||||
const isUsingCuda =
|
||||
settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac'
|
||||
settings?.vulkan !== true && settings?.run_mode === 'gpu' && !os().includes('mac')
|
||||
|
||||
if (!isUsingCuda) return undefined
|
||||
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
|
||||
@ -79,36 +74,45 @@ const cpuInstructions = (): string => {
|
||||
}
|
||||
|
||||
/**
|
||||
* Find which executable file to run based on the current platform.
|
||||
* @returns The name of the executable file to run.
|
||||
* The executable options for the cortex.cpp extension.
|
||||
*/
|
||||
export const executableCortexFile = (
|
||||
gpuSetting?: GpuSetting
|
||||
): CortexExecutableOptions => {
|
||||
const cpuInstruction = cpuInstructions()
|
||||
let engineFolder = gpuSetting?.vulkan
|
||||
? 'vulkan'
|
||||
: process.platform === 'darwin'
|
||||
? os()
|
||||
: [
|
||||
gpuRunMode(gpuSetting) !== 'cuda' ||
|
||||
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
|
||||
? cpuInstruction
|
||||
: 'noavx',
|
||||
gpuRunMode(gpuSetting),
|
||||
cudaVersion(gpuSetting),
|
||||
]
|
||||
.filter((e) => !!e)
|
||||
.join('-')
|
||||
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
||||
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
||||
let binaryName = `cortex-server${extension()}`
|
||||
const binPath = path.join(__dirname, '..', 'bin')
|
||||
return {
|
||||
enginePath: path.join(binPath, engineFolder),
|
||||
enginePath: path.join(appResourcePath(), 'shared'),
|
||||
executablePath: path.join(binPath, binaryName),
|
||||
binPath: binPath,
|
||||
cudaVisibleDevices,
|
||||
vkVisibleDevices,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find which variant to run based on the current platform.
|
||||
*/
|
||||
export const engineVariant = (gpuSetting?: GpuSetting): string => {
|
||||
const cpuInstruction = cpuInstructions()
|
||||
let engineVariant = [
|
||||
os(),
|
||||
gpuSetting?.vulkan
|
||||
? 'vulkan'
|
||||
: gpuRunMode(gpuSetting) !== 'cuda'
|
||||
? // CPU mode - support all variants
|
||||
cpuInstruction
|
||||
: // GPU mode - packaged CUDA variants of avx2 and noavx
|
||||
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
|
||||
? 'avx2'
|
||||
: 'noavx',
|
||||
gpuRunMode(gpuSetting),
|
||||
cudaVersion(gpuSetting),
|
||||
]
|
||||
.filter((e) => !!e)
|
||||
.join('-')
|
||||
|
||||
log(`[CORTEX]: Engine variant: ${engineVariant}`)
|
||||
return engineVariant
|
||||
}
|
||||
|
||||
@ -1,8 +1,7 @@
|
||||
import path from 'path'
|
||||
import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
|
||||
import { executableCortexFile } from './execute'
|
||||
import { engineVariant, executableCortexFile } from './execute'
|
||||
import { ProcessWatchdog } from './watchdog'
|
||||
import { appResourcePath } from '@janhq/core/node'
|
||||
|
||||
// The HOST address to use for the Nitro subprocess
|
||||
const LOCAL_PORT = '39291'
|
||||
@ -20,9 +19,9 @@ function run(systemInfo?: SystemInformation): Promise<any> {
|
||||
// If ngl is not set or equal to 0, run on CPU with correct instructions
|
||||
systemInfo?.gpuSetting
|
||||
? {
|
||||
...systemInfo.gpuSetting,
|
||||
run_mode: systemInfo.gpuSetting.run_mode,
|
||||
}
|
||||
...systemInfo.gpuSetting,
|
||||
run_mode: systemInfo.gpuSetting.run_mode,
|
||||
}
|
||||
: undefined
|
||||
)
|
||||
|
||||
@ -30,16 +29,13 @@ function run(systemInfo?: SystemInformation): Promise<any> {
|
||||
log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
|
||||
log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
|
||||
|
||||
addEnvPaths(path.join(appResourcePath(), 'shared'))
|
||||
addEnvPaths(executableOptions.binPath)
|
||||
addEnvPaths(executableOptions.enginePath)
|
||||
// Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH
|
||||
// This is required for the cortex engine to run for now since dlls are not moved to the root
|
||||
addEnvPaths(
|
||||
path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp')
|
||||
)
|
||||
|
||||
const dataFolderPath = getJanDataFolderPath()
|
||||
if (watchdog) {
|
||||
watchdog.terminate()
|
||||
}
|
||||
|
||||
watchdog = new ProcessWatchdog(
|
||||
executableOptions.executablePath,
|
||||
[
|
||||
@ -81,17 +77,12 @@ function dispose() {
|
||||
function addEnvPaths(dest: string) {
|
||||
// Add engine path to the PATH and LD_LIBRARY_PATH
|
||||
if (process.platform === 'win32') {
|
||||
process.env.PATH = (process.env.PATH || '').concat(
|
||||
path.delimiter,
|
||||
dest,
|
||||
)
|
||||
log(`[CORTEX] PATH: ${process.env.PATH}`)
|
||||
process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
|
||||
} else {
|
||||
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
|
||||
path.delimiter,
|
||||
dest,
|
||||
dest
|
||||
)
|
||||
log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
|
||||
}
|
||||
}
|
||||
|
||||
@ -105,4 +96,5 @@ export interface CortexProcessInfo {
|
||||
export default {
|
||||
run,
|
||||
dispose,
|
||||
engineVariant,
|
||||
}
|
||||
|
||||
@ -9,7 +9,6 @@ import {
|
||||
OptionType,
|
||||
events,
|
||||
fs,
|
||||
baseName,
|
||||
} from '@janhq/core'
|
||||
|
||||
import { atom, useAtomValue, useSetAtom } from 'jotai'
|
||||
|
||||
@ -9,8 +9,6 @@ import { MainViewState } from '@/constants/screens'
|
||||
|
||||
import { loadModelErrorAtom } from '@/hooks/useActiveModel'
|
||||
|
||||
import { useSettings } from '@/hooks/useSettings'
|
||||
|
||||
import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
|
||||
import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
|
||||
import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
|
||||
@ -21,7 +19,6 @@ const LoadModelError = () => {
|
||||
const setMainState = useSetAtom(mainViewStateAtom)
|
||||
const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
|
||||
const activeThread = useAtomValue(activeThreadAtom)
|
||||
const { settings } = useSettings()
|
||||
|
||||
const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user