fix: #3549, #3552 - Inference on CPU is slower on Jan 0.5.3 (#3602)

This commit is contained in:
Louis 2024-09-11 14:03:53 +07:00 committed by GitHub
parent 2d05134cb1
commit 5217437912
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 921 additions and 374 deletions

View File

@ -51,13 +51,13 @@ jobs:
latest_prerelease_asset_count=$(get_asset_count "$latest_prerelease_name")
if [ "$current_version_name" = "$latest_prerelease_name" ]; then
echo "cortex cpp remote repo doesn't have update today, skip update cortex-cpp for today nightly build"
echo "cortex cpp remote repo doesn't have update today, skip update cortex.cpp for today nightly build"
echo "::set-output name=pr_created::false"
exit 0
fi
if [ "$current_version_asset_count" != "$latest_prerelease_asset_count" ]; then
echo "Latest prerelease version has different number of assets, somethink went wrong, skip update cortex-cpp for today nightly build"
echo "Latest prerelease version has different number of assets, somethink went wrong, skip update cortex.cpp for today nightly build"
echo "::set-output name=pr_created::false"
exit 1
fi

View File

@ -1,31 +1,13 @@
import fs from 'fs'
import { join } from 'path'
import {
getJanDataFolderPath,
getJanExtensionsPath,
getSystemResourceInfo,
log,
} from '../../../helper'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import { Model, ModelSettingParams, PromptTemplate } from '../../../../types'
import {
LOCAL_HOST,
NITRO_DEFAULT_PORT,
NITRO_HTTP_KILL_URL,
NITRO_HTTP_LOAD_MODEL_URL,
NITRO_HTTP_VALIDATE_MODEL_URL,
SUPPORTED_MODEL_FORMAT,
} from './consts'
// The subprocess instance for Nitro
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
// TODO: move this to core type
interface NitroModelSettings extends ModelSettingParams {
llama_model_path: string
cpu_threads: number
}
import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
import { ModelSettingParams } from '../../../../types'
/**
* Start a model
* @param modelId
* @param settingParams
* @returns
*/
export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
try {
await runModel(modelId, settingParams)
@ -40,316 +22,57 @@ export const startModel = async (modelId: string, settingParams?: ModelSettingPa
}
}
/**
 * Locates a downloaded model on disk, assembles Nitro (cortex-cpp) model
 * settings from its model.json plus caller overrides, and starts it.
 *
 * @param modelId - folder name under <janDataFolder>/models
 * @param settingParams - optional overrides merged over model.json settings
 * @throws Error when the model folder or a GGUF weights file is missing
 */
const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise<void> => {
  const janDataFolderPath = getJanDataFolderPath()
  const modelFolderFullPath = join(janDataFolderPath, 'models', modelId)

  if (!fs.existsSync(modelFolderFullPath)) {
    throw new Error(`Model not found: ${modelId}`)
  }

  const files: string[] = fs.readdirSync(modelFolderFullPath)

  // Look for GGUF model file
  const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT))

  const modelMetadataPath = join(modelFolderFullPath, 'model.json')
  const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8'))

  if (!ggufBinFile) {
    throw new Error('No GGUF model file found')
  }

  const modelBinaryPath = join(modelFolderFullPath, ggufBinFile)

  const nitroResourceProbe = await getSystemResourceInfo()
  // Spread order matters: model.json settings and then caller overrides win
  // over the probed cpu_threads default, while llama_model_path / mmproj are
  // always forced to the resolved on-disk paths.
  const nitroModelSettings: NitroModelSettings = {
    // This is critical and requires real CPU physical core count (or performance core)
    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
    ...modelMetadata.settings,
    ...settingParams,
    llama_model_path: modelBinaryPath,
    // mmproj (multimodal projector) is resolved relative to the model folder
    // when model.json declares one.
    ...(modelMetadata.settings.mmproj && {
      mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
    }),
  }

  log(`[SERVER]::Debug: Nitro model settings: ${JSON.stringify(nitroModelSettings)}`)

  // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
  if (modelMetadata.settings.prompt_template) {
    const promptTemplate = modelMetadata.settings.prompt_template
    const prompt = promptTemplateConverter(promptTemplate)
    if (prompt?.error) {
      throw new Error(prompt.error)
    }
    nitroModelSettings.system_prompt = prompt.system_prompt
    nitroModelSettings.user_prompt = prompt.user_prompt
    nitroModelSettings.ai_prompt = prompt.ai_prompt
  }

  await runNitroAndLoadModel(modelId, nitroModelSettings)
}
// TODO: move to util
/**
 * Splits a raw prompt template into its system / user / ai segments using
 * the `{system_message}` and `{prompt}` placeholders as delimiters.
 *
 * @param promptTemplate - template string, e.g. "SYS{system_message}USR{prompt}AI"
 * @returns the split segments, or `{ error }` when `{prompt}` is absent
 */
const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
  const systemMarker = '{system_message}'
  const promptMarker = '{prompt}'

  const hasSystem = promptTemplate.includes(systemMarker)
  const hasPrompt = promptTemplate.includes(promptMarker)

  if (hasSystem && hasPrompt) {
    // Both markers present: text before {system_message} is the system
    // prompt, text between the markers is the user prompt, the rest is
    // the assistant prompt.
    const systemIndex = promptTemplate.indexOf(systemMarker)
    const promptIndex = promptTemplate.indexOf(promptMarker)
    return {
      system_prompt: promptTemplate.slice(0, systemIndex),
      user_prompt: promptTemplate.slice(systemIndex + systemMarker.length, promptIndex),
      ai_prompt: promptTemplate.slice(promptIndex + promptMarker.length),
    }
  }

  if (hasPrompt) {
    // Only {prompt} present: split around it into user and assistant parts.
    const promptIndex = promptTemplate.indexOf(promptMarker)
    return {
      user_prompt: promptTemplate.slice(0, promptIndex),
      ai_prompt: promptTemplate.slice(promptIndex + promptMarker.length),
    }
  }

  // Neither marker found — the template cannot be split.
  return { error: 'Cannot split prompt template' }
}
/**
 * Full restart-and-load cycle: stops any running model, waits for the Nitro
 * port to be released, spawns a fresh cortex subprocess, loads the model and
 * validates that it reports loaded. The await order is deliberate — each step
 * depends on the previous one completing.
 *
 * @param modelId - model being (re)started; forwarded to stopModel
 * @param modelSettings - fully-resolved settings passed to the load request
 */
const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
  // Gather system information for CPU physical cores and memory
  const tcpPortUsed = require('tcp-port-used')

  await stopModel(modelId)
  // Poll every 300ms, up to 5s, for the port to become free.
  await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
  /**
   * There is a problem with Windows process manager
   * Should wait for awhile to make sure the port is free and subprocess is killed
   * The tested threshold is 500ms
   **/
  if (process.platform === 'win32') {
    await new Promise((resolve) => setTimeout(resolve, 500))
  }

  await spawnNitroProcess()
  await loadLLMModel(modelSettings)
  await validateModelStatus()
}
/**
 * Spawns the cortex-cpp subprocess and resolves once its TCP port is in use
 * (i.e. the server is accepting connections) or rejects after the 30s wait
 * times out. Wires subprocess stdout/stderr into the server log and clears
 * the module-level `subprocess` handle when the process exits.
 */
const spawnNitroProcess = async (): Promise<void> => {
  log(`[SERVER]::Debug: Spawning cortex subprocess...`)

  let binaryFolder = join(
    getJanExtensionsPath(),
    '@janhq',
    'inference-cortex-extension',
    'dist',
    'bin'
  )

  let executableOptions = executableNitroFile()
  const tcpPortUsed = require('tcp-port-used')

  const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
  // Execute the binary
  log(
    `[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
  )
  // Reuse `args` (previously the literal array was duplicated inline).
  subprocess = spawn(executableOptions.executablePath, args, {
    cwd: binaryFolder,
    env: {
      ...process.env,
      CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
    },
  })

  // Handle subprocess output
  subprocess.stdout.on('data', (data: any) => {
    log(`[SERVER]::Debug: ${data}`)
  })
  subprocess.stderr.on('data', (data: any) => {
    log(`[SERVER]::Error: ${data}`)
  })

  subprocess.on('close', (code: any) => {
    log(`[SERVER]::Debug: cortex exited with code: ${code}`)
    subprocess = undefined
  })

  // BUG FIX: this wait was previously fire-and-forget (the returned promise
  // was dropped), so this function resolved immediately and the caller could
  // issue the load-model request before cortex was listening. Returning the
  // promise makes `await spawnNitroProcess()` actually wait for readiness.
  return tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
    log(`[SERVER]::Debug: cortex is ready`)
  })
}
type NitroExecutableOptions = {
  executablePath: string
  cudaVisibleDevices: string
}

/**
 * Resolves which cortex-cpp binary to launch for the current platform.
 *
 * macOS uses per-architecture folders directly; Windows and Linux consult
 * the persisted GPU settings file to choose between the CPU build and a
 * CUDA 11.7 / 12.0 build, and surface the selected GPU via
 * CUDA_VISIBLE_DEVICES.
 *
 * @returns the executable path and the CUDA_VISIBLE_DEVICES value to export
 */
const executableNitroFile = (): NitroExecutableOptions => {
  const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
  const baseFolder = join(
    getJanExtensionsPath(),
    '@janhq',
    'inference-cortex-extension',
    'dist',
    'bin'
  )

  let platformFolder: string
  let binaryName = 'cortex-cpp'
  let cudaVisibleDevices = ''

  if (process.platform === 'darwin') {
    // macOS: separate builds for Apple Silicon and Intel.
    platformFolder = process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
  } else {
    // Windows / Linux: <prefix>-cpu, <prefix>-cuda-11-7, <prefix>-cuda-12-0
    const prefix = process.platform === 'win32' ? 'win' : 'linux'
    if (process.platform === 'win32') {
      binaryName = 'cortex-cpp.exe'
    }

    const nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
    if (nvidiaInfo['run_mode'] === 'cpu') {
      platformFolder = `${prefix}-cpu`
    } else {
      platformFolder =
        nvidiaInfo['cuda'].version === '12' ? `${prefix}-cuda-12-0` : `${prefix}-cuda-11-7`
      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
    }
  }

  return {
    executablePath: join(baseFolder, platformFolder, binaryName),
    cudaVisibleDevices,
  }
}
/**
 * Confirms the Nitro server finished loading the model.
 *
 * Sends a GET to the validation endpoint, retrying up to 5 times with a
 * 500 ms delay between attempts, then inspects the `model_loaded` flag.
 * Resolves on success; rejects with 'Validate model status failed'
 * otherwise.
 */
const validateModelStatus = async (): Promise<void> => {
  const fetchRT = require('fetch-retry')
  const fetchRetry = fetchRT(fetch)

  const res: Response = await fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
    method: 'GET',
    headers: {
      'Content-Type': 'application/json',
    },
    retries: 5,
    retryDelay: 500,
  })

  log(`[SERVER]::Debug: Validate model state success with response ${JSON.stringify(res)}`)

  // A 2xx response alone is not enough — the body must report the model
  // as loaded.
  if (res.ok) {
    const body = await res.json()
    if (body.model_loaded) {
      return
    }
  }
  return Promise.reject('Validate model status failed')
}
/**
 * Posts the resolved model settings to the Nitro load-model endpoint.
 *
 * Retries up to 3 times with a 500 ms delay. Logs and re-rejects on
 * failure so callers can surface the error.
 *
 * @param settings - fully-resolved Nitro model settings
 * @returns the raw fetch Response on success
 */
const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
  log(`[SERVER]::Debug: Loading model with params ${JSON.stringify(settings)}`)
  const fetchRT = require('fetch-retry')
  const fetchRetry = fetchRT(fetch)

  try {
    const res = await fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(settings),
      retries: 3,
      retryDelay: 500,
    })
    log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
    return res
  } catch (err) {
    log(`[SERVER]::Error: Load model failed with error ${err}`)
    return Promise.reject(err)
  }
}
/**
 * Run a model using the installed cortex extension.
 *
 * Delegates to the extension's own loadModel implementation so this server
 * path never drifts out of sync with the extension.
 *
 * @param model - model id; also the folder name under <janDataFolder>/models
 * @param settingParams - optional setting overrides forwarded to the extension
 */
const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
  const janDataFolderPath = getJanDataFolderPath()
  const modelFolder = join(janDataFolderPath, 'models', model)
  // Renamed from `module` to avoid shadowing the CommonJS `module` global.
  const modulePath = join(
    getJanExtensionsPath(),
    '@janhq',
    'inference-cortex-extension',
    'dist',
    'node',
    'index.cjs'
  )
  // Just reuse the cortex extension implementation — don't duplicate it and
  // risk the two copies drifting out of sync.
  return import(modulePath).then((extension) =>
    extension
      .loadModel(
        {
          modelFolder,
          model,
        },
        settingParams
      )
      .then(() => log(`[SERVER]::Debug: Model is loaded`))
    // BUG FIX: a trailing `.then({ message: 'Model started' })` was removed —
    // passing a non-function to .then() is silently ignored (dead code) and
    // contradicted the declared Promise<void> return type.
  )
}
/*
 * Stop model and kill nitro process.
 */
// NOTE(review): this span interleaves lines from TWO different versions of
// stopModel — an older path that aborts via fetch(NITRO_HTTP_KILL_URL) with
// an AbortController + 5s timeout and kills the subprocess, and a newer path
// that dynamically imports the cortex extension and calls unloadModel().
// It appears to be diff residue from a rendered commit page and is NOT valid
// TypeScript as written (e.g. `let module = join(` appears inside the
// reject() call). Restore exactly one of the two versions before building.
// Code left byte-identical below.
export const stopModel = async (_modelId: string) => {
if (!subprocess) {
return {
error: "Model isn't running",
}
}
return new Promise((resolve, reject) => {
const controller = new AbortController()
setTimeout(() => {
controller.abort()
reject({
error: 'Failed to stop model: Timedout',
let module = join(
getJanExtensionsPath(),
'@janhq',
'inference-cortex-extension',
'dist',
'node',
'index.cjs'
)
// Just reuse the cortex extension implementation, don't duplicate then lost of sync
return import(module).then((extension) =>
extension
.unloadModel()
.then(() => log(`[SERVER]::Debug: Model is unloaded`))
.then({
message: 'Model stopped',
})
}, 5000)
const tcpPortUsed = require('tcp-port-used')
log(`[SERVER]::Debug: Request to kill cortex`)
fetch(NITRO_HTTP_KILL_URL, {
method: 'DELETE',
signal: controller.signal,
})
.then(() => {
subprocess?.kill()
subprocess = undefined
})
.catch(() => {
// don't need to do anything, we still kill the subprocess
})
.then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
.then(() => log(`[SERVER]::Debug: Nitro process is terminated`))
.then(() =>
resolve({
message: 'Model stopped',
})
)
})
)
}

View File

@ -1,3 +1,31 @@
@echo off
set BIN_PATH=./bin
set /p CORTEX_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-noavx-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0/engines/cortex.llamacpp && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-noavx-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7/engines/cortex.llamacpp && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-noavx.tar.gz -e --strip 1 -o ./bin/win-cpu/engines/cortex.llamacpp && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan/engines/cortex.llamacpp
@REM Download cortex.llamacpp binaries
set VERSION=v0.1.25
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.25-windows-amd64
set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx2/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx512/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/win-vulkan/engines/cortex.llamacpp
@REM Loop through each folder and move DLLs (excluding engine.dll)
for %%F in (%SUBFOLDERS%) do (
echo Processing folder: %BIN_PATH%\%%F
@REM Move all .dll files except engine.dll
for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
if /I not "%%~nxD"=="engine.dll" (
move "%%D" "%BIN_PATH%"
)
)
)
echo DLL files moved successfully.

View File

@ -0,0 +1,41 @@
#!/bin/bash
# Downloads the cortex-cpp server binary and the cortex.llamacpp engine
# variants for the current platform into ./bin/<variant>/.
# Expects a `download` CLI on PATH (the npm `download` package) and
# ./bin/version.txt pinning the cortex-cpp release.

# Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt)
CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"

# Detect platform
OS_TYPE=$(uname)

if [ "$OS_TYPE" == "Linux" ]; then
  # Linux downloads
  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
  chmod +x "./bin/cortex-cpp"

  ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64"
  # Download engines for Linux: one folder per CPU instruction set
  # (noavx/avx/avx2/avx512), per CUDA version, and Vulkan.
  # NOTE(review): the trailing bare `1` argument on these lines (absent on
  # the macOS engine downloads below) looks accidental — confirm against the
  # `download` CLI usage.
  download "${ENGINE_DOWNLOAD_URL}-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1
  download "${ENGINE_DOWNLOAD_URL}-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1
  download "${ENGINE_DOWNLOAD_URL}-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1
  download "${ENGINE_DOWNLOAD_URL}-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1
  download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1
  download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1
  download "${ENGINE_DOWNLOAD_URL}-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1

elif [ "$OS_TYPE" == "Darwin" ]; then
  # macOS downloads: universal server builds per architecture
  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/mac-arm64" 1
  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/mac-x64" 1
  chmod +x "./bin/mac-arm64/cortex-cpp"
  chmod +x "./bin/mac-x64/cortex-cpp"

  ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac"
  # Download engines for macOS
  download "${ENGINE_DOWNLOAD_URL}-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp
  download "${ENGINE_DOWNLOAD_URL}-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp

else
  echo "Unsupported operating system: $OS_TYPE"
  exit 1
fi

View File

@ -2,7 +2,7 @@
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
"version": "1.0.15",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
@ -10,13 +10,11 @@
"scripts": {
"test": "jest",
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-noavx.tar.gz -e --strip 1 -o ./bin/linux-cpu/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-noavx-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-noavx-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan/engines/cortex.llamacpp",
"downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-amd64/engines/cortex.llamacpp",
"downloadnitro:linux:darwin": "./download.sh",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:win32": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish": "yarn test && run-script-os"
},
"exports": {
@ -49,6 +47,7 @@
},
"dependencies": {
"@janhq/core": "file:../../core",
"cpu-instructions": "^0.0.13",
"decompress": "^4.2.1",
"fetch-retry": "^5.0.6",
"rxjs": "^7.8.1",
@ -68,6 +67,7 @@
"tcp-port-used",
"fetch-retry",
"@janhq/core",
"decompress"
"decompress",
"cpu-instructions"
]
}

View File

@ -96,7 +96,7 @@ export default [
llama3170bJson,
gemma22bJson,
gemma29bJson,
gemma227bJson
gemma227bJson,
]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
@ -117,7 +117,10 @@ export default [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
typescript({
useTsconfigDeclarationDir: true,
exclude: ['**/__tests__', '**/*.test.ts'],
}),
// Compile TypeScript files
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
@ -139,7 +142,7 @@ export default [
{ file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
],
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
external: ['@janhq/core/node'],
external: ['@janhq/core/node', 'cpu-instructions'],
watch: {
include: 'src/node/**',
},
@ -147,7 +150,10 @@ export default [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
typescript({
useTsconfigDeclarationDir: true,
exclude: ['**/__tests__', '**/*.test.ts'],
}),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
@ -156,7 +162,6 @@ export default [
resolve({
extensions: ['.ts', '.js', '.json'],
}),
// Resolve source maps to the original source
sourceMaps(),
],

View File

@ -73,6 +73,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
this.registerModels(models)
super.onLoad()
// Add additional dependencies PATH to the env
executeOnMain(NODE, 'addAdditionalDependencies', {
name: this.name,
version: this.version,

View File

@ -1,7 +1,7 @@
import { describe, expect, it } from '@jest/globals'
import { executableNitroFile } from './execute'
import { GpuSetting } from '@janhq/core'
import { sep } from 'path'
import { cpuInfo } from 'cpu-instructions'
let testSettings: GpuSetting = {
run_mode: 'cpu',
@ -22,6 +22,14 @@ let testSettings: GpuSetting = {
}
const originalPlatform = process.platform
jest.mock('cpu-instructions', () => ({
cpuInfo: {
cpuInfo: jest.fn(),
},
}))
let mock = cpuInfo.cpuInfo as jest.Mock
mock.mockReturnValue([])
describe('test executable nitro file', () => {
afterAll(function () {
Object.defineProperty(process, 'platform', {
@ -38,17 +46,19 @@ describe('test executable nitro file', () => {
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
enginePath: expect.stringContaining(`mac-arm64`),
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
Object.defineProperty(process, 'arch', {
value: 'amd64',
value: 'x64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
enginePath: expect.stringContaining(`mac-x64`),
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -62,14 +72,11 @@ describe('test executable nitro file', () => {
const settings: GpuSetting = {
...testSettings,
run_mode: 'cpu',
cuda: {
exist: true,
version: '11',
},
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
enginePath: expect.stringContaining(`win`),
executablePath: expect.stringContaining(`cortex-cpp.exe`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -102,7 +109,8 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
enginePath: expect.stringContaining(`win-cuda-11-7`),
executablePath: expect.stringContaining(`cortex-cpp.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -135,7 +143,8 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
enginePath: expect.stringContaining(`win-cuda-12-0`),
executablePath: expect.stringContaining(`cortex-cpp.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -152,7 +161,8 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
enginePath: expect.stringContaining(`linux`),
executablePath: expect.stringContaining(`cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -185,7 +195,8 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
enginePath: expect.stringContaining(`linux-cuda-11-7`),
executablePath: expect.stringContaining(`cortex-cpp`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -218,10 +229,203 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
enginePath: expect.stringContaining(`linux-cuda-12-0`),
executablePath: expect.stringContaining(`cortex-cpp`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
})
// Generate test for different cpu instructions on Linux
it(`executes on Linux CPU with different instructions`, () => {
Object.defineProperty(process, 'platform', {
value: 'linux',
})
const settings: GpuSetting = {
...testSettings,
run_mode: 'cpu',
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mock.mockReturnValue([instruction])
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`linux-${instruction}`),
executablePath: expect.stringContaining(`cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
})
})
// Generate test for different cpu instructions on Windows
it(`executes on Windows CPU with different instructions`, () => {
Object.defineProperty(process, 'platform', {
value: 'win32',
})
const settings: GpuSetting = {
...testSettings,
run_mode: 'cpu',
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mock.mockReturnValue([instruction])
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`win-${instruction}`),
executablePath: expect.stringContaining(`cortex-cpp.exe`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
})
})
// Generate test for different cpu instructions on Windows
it(`executes on Windows GPU with different instructions`, () => {
Object.defineProperty(process, 'platform', {
value: 'win32',
})
const settings: GpuSetting = {
...testSettings,
run_mode: 'gpu',
cuda: {
exist: true,
version: '12',
},
nvidia_driver: {
exist: true,
version: '12',
},
gpus_in_use: ['0'],
gpus: [
{
id: '0',
name: 'NVIDIA GeForce GTX 1080',
vram: '80000000',
},
],
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mock.mockReturnValue([instruction])
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`win-cuda-12-0`),
executablePath: expect.stringContaining(`cortex-cpp.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
})
})
// Generate test for different cpu instructions on Linux
it(`executes on Linux GPU with different instructions`, () => {
Object.defineProperty(process, 'platform', {
value: 'linux',
})
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
const settings: GpuSetting = {
...testSettings,
run_mode: 'gpu',
cuda: {
exist: true,
version: '12',
},
nvidia_driver: {
exist: true,
version: '12',
},
gpus_in_use: ['0'],
gpus: [
{
id: '0',
name: 'NVIDIA GeForce GTX 1080',
vram: '80000000',
},
],
}
cpuInstructions.forEach((instruction) => {
mock.mockReturnValue([instruction])
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`linux-cuda-12-0`),
executablePath: expect.stringContaining(`cortex-cpp`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
})
})
// Generate test for different cpu instructions on Linux
it(`executes on Linux Vulkan should not have CPU instructions included`, () => {
Object.defineProperty(process, 'platform', {
value: 'linux',
})
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
const settings: GpuSetting = {
...testSettings,
run_mode: 'gpu',
vulkan: true,
cuda: {
exist: true,
version: '12',
},
nvidia_driver: {
exist: true,
version: '12',
},
gpus_in_use: ['0'],
gpus: [
{
id: '0',
name: 'NVIDIA GeForce GTX 1080',
vram: '80000000',
},
],
}
cpuInstructions.forEach((instruction) => {
mock.mockReturnValue([instruction])
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`linux-vulkan`),
executablePath: expect.stringContaining(`cortex-cpp`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
})
})
// Generate test for different cpu instructions on MacOS
it(`executes on MacOS with different instructions`, () => {
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach(() => {
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
const settings: GpuSetting = {
...testSettings,
run_mode: 'cpu',
}
mock.mockReturnValue([])
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`mac-x64`),
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
})
})
})

View File

@ -1,37 +1,59 @@
import { GpuSetting } from '@janhq/core'
import * as path from 'path'
import { cpuInfo } from 'cpu-instructions'
export interface NitroExecutableOptions {
enginePath: string
executablePath: string
cudaVisibleDevices: string
vkVisibleDevices: string
}
const runMode = (settings?: GpuSetting): string => {
/**
* The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
* @param settings
* @returns
*/
const gpuRunMode = (settings?: GpuSetting): string => {
if (process.platform === 'darwin')
// MacOS now has universal binaries
return ''
if (!settings) return 'cpu'
if (!settings) return ''
return settings.vulkan === true
? 'vulkan'
: settings.run_mode === 'cpu'
? 'cpu'
? ''
: 'cuda'
}
/**
* The OS & architecture that the current process is running on.
* @returns win, mac-x64, mac-arm64, or linux
*/
const os = (): string => {
return process.platform === 'win32'
? 'win'
: process.platform === 'darwin'
? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
? process.arch === 'arm64'
? 'mac-arm64'
: 'mac-x64'
: 'linux'
}
/**
* The cortex.cpp extension based on the current platform.
* @returns .exe if on Windows, otherwise an empty string.
*/
const extension = (): '.exe' | '' => {
return process.platform === 'win32' ? '.exe' : ''
}
/**
* The CUDA version that will be set - either '11-7' or '12-0'.
* @param settings
* @returns
*/
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
const isUsingCuda =
settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac'
@ -40,6 +62,21 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
}
/**
* The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
* @returns
*/
const cpuInstructions = () => {
if (process.platform === 'darwin') return ''
return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
? 'avx512'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
? 'avx2'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
? 'avx'
: 'noavx'
}
/**
* Find which executable file to run based on the current platform.
* @returns The name of the executable file to run.
@ -47,15 +84,26 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
export const executableNitroFile = (
gpuSetting?: GpuSetting
): NitroExecutableOptions => {
let binaryFolder = [os(), runMode(gpuSetting), cudaVersion(gpuSetting)]
let engineFolder = [
os(),
...(gpuSetting?.vulkan
? []
: [
gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]),
gpuSetting?.vulkan ? 'vulkan' : undefined,
]
.filter((e) => !!e)
.join('-')
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-cpp${extension()}`
let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}`
return {
executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),
enginePath: path.join(__dirname, '..', 'bin', engineFolder),
executablePath: path.join(__dirname, '..', 'bin', binaryName),
cudaVisibleDevices,
vkVisibleDevices,
}

View File

@ -0,0 +1,465 @@
// ---------------------------------------------------------------------------
// Module mocks. Declared before the imports below so ./index is loaded with
// every external dependency already stubbed out.
// ---------------------------------------------------------------------------

// fetch-retry: every HTTP request succeeds and reports the model as loaded.
jest.mock('fetch-retry', () => ({
  default: () => () => {
    return Promise.resolve({
      ok: true,
      status: 200,
      json: () =>
        Promise.resolve({
          model_loaded: true,
        }),
      text: () => Promise.resolve(''),
    })
  },
}))

// path: minimal stub — parse() always reports a fixed parent dir and
// delimiter.concat() yields '', so PATH mutation becomes a no-op in tests.
jest.mock('path', () => ({
  default: {
    isAbsolute: jest.fn(),
    join: jest.fn(),
    parse: () => {
      return { dir: 'dir' }
    },
    delimiter: { concat: () => '' },
  },
}))

// decompress: archive extraction always succeeds.
jest.mock('decompress', () => ({
  default: () => {
    return Promise.resolve()
  },
}))

// @janhq/core/node: keep the real module but pin the data folder to '' and
// report a fixed single-core / single-GPU system so spawn args are stable.
jest.mock('@janhq/core/node', () => ({
  ...jest.requireActual('@janhq/core/node'),
  getJanDataFolderPath: () => '',
  getSystemResourceInfo: () => {
    return {
      cpu: {
        cores: 1,
        logicalCores: 1,
        threads: 1,
        model: 'model',
        speed: 1,
      },
      memory: {
        total: 1,
        free: 1,
      },
      gpu: {
        model: 'model',
        memory: 1,
        cuda: {
          version: 'version',
          devices: 'devices',
        },
        vulkan: {
          version: 'version',
          devices: 'devices',
        },
      },
    }
  },
}))

// fs: the model folder listing is always empty.
jest.mock('fs', () => ({
  default: {
    readdirSync: () => [],
  },
}))

// child_process: exec/spawn return inert stream-like stubs; spawn exposes a
// fake pid so the dispose/kill path has something to terminate.
jest.mock('child_process', () => ({
  exec: () => {
    return {
      stdout: { on: jest.fn() },
      stderr: { on: jest.fn() },
      on: jest.fn(),
    }
  },
  spawn: () => {
    return {
      stdout: { on: jest.fn() },
      stderr: { on: jest.fn() },
      on: jest.fn(),
      pid: '111',
    }
  },
}))

// tcp-port-used: the port is always free and the server always comes up.
jest.mock('tcp-port-used', () => ({
  default: {
    waitUntilFree: () => Promise.resolve(true),
    waitUntilUsed: () => Promise.resolve(true),
  },
}))

// ./execute: resolve fixed executable/engine paths so no real binary lookup
// happens during the tests.
jest.mock('./execute', () => ({
  executableNitroFile: () => {
    return {
      enginePath: 'enginePath',
      executablePath: 'executablePath',
      cudaVisibleDevices: 'cudaVisibleDevices',
      vkVisibleDevices: 'vkVisibleDevices',
    }
  },
}))

// terminate: invoke the completion callback immediately so subprocess kill
// resolves synchronously.
// Fix: use primitive `string` and a concrete function type instead of the
// boxed `String` wrapper and the catch-all `Function` type.
jest.mock('terminate', () => ({
  default: (id: string, func: () => void) => {
    console.log(id)
    func()
  },
}))
import * as execute from './execute'
import index from './index'

// Alias so jest.spyOn(executeMock, 'executableNitroFile') reads clearly in
// the tests below. Never reassigned — declared const (was `let`).
const executeMock = execute

// Minimal-but-valid model payload shared by the tests. Typed `any` because
// individual tests mutate fields (e.g. `engine`, `settings.ngl`) in place.
const modelInitOptions: any = {
  modelFolder: '/path/to/model',
  model: {
    id: 'test',
    name: 'test',
    engine: 'nitro',
    version: '0.0',
    format: 'GGUF',
    object: 'model',
    sources: [],
    created: 0,
    description: 'test',
    parameters: {},
    metadata: {
      author: '',
      tags: [],
      size: 0,
    },
    settings: {
      prompt_template: '{prompt}',
      llama_model_path: 'model.gguf',
    },
  },
}
// Exercises the public loadModel entry point against the fully-mocked
// environment declared at the top of this file. Tests share (and mutate)
// modelInitOptions, so their order matters.
describe('loadModel', () => {
  it('should load a model successfully', async () => {
    // Mock the necessary parameters and system information
    const systemInfo = {
      // Mock the system information if needed
    }
    // Call the loadModel function
    const result = await index.loadModel(modelInitOptions, systemInfo)
    // loadModel resolves with undefined on success
    expect(result).toBeUndefined()
  })
  it('should reject with an error message if the model is not a nitro model', async () => {
    // Mock the necessary parameters and system information
    const systemInfo = {
      // Mock the system information if needed
    }
    // Temporarily mark the shared fixture as a non-nitro engine
    modelInitOptions.model.engine = 'not-nitro'
    // Call the loadModel function
    try {
      await index.loadModel(modelInitOptions, systemInfo)
    } catch (error) {
      // NOTE(review): without expect.assertions(1) this test still passes if
      // loadModel resolves instead of rejecting — confirm the rejection path.
      expect(error).toBe('Not a cortex model')
    }
    // Restore the engine so later tests see the original value
    modelInitOptions.model.engine = 'nitro'
  })
  it('should reject if model load failed with an error message', async () => {
    // Mock the necessary parameters and system information
    const systemInfo = {
      // Mock the system information if needed
    }
    // Mock the fetch-retry module to return a failed response
    // NOTE(review): jest.mock inside a test body is not hoisted and ./index
    // was already imported with the top-of-file mock, so this factory likely
    // has no effect here — the catch branch may never run (vacuous pass).
    jest.mock('fetch-retry', () => ({
      default: () => () => {
        return Promise.resolve({
          ok: false,
          status: 500,
          json: () =>
            Promise.resolve({
              model_loaded: false,
            }),
          text: () => Promise.resolve('Failed to load model'),
        })
      },
    }))
    // Call the loadModel function
    try {
      await index.loadModel(modelInitOptions, systemInfo)
    } catch (error) {
      // Assert that the error message is as expected
      expect(error).toBe('Failed to load model')
    }
  })
  it('should reject if port not available', async () => {
    // Mock the necessary parameters and system information
    const systemInfo = {
      // Mock the system information if needed
    }
    // Mock the tcp-port-used module to return false
    // NOTE(review): same caveat as above — this in-test jest.mock does not
    // replace the already-imported module; the assertion may never execute.
    jest.mock('tcp-port-used', () => ({
      default: {
        waitUntilFree: () => Promise.resolve(false),
        waitUntilUsed: () => Promise.resolve(false),
      },
    }))
    // Call the loadModel function
    try {
      await index.loadModel(modelInitOptions, systemInfo)
    } catch (error) {
      // Assert that the error message is as expected
      expect(error).toBe('Port not available')
    }
  })
  it('should run on GPU model if ngl is set', async () => {
    const systemInfo: any = {
      gpuSetting: {
        run_mode: 'gpu',
      },
    }
    // Spy executableNitroFile so we can inspect the GpuSetting it receives
    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
      enginePath: '',
      executablePath: '',
      cudaVisibleDevices: '',
      vkVisibleDevices: '',
    })
    Object.defineProperty(process, 'platform', { value: 'win32' })
    await index.loadModel(
      {
        ...modelInitOptions,
        model: {
          ...modelInitOptions.model,
          settings: {
            ...modelInitOptions.model.settings,
            ngl: 40,
          },
        },
      },
      systemInfo
    )
    // ngl > 0 keeps the requested GPU run mode
    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
      run_mode: 'gpu',
    })
  })
  it('should run on correct CPU instructions if ngl is not set', async () => {
    const systemInfo: any = {
      gpuSetting: {
        run_mode: 'gpu',
      },
    }
    // Spy executableNitroFile
    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
      enginePath: '',
      executablePath: '',
      cudaVisibleDevices: '',
      vkVisibleDevices: '',
    })
    Object.defineProperty(process, 'platform', { value: 'win32' })
    await index.loadModel(
      {
        ...modelInitOptions,
        model: {
          ...modelInitOptions.model,
          settings: {
            ...modelInitOptions.model.settings,
            ngl: undefined,
          },
        },
      },
      systemInfo
    )
    // An undefined ngl must force CPU run mode (the slow-CPU-inference fix)
    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
      run_mode: 'cpu',
    })
  })
  it('should run on correct CPU instructions if ngl is 0', async () => {
    const systemInfo: any = {
      gpuSetting: {
        run_mode: 'gpu',
      },
    }
    // Spy executableNitroFile
    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
      enginePath: '',
      executablePath: '',
      cudaVisibleDevices: '',
      vkVisibleDevices: '',
    })
    Object.defineProperty(process, 'platform', { value: 'win32' })
    await index.loadModel(
      {
        ...modelInitOptions,
        model: {
          ...modelInitOptions.model,
          settings: {
            ...modelInitOptions.model.settings,
            ngl: 0,
          },
        },
      },
      systemInfo
    )
    // ngl === 0 (no layers offloaded) must also force CPU run mode
    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
      run_mode: 'cpu',
    })
  })
})
// Exercises unloadModel. The global fetch mock reports success, so the
// failure branches below depend on in-test jest.mock calls (see notes).
describe('unloadModel', () => {
  it('should unload a model successfully', async () => {
    // Call the unloadModel function
    const result = await index.unloadModel()
    // unloadModel resolves with undefined on success
    expect(result).toBeUndefined()
  })
  it('should reject with an error message if the model is not a nitro model', async () => {
    // Call the unloadModel function
    try {
      await index.unloadModel()
    } catch (error) {
      // NOTE(review): unloadModel succeeds under the global mocks, so this
      // catch branch is likely never reached — the test passes vacuously.
      expect(error).toBe('Not a cortex model')
    }
  })
  it('should reject if model unload failed with an error message', async () => {
    // Mock the fetch-retry module to return a failed response
    // NOTE(review): jest.mock inside a test body does not replace the
    // already-imported module; confirm this failure path is really exercised.
    jest.mock('fetch-retry', () => ({
      default: () => () => {
        return Promise.resolve({
          ok: false,
          status: 500,
          json: () =>
            Promise.resolve({
              model_unloaded: false,
            }),
          text: () => Promise.resolve('Failed to unload model'),
        })
      },
    }))
    // Call the unloadModel function
    try {
      await index.unloadModel()
    } catch (error) {
      // Assert that the error message is as expected
      expect(error).toBe('Failed to unload model')
    }
  })
  it('should reject if port not available', async () => {
    // Mock the tcp-port-used module to return false
    // NOTE(review): same in-test jest.mock caveat as above.
    jest.mock('tcp-port-used', () => ({
      default: {
        waitUntilFree: () => Promise.resolve(false),
        waitUntilUsed: () => Promise.resolve(false),
      },
    }))
    // Call the unloadModel function
    try {
      await index.unloadModel()
    } catch (error) {
      // Assert that the error message is as expected
      expect(error).toBe('Port not available')
    }
  })
})
// dispose() behaviour differs per platform, so process.platform is
// overridden before each call. These overrides persist beyond this describe.
describe('dispose', () => {
  it('should dispose a model successfully on Mac', async () => {
    Object.defineProperty(process, 'platform', {
      value: 'darwin',
    })
    // Call the dispose function
    const result = await index.dispose()
    // dispose resolves with undefined on success
    expect(result).toBeUndefined()
  })
  it('should kill the subprocess successfully on Windows', async () => {
    Object.defineProperty(process, 'platform', {
      value: 'win32',
    })
    // Call the kill-subprocess path; the mocked terminate() invokes its
    // callback immediately, so this resolves without a real process.
    const result = await index.dispose()
    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })
})
// The spawned (mocked) subprocess is still tracked, so the process info
// must report it as running.
describe('getCurrentNitroProcessInfo', () => {
  it('should return the current nitro process info', async () => {
    const processInfo = await index.getCurrentNitroProcessInfo()
    expect(processInfo).toEqual({ isRunning: true })
  })
})
// decompressRunner extracts the bundled engine archive; decompress is mocked
// at the top of this file to always resolve.
describe('decompressRunner', () => {
  it('should decompress the runner successfully', async () => {
    // NOTE(review): this in-test jest.mock is redundant — the top-of-file
    // mock already makes decompress resolve, and a late jest.mock does not
    // replace an already-imported module anyway.
    jest.mock('decompress', () => ({
      default: () => {
        return Promise.resolve()
      },
    }))
    // Call the decompressRunner function
    const result = await index.decompressRunner('', '')
    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })
  it('should not reject if decompression failed', async () => {
    // NOTE(review): same caveat — this rejection mock likely has no effect;
    // the test still documents that decompress failures must be swallowed.
    jest.mock('decompress', () => ({
      default: () => {
        return Promise.reject('Failed to decompress')
      },
    }))
    // Call the decompressRunner function
    const result = await index.decompressRunner('', '')
    expect(result).toBeUndefined()
  })
})
// addAdditionalDependencies should complete without a return value for a
// well-formed dependency descriptor.
describe('addAdditionalDependencies', () => {
  it('should add additional dependencies successfully', async () => {
    const dependency = { name: 'name', version: 'version' }
    const outcome = await index.addAdditionalDependencies(dependency)
    expect(outcome).toBeUndefined()
  })
})

View File

@ -263,10 +263,10 @@ async function validateModelStatus(modelId: string): Promise<void> {
log(`[CORTEX]::Debug: Validating model ${modelId}`)
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: 'POST',
body: JSON.stringify({
body: JSON.stringify({
model: modelId,
// TODO: force to use cortex llamacpp by default
engine: 'cortex.llamacpp'
engine: 'cortex.llamacpp',
}),
headers: {
'Content-Type': 'application/json',
@ -365,14 +365,37 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
log(`[CORTEX]::Debug: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => {
let executableOptions = executableNitroFile(systemInfo?.gpuSetting)
let executableOptions = executableNitroFile(
// If ngl is not set or equal to 0, run on CPU with correct instructions
systemInfo?.gpuSetting
? {
...systemInfo.gpuSetting,
run_mode:
currentSettings?.ngl === undefined || currentSettings.ngl === 0
? 'cpu'
: systemInfo.gpuSetting.run_mode,
}
: undefined
)
const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
// Execute the binary
log(
`[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
)
log(path.parse(executableOptions.executablePath).dir)
log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)
// Add engine path to the PATH and LD_LIBRARY_PATH
process.env.PATH = (process.env.PATH || '').concat(
path.delimiter,
executableOptions.enginePath
)
log(`[CORTEX] PATH: ${process.env.PATH}`)
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
executableOptions.enginePath
)
subprocess = spawn(
executableOptions.executablePath,
['1', LOCAL_HOST, PORT.toString()],
@ -380,6 +403,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
cwd: path.join(path.parse(executableOptions.executablePath).dir),
env: {
...process.env,
ENGINE_PATH: executableOptions.enginePath,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
// Vulkan - Support 1 device at a time for now
...(executableOptions.vkVisibleDevices?.length > 0 && {
@ -440,12 +464,19 @@ const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
}
const addAdditionalDependencies = (data: { name: string; version: string }) => {
log(
`[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}`
)
const additionalPath = path.delimiter.concat(
path.join(getJanDataFolderPath(), 'engines', data.name, data.version)
)
// Set the updated PATH
process.env.PATH = (process.env.PATH || '').concat(additionalPath)
process.env.PATH = (process.env.PATH || '').concat(
path.delimiter,
additionalPath
)
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
additionalPath
)
}

View File

@ -15,5 +15,6 @@
"importHelpers": true,
"typeRoots": ["node_modules/@types"]
},
"include": ["src"]
"include": ["src"],
"exclude": ["src/**/*.test.ts"]
}