parent
2d05134cb1
commit
5217437912
@ -51,13 +51,13 @@ jobs:
|
|||||||
latest_prerelease_asset_count=$(get_asset_count "$latest_prerelease_name")
|
latest_prerelease_asset_count=$(get_asset_count "$latest_prerelease_name")
|
||||||
|
|
||||||
if [ "$current_version_name" = "$latest_prerelease_name" ]; then
|
if [ "$current_version_name" = "$latest_prerelease_name" ]; then
|
||||||
echo "cortex cpp remote repo doesn't have update today, skip update cortex-cpp for today nightly build"
|
echo "cortex cpp remote repo doesn't have update today, skip update cortex.cpp for today nightly build"
|
||||||
echo "::set-output name=pr_created::false"
|
echo "::set-output name=pr_created::false"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$current_version_asset_count" != "$latest_prerelease_asset_count" ]; then
|
if [ "$current_version_asset_count" != "$latest_prerelease_asset_count" ]; then
|
||||||
echo "Latest prerelease version has different number of assets, somethink went wrong, skip update cortex-cpp for today nightly build"
|
echo "Latest prerelease version has different number of assets, somethink went wrong, skip update cortex.cpp for today nightly build"
|
||||||
echo "::set-output name=pr_created::false"
|
echo "::set-output name=pr_created::false"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|||||||
@ -1,31 +1,13 @@
|
|||||||
import fs from 'fs'
|
|
||||||
import { join } from 'path'
|
import { join } from 'path'
|
||||||
import {
|
import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
|
||||||
getJanDataFolderPath,
|
import { ModelSettingParams } from '../../../../types'
|
||||||
getJanExtensionsPath,
|
|
||||||
getSystemResourceInfo,
|
|
||||||
log,
|
|
||||||
} from '../../../helper'
|
|
||||||
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
|
|
||||||
import { Model, ModelSettingParams, PromptTemplate } from '../../../../types'
|
|
||||||
import {
|
|
||||||
LOCAL_HOST,
|
|
||||||
NITRO_DEFAULT_PORT,
|
|
||||||
NITRO_HTTP_KILL_URL,
|
|
||||||
NITRO_HTTP_LOAD_MODEL_URL,
|
|
||||||
NITRO_HTTP_VALIDATE_MODEL_URL,
|
|
||||||
SUPPORTED_MODEL_FORMAT,
|
|
||||||
} from './consts'
|
|
||||||
|
|
||||||
// The subprocess instance for Nitro
|
|
||||||
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
|
|
||||||
|
|
||||||
// TODO: move this to core type
|
|
||||||
interface NitroModelSettings extends ModelSettingParams {
|
|
||||||
llama_model_path: string
|
|
||||||
cpu_threads: number
|
|
||||||
}
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start a model
|
||||||
|
* @param modelId
|
||||||
|
* @param settingParams
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
|
export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
|
||||||
try {
|
try {
|
||||||
await runModel(modelId, settingParams)
|
await runModel(modelId, settingParams)
|
||||||
@ -40,316 +22,57 @@ export const startModel = async (modelId: string, settingParams?: ModelSettingPa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise<void> => {
|
|
||||||
const janDataFolderPath = getJanDataFolderPath()
|
|
||||||
const modelFolderFullPath = join(janDataFolderPath, 'models', modelId)
|
|
||||||
|
|
||||||
if (!fs.existsSync(modelFolderFullPath)) {
|
|
||||||
throw new Error(`Model not found: ${modelId}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
const files: string[] = fs.readdirSync(modelFolderFullPath)
|
|
||||||
|
|
||||||
// Look for GGUF model file
|
|
||||||
const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT))
|
|
||||||
|
|
||||||
const modelMetadataPath = join(modelFolderFullPath, 'model.json')
|
|
||||||
const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8'))
|
|
||||||
|
|
||||||
if (!ggufBinFile) {
|
|
||||||
throw new Error('No GGUF model file found')
|
|
||||||
}
|
|
||||||
const modelBinaryPath = join(modelFolderFullPath, ggufBinFile)
|
|
||||||
|
|
||||||
const nitroResourceProbe = await getSystemResourceInfo()
|
|
||||||
const nitroModelSettings: NitroModelSettings = {
|
|
||||||
// This is critical and requires real CPU physical core count (or performance core)
|
|
||||||
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
|
|
||||||
...modelMetadata.settings,
|
|
||||||
...settingParams,
|
|
||||||
llama_model_path: modelBinaryPath,
|
|
||||||
...(modelMetadata.settings.mmproj && {
|
|
||||||
mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
|
|
||||||
}),
|
|
||||||
}
|
|
||||||
|
|
||||||
log(`[SERVER]::Debug: Nitro model settings: ${JSON.stringify(nitroModelSettings)}`)
|
|
||||||
|
|
||||||
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
|
|
||||||
if (modelMetadata.settings.prompt_template) {
|
|
||||||
const promptTemplate = modelMetadata.settings.prompt_template
|
|
||||||
const prompt = promptTemplateConverter(promptTemplate)
|
|
||||||
if (prompt?.error) {
|
|
||||||
throw new Error(prompt.error)
|
|
||||||
}
|
|
||||||
nitroModelSettings.system_prompt = prompt.system_prompt
|
|
||||||
nitroModelSettings.user_prompt = prompt.user_prompt
|
|
||||||
nitroModelSettings.ai_prompt = prompt.ai_prompt
|
|
||||||
}
|
|
||||||
|
|
||||||
await runNitroAndLoadModel(modelId, nitroModelSettings)
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: move to util
|
|
||||||
const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
|
|
||||||
// Split the string using the markers
|
|
||||||
const systemMarker = '{system_message}'
|
|
||||||
const promptMarker = '{prompt}'
|
|
||||||
|
|
||||||
if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) {
|
|
||||||
// Find the indices of the markers
|
|
||||||
const systemIndex = promptTemplate.indexOf(systemMarker)
|
|
||||||
const promptIndex = promptTemplate.indexOf(promptMarker)
|
|
||||||
|
|
||||||
// Extract the parts of the string
|
|
||||||
const system_prompt = promptTemplate.substring(0, systemIndex)
|
|
||||||
const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex)
|
|
||||||
const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
|
|
||||||
|
|
||||||
// Return the split parts
|
|
||||||
return { system_prompt, user_prompt, ai_prompt }
|
|
||||||
} else if (promptTemplate.includes(promptMarker)) {
|
|
||||||
// Extract the parts of the string for the case where only promptMarker is present
|
|
||||||
const promptIndex = promptTemplate.indexOf(promptMarker)
|
|
||||||
const user_prompt = promptTemplate.substring(0, promptIndex)
|
|
||||||
const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
|
|
||||||
|
|
||||||
// Return the split parts
|
|
||||||
return { user_prompt, ai_prompt }
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return an error if none of the conditions are met
|
|
||||||
return { error: 'Cannot split prompt template' }
|
|
||||||
}
|
|
||||||
|
|
||||||
const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
|
|
||||||
// Load tcp-port-used on demand; it is used below to wait until the Nitro port is free
|
|
||||||
const tcpPortUsed = require('tcp-port-used')
|
|
||||||
|
|
||||||
await stopModel(modelId)
|
|
||||||
await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* There is a problem with Windows process manager
|
|
||||||
* Wait a short while to make sure the port is free and the subprocess has been killed
|
|
||||||
* The tested threshold is 500ms
|
|
||||||
**/
|
|
||||||
if (process.platform === 'win32') {
|
|
||||||
await new Promise((resolve) => setTimeout(resolve, 500))
|
|
||||||
}
|
|
||||||
|
|
||||||
await spawnNitroProcess()
|
|
||||||
await loadLLMModel(modelSettings)
|
|
||||||
await validateModelStatus()
|
|
||||||
}
|
|
||||||
|
|
||||||
const spawnNitroProcess = async (): Promise<void> => {
|
|
||||||
log(`[SERVER]::Debug: Spawning cortex subprocess...`)
|
|
||||||
|
|
||||||
let binaryFolder = join(
|
|
||||||
getJanExtensionsPath(),
|
|
||||||
'@janhq',
|
|
||||||
'inference-cortex-extension',
|
|
||||||
'dist',
|
|
||||||
'bin'
|
|
||||||
)
|
|
||||||
|
|
||||||
let executableOptions = executableNitroFile()
|
|
||||||
const tcpPortUsed = require('tcp-port-used')
|
|
||||||
|
|
||||||
const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
|
|
||||||
// Execute the binary
|
|
||||||
log(
|
|
||||||
`[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
|
|
||||||
)
|
|
||||||
subprocess = spawn(
|
|
||||||
executableOptions.executablePath,
|
|
||||||
['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()],
|
|
||||||
{
|
|
||||||
cwd: binaryFolder,
|
|
||||||
env: {
|
|
||||||
...process.env,
|
|
||||||
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
// Handle subprocess output
|
|
||||||
subprocess.stdout.on('data', (data: any) => {
|
|
||||||
log(`[SERVER]::Debug: ${data}`)
|
|
||||||
})
|
|
||||||
|
|
||||||
subprocess.stderr.on('data', (data: any) => {
|
|
||||||
log(`[SERVER]::Error: ${data}`)
|
|
||||||
})
|
|
||||||
|
|
||||||
subprocess.on('close', (code: any) => {
|
|
||||||
log(`[SERVER]::Debug: cortex exited with code: ${code}`)
|
|
||||||
subprocess = undefined
|
|
||||||
})
|
|
||||||
|
|
||||||
tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
|
|
||||||
log(`[SERVER]::Debug: cortex is ready`)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
type NitroExecutableOptions = {
|
|
||||||
executablePath: string
|
|
||||||
cudaVisibleDevices: string
|
|
||||||
}
|
|
||||||
|
|
||||||
const executableNitroFile = (): NitroExecutableOptions => {
|
|
||||||
const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
|
|
||||||
let binaryFolder = join(
|
|
||||||
getJanExtensionsPath(),
|
|
||||||
'@janhq',
|
|
||||||
'inference-cortex-extension',
|
|
||||||
'dist',
|
|
||||||
'bin'
|
|
||||||
)
|
|
||||||
|
|
||||||
let cudaVisibleDevices = ''
|
|
||||||
let binaryName = 'cortex-cpp'
|
|
||||||
/**
|
|
||||||
* The binary folder is different for each platform.
|
|
||||||
*/
|
|
||||||
if (process.platform === 'win32') {
|
|
||||||
/**
|
|
||||||
* For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
|
|
||||||
*/
|
|
||||||
let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
|
|
||||||
if (nvidiaInfo['run_mode'] === 'cpu') {
|
|
||||||
binaryFolder = join(binaryFolder, 'win-cpu')
|
|
||||||
} else {
|
|
||||||
if (nvidiaInfo['cuda'].version === '12') {
|
|
||||||
binaryFolder = join(binaryFolder, 'win-cuda-12-0')
|
|
||||||
} else {
|
|
||||||
binaryFolder = join(binaryFolder, 'win-cuda-11-7')
|
|
||||||
}
|
|
||||||
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
|
|
||||||
}
|
|
||||||
binaryName = 'cortex-cpp.exe'
|
|
||||||
} else if (process.platform === 'darwin') {
|
|
||||||
/**
|
|
||||||
* For macOS: mac-arm64 for arm64 (Apple Silicon), mac-amd64 otherwise (Intel)
|
|
||||||
*/
|
|
||||||
if(process.arch === 'arm64') {
|
|
||||||
binaryFolder = join(binaryFolder, 'mac-arm64')
|
|
||||||
} else {
|
|
||||||
binaryFolder = join(binaryFolder, 'mac-amd64')
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/**
|
|
||||||
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
|
|
||||||
*/
|
|
||||||
let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
|
|
||||||
if (nvidiaInfo['run_mode'] === 'cpu') {
|
|
||||||
binaryFolder = join(binaryFolder, 'linux-cpu')
|
|
||||||
} else {
|
|
||||||
if (nvidiaInfo['cuda'].version === '12') {
|
|
||||||
binaryFolder = join(binaryFolder, 'linux-cuda-12-0')
|
|
||||||
} else {
|
|
||||||
binaryFolder = join(binaryFolder, 'linux-cuda-11-7')
|
|
||||||
}
|
|
||||||
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
executablePath: join(binaryFolder, binaryName),
|
|
||||||
cudaVisibleDevices,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const validateModelStatus = async (): Promise<void> => {
|
|
||||||
// Send a GET request to the validation URL.
|
|
||||||
// Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
|
|
||||||
const fetchRT = require('fetch-retry')
|
|
||||||
const fetchRetry = fetchRT(fetch)
|
|
||||||
|
|
||||||
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
|
|
||||||
method: 'GET',
|
|
||||||
headers: {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
},
|
|
||||||
retries: 5,
|
|
||||||
retryDelay: 500,
|
|
||||||
}).then(async (res: Response) => {
|
|
||||||
log(`[SERVER]::Debug: Validate model state success with response ${JSON.stringify(res)}`)
|
|
||||||
// If the response is OK, check model_loaded status.
|
|
||||||
if (res.ok) {
|
|
||||||
const body = await res.json()
|
|
||||||
// If the model is loaded, resolve with no value.
|
|
||||||
// Otherwise, fall through and reject with an error message.
|
|
||||||
if (body.model_loaded) {
|
|
||||||
return Promise.resolve()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Promise.reject('Validate model status failed')
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
|
|
||||||
log(`[SERVER]::Debug: Loading model with params ${JSON.stringify(settings)}`)
|
|
||||||
const fetchRT = require('fetch-retry')
|
|
||||||
const fetchRetry = fetchRT(fetch)
|
|
||||||
|
|
||||||
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
|
|
||||||
method: 'POST',
|
|
||||||
headers: {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
},
|
|
||||||
body: JSON.stringify(settings),
|
|
||||||
retries: 3,
|
|
||||||
retryDelay: 500,
|
|
||||||
})
|
|
||||||
.then((res: any) => {
|
|
||||||
log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
|
|
||||||
return Promise.resolve(res)
|
|
||||||
})
|
|
||||||
.catch((err: any) => {
|
|
||||||
log(`[SERVER]::Error: Load model failed with error ${err}`)
|
|
||||||
return Promise.reject(err)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Run a model using installed cortex extension
|
||||||
|
* @param model
|
||||||
|
* @param settingParams
|
||||||
|
*/
|
||||||
|
const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
|
||||||
|
const janDataFolderPath = getJanDataFolderPath()
|
||||||
|
const modelFolder = join(janDataFolderPath, 'models', model)
|
||||||
|
let module = join(
|
||||||
|
getJanExtensionsPath(),
|
||||||
|
'@janhq',
|
||||||
|
'inference-cortex-extension',
|
||||||
|
'dist',
|
||||||
|
'node',
|
||||||
|
'index.cjs'
|
||||||
|
)
|
||||||
|
// Reuse the cortex extension's implementation instead of duplicating it, so the two cannot drift out of sync
|
||||||
|
return import(module).then((extension) =>
|
||||||
|
extension
|
||||||
|
.loadModel(
|
||||||
|
{
|
||||||
|
modelFolder,
|
||||||
|
model,
|
||||||
|
},
|
||||||
|
settingParams
|
||||||
|
)
|
||||||
|
.then(() => log(`[SERVER]::Debug: Model is loaded`))
|
||||||
|
.then({
|
||||||
|
message: 'Model started',
|
||||||
|
})
|
||||||
|
)
|
||||||
|
}
|
||||||
|
/*
|
||||||
* Stop model and kill nitro process.
|
* Stop the model by asking the cortex extension to unload it.
|
||||||
*/
|
*/
|
||||||
export const stopModel = async (_modelId: string) => {
|
export const stopModel = async (_modelId: string) => {
|
||||||
if (!subprocess) {
|
let module = join(
|
||||||
return {
|
getJanExtensionsPath(),
|
||||||
error: "Model isn't running",
|
'@janhq',
|
||||||
}
|
'inference-cortex-extension',
|
||||||
}
|
'dist',
|
||||||
return new Promise((resolve, reject) => {
|
'node',
|
||||||
const controller = new AbortController()
|
'index.cjs'
|
||||||
setTimeout(() => {
|
)
|
||||||
controller.abort()
|
// Reuse the cortex extension's implementation instead of duplicating it, so the two cannot drift out of sync
|
||||||
reject({
|
return import(module).then((extension) =>
|
||||||
error: 'Failed to stop model: Timedout',
|
extension
|
||||||
})
|
.unloadModel()
|
||||||
}, 5000)
|
.then(() => log(`[SERVER]::Debug: Model is unloaded`))
|
||||||
const tcpPortUsed = require('tcp-port-used')
|
.then({
|
||||||
log(`[SERVER]::Debug: Request to kill cortex`)
|
|
||||||
|
|
||||||
fetch(NITRO_HTTP_KILL_URL, {
|
|
||||||
method: 'DELETE',
|
|
||||||
signal: controller.signal,
|
|
||||||
})
|
|
||||||
.then(() => {
|
|
||||||
subprocess?.kill()
|
|
||||||
subprocess = undefined
|
|
||||||
})
|
|
||||||
.catch(() => {
|
|
||||||
// don't need to do anything, we still kill the subprocess
|
|
||||||
})
|
|
||||||
.then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
|
|
||||||
.then(() => log(`[SERVER]::Debug: Nitro process is terminated`))
|
|
||||||
.then(() =>
|
|
||||||
resolve({
|
|
||||||
message: 'Model stopped',
|
message: 'Model stopped',
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,3 +1,31 @@
|
|||||||
@echo off
|
@echo off
|
||||||
|
set BIN_PATH=./bin
|
||||||
set /p CORTEX_VERSION=<./bin/version.txt
|
set /p CORTEX_VERSION=<./bin/version.txt
|
||||||
.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-noavx-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0/engines/cortex.llamacpp && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-noavx-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7/engines/cortex.llamacpp && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-noavx.tar.gz -e --strip 1 -o ./bin/win-cpu/engines/cortex.llamacpp && .\node_modules\.bin\download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan/engines/cortex.llamacpp
|
|
||||||
|
@REM Download cortex.llamacpp binaries
|
||||||
|
set VERSION=v0.1.25
|
||||||
|
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.25-windows-amd64
|
||||||
|
set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
|
||||||
|
|
||||||
|
call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz
|
||||||
|
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp
|
||||||
|
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp
|
||||||
|
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp
|
||||||
|
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx/engines/cortex.llamacpp
|
||||||
|
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx2/engines/cortex.llamacpp
|
||||||
|
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx512/engines/cortex.llamacpp
|
||||||
|
call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/win-vulkan/engines/cortex.llamacpp
|
||||||
|
|
||||||
|
@REM Loop through each folder and move DLLs (excluding engine.dll)
|
||||||
|
for %%F in (%SUBFOLDERS%) do (
|
||||||
|
echo Processing folder: %BIN_PATH%\%%F
|
||||||
|
|
||||||
|
@REM Move all .dll files except engine.dll
|
||||||
|
for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
|
||||||
|
if /I not "%%~nxD"=="engine.dll" (
|
||||||
|
move "%%D" "%BIN_PATH%"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
echo DLL files moved successfully.
|
||||||
41
extensions/inference-nitro-extension/download.sh
Executable file
41
extensions/inference-nitro-extension/download.sh
Executable file
@ -0,0 +1,41 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Read CORTEX_VERSION
|
||||||
|
CORTEX_VERSION=$(cat ./bin/version.txt)
|
||||||
|
CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
|
||||||
|
|
||||||
|
# Detect platform
|
||||||
|
OS_TYPE=$(uname)
|
||||||
|
|
||||||
|
if [ "$OS_TYPE" == "Linux" ]; then
|
||||||
|
# Linux downloads
|
||||||
|
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
|
||||||
|
chmod +x "./bin/cortex-cpp"
|
||||||
|
|
||||||
|
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64"
|
||||||
|
|
||||||
|
# Download engines for Linux
|
||||||
|
download "${ENGINE_DOWNLOAD_URL}-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1
|
||||||
|
download "${ENGINE_DOWNLOAD_URL}-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1
|
||||||
|
download "${ENGINE_DOWNLOAD_URL}-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1
|
||||||
|
download "${ENGINE_DOWNLOAD_URL}-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1
|
||||||
|
download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1
|
||||||
|
download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1
|
||||||
|
download "${ENGINE_DOWNLOAD_URL}-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1
|
||||||
|
|
||||||
|
elif [ "$OS_TYPE" == "Darwin" ]; then
|
||||||
|
# macOS downloads
|
||||||
|
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/mac-arm64" 1
|
||||||
|
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/mac-x64" 1
|
||||||
|
chmod +x "./bin/mac-arm64/cortex-cpp"
|
||||||
|
chmod +x "./bin/mac-x64/cortex-cpp"
|
||||||
|
|
||||||
|
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac"
|
||||||
|
# Download engines for macOS
|
||||||
|
download "${ENGINE_DOWNLOAD_URL}-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp
|
||||||
|
download "${ENGINE_DOWNLOAD_URL}-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp
|
||||||
|
|
||||||
|
else
|
||||||
|
echo "Unsupported operating system: $OS_TYPE"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
@ -2,7 +2,7 @@
|
|||||||
"name": "@janhq/inference-cortex-extension",
|
"name": "@janhq/inference-cortex-extension",
|
||||||
"productName": "Cortex Inference Engine",
|
"productName": "Cortex Inference Engine",
|
||||||
"version": "1.0.15",
|
"version": "1.0.15",
|
||||||
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
|
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"node": "dist/node/index.cjs.js",
|
"node": "dist/node/index.cjs.js",
|
||||||
"author": "Jan <service@jan.ai>",
|
"author": "Jan <service@jan.ai>",
|
||||||
@ -10,13 +10,11 @@
|
|||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "jest",
|
"test": "jest",
|
||||||
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
|
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
|
||||||
"downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-noavx.tar.gz -e --strip 1 -o ./bin/linux-cpu/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-noavx-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-noavx-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan/engines/cortex.llamacpp",
|
"downloadnitro:linux:darwin": "./download.sh",
|
||||||
"downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp && download https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-amd64/engines/cortex.llamacpp",
|
|
||||||
"downloadnitro:win32": "download.bat",
|
"downloadnitro:win32": "download.bat",
|
||||||
"downloadnitro": "run-script-os",
|
"downloadnitro": "run-script-os",
|
||||||
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||||
"build:publish:win32": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
"build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||||
"build:publish:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
|
||||||
"build:publish": "yarn test && run-script-os"
|
"build:publish": "yarn test && run-script-os"
|
||||||
},
|
},
|
||||||
"exports": {
|
"exports": {
|
||||||
@ -49,6 +47,7 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@janhq/core": "file:../../core",
|
"@janhq/core": "file:../../core",
|
||||||
|
"cpu-instructions": "^0.0.13",
|
||||||
"decompress": "^4.2.1",
|
"decompress": "^4.2.1",
|
||||||
"fetch-retry": "^5.0.6",
|
"fetch-retry": "^5.0.6",
|
||||||
"rxjs": "^7.8.1",
|
"rxjs": "^7.8.1",
|
||||||
@ -68,6 +67,7 @@
|
|||||||
"tcp-port-used",
|
"tcp-port-used",
|
||||||
"fetch-retry",
|
"fetch-retry",
|
||||||
"@janhq/core",
|
"@janhq/core",
|
||||||
"decompress"
|
"decompress",
|
||||||
|
"cpu-instructions"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@ -96,7 +96,7 @@ export default [
|
|||||||
llama3170bJson,
|
llama3170bJson,
|
||||||
gemma22bJson,
|
gemma22bJson,
|
||||||
gemma29bJson,
|
gemma29bJson,
|
||||||
gemma227bJson
|
gemma227bJson,
|
||||||
]),
|
]),
|
||||||
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
||||||
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
||||||
@ -117,7 +117,10 @@ export default [
|
|||||||
// Allow json resolution
|
// Allow json resolution
|
||||||
json(),
|
json(),
|
||||||
// Compile TypeScript files
|
// Compile TypeScript files
|
||||||
typescript({ useTsconfigDeclarationDir: true }),
|
typescript({
|
||||||
|
useTsconfigDeclarationDir: true,
|
||||||
|
exclude: ['**/__tests__', '**/*.test.ts'],
|
||||||
|
}),
|
||||||
// Compile TypeScript files
|
// Compile TypeScript files
|
||||||
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
|
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
|
||||||
commonjs(),
|
commonjs(),
|
||||||
@ -139,7 +142,7 @@ export default [
|
|||||||
{ file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
|
{ file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
|
||||||
],
|
],
|
||||||
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
|
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
|
||||||
external: ['@janhq/core/node'],
|
external: ['@janhq/core/node', 'cpu-instructions'],
|
||||||
watch: {
|
watch: {
|
||||||
include: 'src/node/**',
|
include: 'src/node/**',
|
||||||
},
|
},
|
||||||
@ -147,7 +150,10 @@ export default [
|
|||||||
// Allow json resolution
|
// Allow json resolution
|
||||||
json(),
|
json(),
|
||||||
// Compile TypeScript files
|
// Compile TypeScript files
|
||||||
typescript({ useTsconfigDeclarationDir: true }),
|
typescript({
|
||||||
|
useTsconfigDeclarationDir: true,
|
||||||
|
exclude: ['**/__tests__', '**/*.test.ts'],
|
||||||
|
}),
|
||||||
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
|
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
|
||||||
commonjs(),
|
commonjs(),
|
||||||
// Allow node_modules resolution, so you can use 'external' to control
|
// Allow node_modules resolution, so you can use 'external' to control
|
||||||
@ -156,7 +162,6 @@ export default [
|
|||||||
resolve({
|
resolve({
|
||||||
extensions: ['.ts', '.js', '.json'],
|
extensions: ['.ts', '.js', '.json'],
|
||||||
}),
|
}),
|
||||||
|
|
||||||
// Resolve source maps to the original source
|
// Resolve source maps to the original source
|
||||||
sourceMaps(),
|
sourceMaps(),
|
||||||
],
|
],
|
||||||
|
|||||||
@ -73,6 +73,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
|
|||||||
this.registerModels(models)
|
this.registerModels(models)
|
||||||
super.onLoad()
|
super.onLoad()
|
||||||
|
|
||||||
|
// Add additional dependencies PATH to the env
|
||||||
executeOnMain(NODE, 'addAdditionalDependencies', {
|
executeOnMain(NODE, 'addAdditionalDependencies', {
|
||||||
name: this.name,
|
name: this.name,
|
||||||
version: this.version,
|
version: this.version,
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
import { describe, expect, it } from '@jest/globals'
|
import { describe, expect, it } from '@jest/globals'
|
||||||
import { executableNitroFile } from './execute'
|
import { executableNitroFile } from './execute'
|
||||||
import { GpuSetting } from '@janhq/core'
|
import { GpuSetting } from '@janhq/core'
|
||||||
import { sep } from 'path'
|
import { cpuInfo } from 'cpu-instructions'
|
||||||
|
|
||||||
let testSettings: GpuSetting = {
|
let testSettings: GpuSetting = {
|
||||||
run_mode: 'cpu',
|
run_mode: 'cpu',
|
||||||
@ -22,6 +22,14 @@ let testSettings: GpuSetting = {
|
|||||||
}
|
}
|
||||||
const originalPlatform = process.platform
|
const originalPlatform = process.platform
|
||||||
|
|
||||||
|
jest.mock('cpu-instructions', () => ({
|
||||||
|
cpuInfo: {
|
||||||
|
cpuInfo: jest.fn(),
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
let mock = cpuInfo.cpuInfo as jest.Mock
|
||||||
|
mock.mockReturnValue([])
|
||||||
|
|
||||||
describe('test executable nitro file', () => {
|
describe('test executable nitro file', () => {
|
||||||
afterAll(function () {
|
afterAll(function () {
|
||||||
Object.defineProperty(process, 'platform', {
|
Object.defineProperty(process, 'platform', {
|
||||||
@ -38,17 +46,19 @@ describe('test executable nitro file', () => {
|
|||||||
})
|
})
|
||||||
expect(executableNitroFile(testSettings)).toEqual(
|
expect(executableNitroFile(testSettings)).toEqual(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
|
enginePath: expect.stringContaining(`mac-arm64`),
|
||||||
|
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(),
|
||||||
cudaVisibleDevices: '',
|
cudaVisibleDevices: '',
|
||||||
vkVisibleDevices: '',
|
vkVisibleDevices: '',
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
Object.defineProperty(process, 'arch', {
|
Object.defineProperty(process, 'arch', {
|
||||||
value: 'amd64',
|
value: 'x64',
|
||||||
})
|
})
|
||||||
expect(executableNitroFile(testSettings)).toEqual(
|
expect(executableNitroFile(testSettings)).toEqual(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
|
enginePath: expect.stringContaining(`mac-x64`),
|
||||||
|
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
|
||||||
cudaVisibleDevices: '',
|
cudaVisibleDevices: '',
|
||||||
vkVisibleDevices: '',
|
vkVisibleDevices: '',
|
||||||
})
|
})
|
||||||
@ -62,14 +72,11 @@ describe('test executable nitro file', () => {
|
|||||||
const settings: GpuSetting = {
|
const settings: GpuSetting = {
|
||||||
...testSettings,
|
...testSettings,
|
||||||
run_mode: 'cpu',
|
run_mode: 'cpu',
|
||||||
cuda: {
|
|
||||||
exist: true,
|
|
||||||
version: '11',
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
expect(executableNitroFile(settings)).toEqual(
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
|
enginePath: expect.stringContaining(`win`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||||
cudaVisibleDevices: '',
|
cudaVisibleDevices: '',
|
||||||
vkVisibleDevices: '',
|
vkVisibleDevices: '',
|
||||||
})
|
})
|
||||||
@ -102,7 +109,8 @@ describe('test executable nitro file', () => {
|
|||||||
}
|
}
|
||||||
expect(executableNitroFile(settings)).toEqual(
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
|
enginePath: expect.stringContaining(`win-cuda-11-7`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||||
cudaVisibleDevices: '0',
|
cudaVisibleDevices: '0',
|
||||||
vkVisibleDevices: '0',
|
vkVisibleDevices: '0',
|
||||||
})
|
})
|
||||||
@ -135,7 +143,8 @@ describe('test executable nitro file', () => {
|
|||||||
}
|
}
|
||||||
expect(executableNitroFile(settings)).toEqual(
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
|
enginePath: expect.stringContaining(`win-cuda-12-0`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||||
cudaVisibleDevices: '0',
|
cudaVisibleDevices: '0',
|
||||||
vkVisibleDevices: '0',
|
vkVisibleDevices: '0',
|
||||||
})
|
})
|
||||||
@ -152,7 +161,8 @@ describe('test executable nitro file', () => {
|
|||||||
}
|
}
|
||||||
expect(executableNitroFile(settings)).toEqual(
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
|
enginePath: expect.stringContaining(`linux`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||||
cudaVisibleDevices: '',
|
cudaVisibleDevices: '',
|
||||||
vkVisibleDevices: '',
|
vkVisibleDevices: '',
|
||||||
})
|
})
|
||||||
@ -185,7 +195,8 @@ describe('test executable nitro file', () => {
|
|||||||
}
|
}
|
||||||
expect(executableNitroFile(settings)).toEqual(
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
|
enginePath: expect.stringContaining(`linux-cuda-11-7`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||||
cudaVisibleDevices: '0',
|
cudaVisibleDevices: '0',
|
||||||
vkVisibleDevices: '0',
|
vkVisibleDevices: '0',
|
||||||
})
|
})
|
||||||
@ -218,10 +229,203 @@ describe('test executable nitro file', () => {
|
|||||||
}
|
}
|
||||||
expect(executableNitroFile(settings)).toEqual(
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
|
enginePath: expect.stringContaining(`linux-cuda-12-0`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||||
cudaVisibleDevices: '0',
|
cudaVisibleDevices: '0',
|
||||||
vkVisibleDevices: '0',
|
vkVisibleDevices: '0',
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Generate test for different cpu instructions on Linux
|
||||||
|
it(`executes on Linux CPU with different instructions`, () => {
|
||||||
|
Object.defineProperty(process, 'platform', {
|
||||||
|
value: 'linux',
|
||||||
|
})
|
||||||
|
const settings: GpuSetting = {
|
||||||
|
...testSettings,
|
||||||
|
run_mode: 'cpu',
|
||||||
|
}
|
||||||
|
|
||||||
|
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||||
|
cpuInstructions.forEach((instruction) => {
|
||||||
|
mock.mockReturnValue([instruction])
|
||||||
|
|
||||||
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
enginePath: expect.stringContaining(`linux-${instruction}`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||||
|
|
||||||
|
cudaVisibleDevices: '',
|
||||||
|
vkVisibleDevices: '',
|
||||||
|
})
|
||||||
|
)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
// Generate test for different cpu instructions on Windows
|
||||||
|
it(`executes on Windows CPU with different instructions`, () => {
|
||||||
|
Object.defineProperty(process, 'platform', {
|
||||||
|
value: 'win32',
|
||||||
|
})
|
||||||
|
const settings: GpuSetting = {
|
||||||
|
...testSettings,
|
||||||
|
run_mode: 'cpu',
|
||||||
|
}
|
||||||
|
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||||
|
cpuInstructions.forEach((instruction) => {
|
||||||
|
mock.mockReturnValue([instruction])
|
||||||
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
enginePath: expect.stringContaining(`win-${instruction}`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||||
|
cudaVisibleDevices: '',
|
||||||
|
vkVisibleDevices: '',
|
||||||
|
})
|
||||||
|
)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// Generate test for different cpu instructions on Windows
|
||||||
|
it(`executes on Windows GPU with different instructions`, () => {
|
||||||
|
Object.defineProperty(process, 'platform', {
|
||||||
|
value: 'win32',
|
||||||
|
})
|
||||||
|
const settings: GpuSetting = {
|
||||||
|
...testSettings,
|
||||||
|
run_mode: 'gpu',
|
||||||
|
cuda: {
|
||||||
|
exist: true,
|
||||||
|
version: '12',
|
||||||
|
},
|
||||||
|
nvidia_driver: {
|
||||||
|
exist: true,
|
||||||
|
version: '12',
|
||||||
|
},
|
||||||
|
gpus_in_use: ['0'],
|
||||||
|
gpus: [
|
||||||
|
{
|
||||||
|
id: '0',
|
||||||
|
name: 'NVIDIA GeForce GTX 1080',
|
||||||
|
vram: '80000000',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||||
|
cpuInstructions.forEach((instruction) => {
|
||||||
|
mock.mockReturnValue([instruction])
|
||||||
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
enginePath: expect.stringContaining(`win-cuda-12-0`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||||
|
cudaVisibleDevices: '0',
|
||||||
|
vkVisibleDevices: '0',
|
||||||
|
})
|
||||||
|
)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// Generate test for different cpu instructions on Linux
|
||||||
|
it(`executes on Linux GPU with different instructions`, () => {
|
||||||
|
Object.defineProperty(process, 'platform', {
|
||||||
|
value: 'linux',
|
||||||
|
})
|
||||||
|
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||||
|
const settings: GpuSetting = {
|
||||||
|
...testSettings,
|
||||||
|
run_mode: 'gpu',
|
||||||
|
cuda: {
|
||||||
|
exist: true,
|
||||||
|
version: '12',
|
||||||
|
},
|
||||||
|
nvidia_driver: {
|
||||||
|
exist: true,
|
||||||
|
version: '12',
|
||||||
|
},
|
||||||
|
gpus_in_use: ['0'],
|
||||||
|
gpus: [
|
||||||
|
{
|
||||||
|
id: '0',
|
||||||
|
name: 'NVIDIA GeForce GTX 1080',
|
||||||
|
vram: '80000000',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
cpuInstructions.forEach((instruction) => {
|
||||||
|
mock.mockReturnValue([instruction])
|
||||||
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
enginePath: expect.stringContaining(`linux-cuda-12-0`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||||
|
cudaVisibleDevices: '0',
|
||||||
|
vkVisibleDevices: '0',
|
||||||
|
})
|
||||||
|
)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// Generate test for different cpu instructions on Linux
|
||||||
|
it(`executes on Linux Vulkan should not have CPU instructions included`, () => {
|
||||||
|
Object.defineProperty(process, 'platform', {
|
||||||
|
value: 'linux',
|
||||||
|
})
|
||||||
|
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||||
|
const settings: GpuSetting = {
|
||||||
|
...testSettings,
|
||||||
|
run_mode: 'gpu',
|
||||||
|
vulkan: true,
|
||||||
|
cuda: {
|
||||||
|
exist: true,
|
||||||
|
version: '12',
|
||||||
|
},
|
||||||
|
nvidia_driver: {
|
||||||
|
exist: true,
|
||||||
|
version: '12',
|
||||||
|
},
|
||||||
|
gpus_in_use: ['0'],
|
||||||
|
gpus: [
|
||||||
|
{
|
||||||
|
id: '0',
|
||||||
|
name: 'NVIDIA GeForce GTX 1080',
|
||||||
|
vram: '80000000',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
cpuInstructions.forEach((instruction) => {
|
||||||
|
mock.mockReturnValue([instruction])
|
||||||
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
enginePath: expect.stringContaining(`linux-vulkan`),
|
||||||
|
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||||
|
cudaVisibleDevices: '0',
|
||||||
|
vkVisibleDevices: '0',
|
||||||
|
})
|
||||||
|
)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// Generate test for different cpu instructions on MacOS
|
||||||
|
it(`executes on MacOS with different instructions`, () => {
|
||||||
|
Object.defineProperty(process, 'platform', {
|
||||||
|
value: 'darwin',
|
||||||
|
})
|
||||||
|
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||||
|
cpuInstructions.forEach(() => {
|
||||||
|
Object.defineProperty(process, 'platform', {
|
||||||
|
value: 'darwin',
|
||||||
|
})
|
||||||
|
const settings: GpuSetting = {
|
||||||
|
...testSettings,
|
||||||
|
run_mode: 'cpu',
|
||||||
|
}
|
||||||
|
mock.mockReturnValue([])
|
||||||
|
expect(executableNitroFile(settings)).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
enginePath: expect.stringContaining(`mac-x64`),
|
||||||
|
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
|
||||||
|
cudaVisibleDevices: '',
|
||||||
|
vkVisibleDevices: '',
|
||||||
|
})
|
||||||
|
)
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@ -1,37 +1,59 @@
|
|||||||
import { GpuSetting } from '@janhq/core'
|
import { GpuSetting } from '@janhq/core'
|
||||||
import * as path from 'path'
|
import * as path from 'path'
|
||||||
|
import { cpuInfo } from 'cpu-instructions'
|
||||||
|
|
||||||
export interface NitroExecutableOptions {
|
export interface NitroExecutableOptions {
|
||||||
|
enginePath: string
|
||||||
executablePath: string
|
executablePath: string
|
||||||
cudaVisibleDevices: string
|
cudaVisibleDevices: string
|
||||||
vkVisibleDevices: string
|
vkVisibleDevices: string
|
||||||
}
|
}
|
||||||
const runMode = (settings?: GpuSetting): string => {
|
/**
|
||||||
|
* The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
|
||||||
|
* @param settings
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
|
const gpuRunMode = (settings?: GpuSetting): string => {
|
||||||
if (process.platform === 'darwin')
|
if (process.platform === 'darwin')
|
||||||
// MacOS now has universal binaries
|
// MacOS now has universal binaries
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
if (!settings) return 'cpu'
|
if (!settings) return ''
|
||||||
|
|
||||||
return settings.vulkan === true
|
return settings.vulkan === true
|
||||||
? 'vulkan'
|
? 'vulkan'
|
||||||
: settings.run_mode === 'cpu'
|
: settings.run_mode === 'cpu'
|
||||||
? 'cpu'
|
? ''
|
||||||
: 'cuda'
|
: 'cuda'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The OS & architecture that the current process is running on.
|
||||||
|
* @returns win, mac-x64, mac-arm64, or linux
|
||||||
|
*/
|
||||||
const os = (): string => {
|
const os = (): string => {
|
||||||
return process.platform === 'win32'
|
return process.platform === 'win32'
|
||||||
? 'win'
|
? 'win'
|
||||||
: process.platform === 'darwin'
|
: process.platform === 'darwin'
|
||||||
? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
|
? process.arch === 'arm64'
|
||||||
|
? 'mac-arm64'
|
||||||
|
: 'mac-x64'
|
||||||
: 'linux'
|
: 'linux'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The cortex.cpp extension based on the current platform.
|
||||||
|
* @returns .exe if on Windows, otherwise an empty string.
|
||||||
|
*/
|
||||||
const extension = (): '.exe' | '' => {
|
const extension = (): '.exe' | '' => {
|
||||||
return process.platform === 'win32' ? '.exe' : ''
|
return process.platform === 'win32' ? '.exe' : ''
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The CUDA version that will be set - either '11-7' or '12-0'.
|
||||||
|
* @param settings
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
|
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
|
||||||
const isUsingCuda =
|
const isUsingCuda =
|
||||||
settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac'
|
settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac'
|
||||||
@ -40,6 +62,21 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
|
|||||||
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
|
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
|
const cpuInstructions = () => {
|
||||||
|
if (process.platform === 'darwin') return ''
|
||||||
|
return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
|
||||||
|
? 'avx512'
|
||||||
|
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
|
||||||
|
? 'avx2'
|
||||||
|
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
|
||||||
|
? 'avx'
|
||||||
|
: 'noavx'
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find which executable file to run based on the current platform.
|
* Find which executable file to run based on the current platform.
|
||||||
* @returns The name of the executable file to run.
|
* @returns The name of the executable file to run.
|
||||||
@ -47,15 +84,26 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
|
|||||||
export const executableNitroFile = (
|
export const executableNitroFile = (
|
||||||
gpuSetting?: GpuSetting
|
gpuSetting?: GpuSetting
|
||||||
): NitroExecutableOptions => {
|
): NitroExecutableOptions => {
|
||||||
let binaryFolder = [os(), runMode(gpuSetting), cudaVersion(gpuSetting)]
|
let engineFolder = [
|
||||||
|
os(),
|
||||||
|
...(gpuSetting?.vulkan
|
||||||
|
? []
|
||||||
|
: [
|
||||||
|
gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '',
|
||||||
|
gpuRunMode(gpuSetting),
|
||||||
|
cudaVersion(gpuSetting),
|
||||||
|
]),
|
||||||
|
gpuSetting?.vulkan ? 'vulkan' : undefined,
|
||||||
|
]
|
||||||
.filter((e) => !!e)
|
.filter((e) => !!e)
|
||||||
.join('-')
|
.join('-')
|
||||||
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
||||||
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
||||||
let binaryName = `cortex-cpp${extension()}`
|
let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}`
|
||||||
|
|
||||||
return {
|
return {
|
||||||
executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),
|
enginePath: path.join(__dirname, '..', 'bin', engineFolder),
|
||||||
|
executablePath: path.join(__dirname, '..', 'bin', binaryName),
|
||||||
cudaVisibleDevices,
|
cudaVisibleDevices,
|
||||||
vkVisibleDevices,
|
vkVisibleDevices,
|
||||||
}
|
}
|
||||||
|
|||||||
465
extensions/inference-nitro-extension/src/node/index.test.ts
Normal file
465
extensions/inference-nitro-extension/src/node/index.test.ts
Normal file
@ -0,0 +1,465 @@
|
|||||||
|
jest.mock('fetch-retry', () => ({
|
||||||
|
default: () => () => {
|
||||||
|
return Promise.resolve({
|
||||||
|
ok: true,
|
||||||
|
status: 200,
|
||||||
|
json: () =>
|
||||||
|
Promise.resolve({
|
||||||
|
model_loaded: true,
|
||||||
|
}),
|
||||||
|
text: () => Promise.resolve(''),
|
||||||
|
})
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
jest.mock('path', () => ({
|
||||||
|
default: {
|
||||||
|
isAbsolute: jest.fn(),
|
||||||
|
join: jest.fn(),
|
||||||
|
parse: () => {
|
||||||
|
return { dir: 'dir' }
|
||||||
|
},
|
||||||
|
delimiter: { concat: () => '' },
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
jest.mock('decompress', () => ({
|
||||||
|
default: () => {
|
||||||
|
return Promise.resolve()
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
jest.mock('@janhq/core/node', () => ({
|
||||||
|
...jest.requireActual('@janhq/core/node'),
|
||||||
|
getJanDataFolderPath: () => '',
|
||||||
|
getSystemResourceInfo: () => {
|
||||||
|
return {
|
||||||
|
cpu: {
|
||||||
|
cores: 1,
|
||||||
|
logicalCores: 1,
|
||||||
|
threads: 1,
|
||||||
|
model: 'model',
|
||||||
|
speed: 1,
|
||||||
|
},
|
||||||
|
memory: {
|
||||||
|
total: 1,
|
||||||
|
free: 1,
|
||||||
|
},
|
||||||
|
gpu: {
|
||||||
|
model: 'model',
|
||||||
|
memory: 1,
|
||||||
|
cuda: {
|
||||||
|
version: 'version',
|
||||||
|
devices: 'devices',
|
||||||
|
},
|
||||||
|
vulkan: {
|
||||||
|
version: 'version',
|
||||||
|
devices: 'devices',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
jest.mock('fs', () => ({
|
||||||
|
default: {
|
||||||
|
readdirSync: () => [],
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
jest.mock('child_process', () => ({
|
||||||
|
exec: () => {
|
||||||
|
return {
|
||||||
|
stdout: { on: jest.fn() },
|
||||||
|
stderr: { on: jest.fn() },
|
||||||
|
on: jest.fn(),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
spawn: () => {
|
||||||
|
return {
|
||||||
|
stdout: { on: jest.fn() },
|
||||||
|
stderr: { on: jest.fn() },
|
||||||
|
on: jest.fn(),
|
||||||
|
pid: '111',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
jest.mock('tcp-port-used', () => ({
|
||||||
|
default: {
|
||||||
|
waitUntilFree: () => Promise.resolve(true),
|
||||||
|
waitUntilUsed: () => Promise.resolve(true),
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
jest.mock('./execute', () => ({
|
||||||
|
executableNitroFile: () => {
|
||||||
|
return {
|
||||||
|
enginePath: 'enginePath',
|
||||||
|
executablePath: 'executablePath',
|
||||||
|
cudaVisibleDevices: 'cudaVisibleDevices',
|
||||||
|
vkVisibleDevices: 'vkVisibleDevices',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
jest.mock('terminate', () => ({
|
||||||
|
default: (id: String, func: Function) => {
|
||||||
|
console.log(id)
|
||||||
|
func()
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
import * as execute from './execute'
|
||||||
|
import index from './index'
|
||||||
|
|
||||||
|
let executeMock = execute
|
||||||
|
|
||||||
|
const modelInitOptions: any = {
|
||||||
|
modelFolder: '/path/to/model',
|
||||||
|
model: {
|
||||||
|
id: 'test',
|
||||||
|
name: 'test',
|
||||||
|
engine: 'nitro',
|
||||||
|
version: '0.0',
|
||||||
|
format: 'GGUF',
|
||||||
|
object: 'model',
|
||||||
|
sources: [],
|
||||||
|
created: 0,
|
||||||
|
description: 'test',
|
||||||
|
parameters: {},
|
||||||
|
metadata: {
|
||||||
|
author: '',
|
||||||
|
tags: [],
|
||||||
|
size: 0,
|
||||||
|
},
|
||||||
|
settings: {
|
||||||
|
prompt_template: '{prompt}',
|
||||||
|
llama_model_path: 'model.gguf',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('loadModel', () => {
|
||||||
|
it('should load a model successfully', async () => {
|
||||||
|
// Mock the necessary parameters and system information
|
||||||
|
|
||||||
|
const systemInfo = {
|
||||||
|
// Mock the system information if needed
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call the loadModel function
|
||||||
|
const result = await index.loadModel(modelInitOptions, systemInfo)
|
||||||
|
|
||||||
|
// Assert that the result is as expected
|
||||||
|
expect(result).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should reject with an error message if the model is not a nitro model', async () => {
|
||||||
|
// Mock the necessary parameters and system information
|
||||||
|
|
||||||
|
const systemInfo = {
|
||||||
|
// Mock the system information if needed
|
||||||
|
}
|
||||||
|
modelInitOptions.model.engine = 'not-nitro'
|
||||||
|
// Call the loadModel function
|
||||||
|
try {
|
||||||
|
await index.loadModel(modelInitOptions, systemInfo)
|
||||||
|
} catch (error) {
|
||||||
|
// Assert that the error message is as expected
|
||||||
|
expect(error).toBe('Not a cortex model')
|
||||||
|
}
|
||||||
|
modelInitOptions.model.engine = 'nitro'
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should reject if model load failed with an error message', async () => {
|
||||||
|
// Mock the necessary parameters and system information
|
||||||
|
|
||||||
|
const systemInfo = {
|
||||||
|
// Mock the system information if needed
|
||||||
|
}
|
||||||
|
// Mock the fetch-retry module to return a failed response
|
||||||
|
jest.mock('fetch-retry', () => ({
|
||||||
|
default: () => () => {
|
||||||
|
return Promise.resolve({
|
||||||
|
ok: false,
|
||||||
|
status: 500,
|
||||||
|
json: () =>
|
||||||
|
Promise.resolve({
|
||||||
|
model_loaded: false,
|
||||||
|
}),
|
||||||
|
text: () => Promise.resolve('Failed to load model'),
|
||||||
|
})
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
// Call the loadModel function
|
||||||
|
try {
|
||||||
|
await index.loadModel(modelInitOptions, systemInfo)
|
||||||
|
} catch (error) {
|
||||||
|
// Assert that the error message is as expected
|
||||||
|
expect(error).toBe('Failed to load model')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should reject if port not available', async () => {
|
||||||
|
// Mock the necessary parameters and system information
|
||||||
|
|
||||||
|
const systemInfo = {
|
||||||
|
// Mock the system information if needed
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mock the tcp-port-used module to return false
|
||||||
|
jest.mock('tcp-port-used', () => ({
|
||||||
|
default: {
|
||||||
|
waitUntilFree: () => Promise.resolve(false),
|
||||||
|
waitUntilUsed: () => Promise.resolve(false),
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
// Call the loadModel function
|
||||||
|
try {
|
||||||
|
await index.loadModel(modelInitOptions, systemInfo)
|
||||||
|
} catch (error) {
|
||||||
|
// Assert that the error message is as expected
|
||||||
|
expect(error).toBe('Port not available')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should run on GPU model if ngl is set', async () => {
|
||||||
|
const systemInfo: any = {
|
||||||
|
gpuSetting: {
|
||||||
|
run_mode: 'gpu',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
// Spy executableNitroFile
|
||||||
|
jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
|
||||||
|
enginePath: '',
|
||||||
|
executablePath: '',
|
||||||
|
cudaVisibleDevices: '',
|
||||||
|
vkVisibleDevices: '',
|
||||||
|
})
|
||||||
|
|
||||||
|
Object.defineProperty(process, 'platform', { value: 'win32' })
|
||||||
|
await index.loadModel(
|
||||||
|
{
|
||||||
|
...modelInitOptions,
|
||||||
|
model: {
|
||||||
|
...modelInitOptions.model,
|
||||||
|
settings: {
|
||||||
|
...modelInitOptions.model.settings,
|
||||||
|
ngl: 40,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
systemInfo
|
||||||
|
)
|
||||||
|
expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
|
||||||
|
run_mode: 'gpu',
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should run on correct CPU instructions if ngl is not set', async () => {
|
||||||
|
const systemInfo: any = {
|
||||||
|
gpuSetting: {
|
||||||
|
run_mode: 'gpu',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
// Spy executableNitroFile
|
||||||
|
jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
|
||||||
|
enginePath: '',
|
||||||
|
executablePath: '',
|
||||||
|
cudaVisibleDevices: '',
|
||||||
|
vkVisibleDevices: '',
|
||||||
|
})
|
||||||
|
|
||||||
|
Object.defineProperty(process, 'platform', { value: 'win32' })
|
||||||
|
await index.loadModel(
|
||||||
|
{
|
||||||
|
...modelInitOptions,
|
||||||
|
model: {
|
||||||
|
...modelInitOptions.model,
|
||||||
|
settings: {
|
||||||
|
...modelInitOptions.model.settings,
|
||||||
|
ngl: undefined,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
systemInfo
|
||||||
|
)
|
||||||
|
expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
|
||||||
|
run_mode: 'cpu',
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should run on correct CPU instructions if ngl is 0', async () => {
|
||||||
|
const systemInfo: any = {
|
||||||
|
gpuSetting: {
|
||||||
|
run_mode: 'gpu',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
// Spy executableNitroFile
|
||||||
|
jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
|
||||||
|
enginePath: '',
|
||||||
|
executablePath: '',
|
||||||
|
cudaVisibleDevices: '',
|
||||||
|
vkVisibleDevices: '',
|
||||||
|
})
|
||||||
|
|
||||||
|
Object.defineProperty(process, 'platform', { value: 'win32' })
|
||||||
|
await index.loadModel(
|
||||||
|
{
|
||||||
|
...modelInitOptions,
|
||||||
|
model: {
|
||||||
|
...modelInitOptions.model,
|
||||||
|
settings: {
|
||||||
|
...modelInitOptions.model.settings,
|
||||||
|
ngl: 0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
systemInfo
|
||||||
|
)
|
||||||
|
expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
|
||||||
|
run_mode: 'cpu',
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('unloadModel', () => {
|
||||||
|
it('should unload a model successfully', async () => {
|
||||||
|
// Call the unloadModel function
|
||||||
|
const result = await index.unloadModel()
|
||||||
|
|
||||||
|
// Assert that the result is as expected
|
||||||
|
expect(result).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should reject with an error message if the model is not a nitro model', async () => {
|
||||||
|
// Call the unloadModel function
|
||||||
|
try {
|
||||||
|
await index.unloadModel()
|
||||||
|
} catch (error) {
|
||||||
|
// Assert that the error message is as expected
|
||||||
|
expect(error).toBe('Not a cortex model')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should reject if model unload failed with an error message', async () => {
|
||||||
|
// Mock the fetch-retry module to return a failed response
|
||||||
|
jest.mock('fetch-retry', () => ({
|
||||||
|
default: () => () => {
|
||||||
|
return Promise.resolve({
|
||||||
|
ok: false,
|
||||||
|
status: 500,
|
||||||
|
json: () =>
|
||||||
|
Promise.resolve({
|
||||||
|
model_unloaded: false,
|
||||||
|
}),
|
||||||
|
text: () => Promise.resolve('Failed to unload model'),
|
||||||
|
})
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
// Call the unloadModel function
|
||||||
|
try {
|
||||||
|
await index.unloadModel()
|
||||||
|
} catch (error) {
|
||||||
|
// Assert that the error message is as expected
|
||||||
|
expect(error).toBe('Failed to unload model')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should reject if port not available', async () => {
|
||||||
|
// Mock the tcp-port-used module to return false
|
||||||
|
jest.mock('tcp-port-used', () => ({
|
||||||
|
default: {
|
||||||
|
waitUntilFree: () => Promise.resolve(false),
|
||||||
|
waitUntilUsed: () => Promise.resolve(false),
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
// Call the unloadModel function
|
||||||
|
try {
|
||||||
|
await index.unloadModel()
|
||||||
|
} catch (error) {
|
||||||
|
// Assert that the error message is as expected
|
||||||
|
expect(error).toBe('Port not available')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
describe('dispose', () => {
|
||||||
|
it('should dispose a model successfully on Mac', async () => {
|
||||||
|
Object.defineProperty(process, 'platform', {
|
||||||
|
value: 'darwin',
|
||||||
|
})
|
||||||
|
|
||||||
|
// Call the dispose function
|
||||||
|
const result = await index.dispose()
|
||||||
|
|
||||||
|
// Assert that the result is as expected
|
||||||
|
expect(result).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should kill the subprocess successfully on Windows', async () => {
|
||||||
|
Object.defineProperty(process, 'platform', {
|
||||||
|
value: 'win32',
|
||||||
|
})
|
||||||
|
|
||||||
|
// Call the killSubprocess function
|
||||||
|
const result = await index.dispose()
|
||||||
|
|
||||||
|
// Assert that the result is as expected
|
||||||
|
expect(result).toBeUndefined()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('getCurrentNitroProcessInfo', () => {
|
||||||
|
it('should return the current nitro process info', async () => {
|
||||||
|
// Call the getCurrentNitroProcessInfo function
|
||||||
|
const result = await index.getCurrentNitroProcessInfo()
|
||||||
|
|
||||||
|
// Assert that the result is as expected
|
||||||
|
expect(result).toEqual({
|
||||||
|
isRunning: true,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('decompressRunner', () => {
|
||||||
|
it('should decompress the runner successfully', async () => {
|
||||||
|
jest.mock('decompress', () => ({
|
||||||
|
default: () => {
|
||||||
|
return Promise.resolve()
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
// Call the decompressRunner function
|
||||||
|
const result = await index.decompressRunner('', '')
|
||||||
|
|
||||||
|
// Assert that the result is as expected
|
||||||
|
expect(result).toBeUndefined()
|
||||||
|
})
|
||||||
|
it('should not reject if decompression failed', async () => {
|
||||||
|
jest.mock('decompress', () => ({
|
||||||
|
default: () => {
|
||||||
|
return Promise.reject('Failed to decompress')
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
// Call the decompressRunner function
|
||||||
|
const result = await index.decompressRunner('', '')
|
||||||
|
expect(result).toBeUndefined()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('addAdditionalDependencies', () => {
|
||||||
|
it('should add additional dependencies successfully', async () => {
|
||||||
|
// Call the addAdditionalDependencies function
|
||||||
|
const result = await index.addAdditionalDependencies({
|
||||||
|
name: 'name',
|
||||||
|
version: 'version',
|
||||||
|
})
|
||||||
|
|
||||||
|
// Assert that the result is as expected
|
||||||
|
expect(result).toBeUndefined()
|
||||||
|
})
|
||||||
|
})
|
||||||
@ -266,7 +266,7 @@ async function validateModelStatus(modelId: string): Promise<void> {
|
|||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: modelId,
|
model: modelId,
|
||||||
// TODO: force to use cortex llamacpp by default
|
// TODO: force to use cortex llamacpp by default
|
||||||
engine: 'cortex.llamacpp'
|
engine: 'cortex.llamacpp',
|
||||||
}),
|
}),
|
||||||
headers: {
|
headers: {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
@ -365,14 +365,37 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
|
|||||||
log(`[CORTEX]::Debug: Spawning cortex subprocess...`)
|
log(`[CORTEX]::Debug: Spawning cortex subprocess...`)
|
||||||
|
|
||||||
return new Promise<void>(async (resolve, reject) => {
|
return new Promise<void>(async (resolve, reject) => {
|
||||||
let executableOptions = executableNitroFile(systemInfo?.gpuSetting)
|
let executableOptions = executableNitroFile(
|
||||||
|
// If ngl is not set or equal to 0, run on CPU with correct instructions
|
||||||
|
systemInfo?.gpuSetting
|
||||||
|
? {
|
||||||
|
...systemInfo.gpuSetting,
|
||||||
|
run_mode:
|
||||||
|
currentSettings?.ngl === undefined || currentSettings.ngl === 0
|
||||||
|
? 'cpu'
|
||||||
|
: systemInfo.gpuSetting.run_mode,
|
||||||
|
}
|
||||||
|
: undefined
|
||||||
|
)
|
||||||
|
|
||||||
const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
|
const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
|
||||||
// Execute the binary
|
// Execute the binary
|
||||||
log(
|
log(
|
||||||
`[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
|
`[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
|
||||||
)
|
)
|
||||||
log(path.parse(executableOptions.executablePath).dir)
|
log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)
|
||||||
|
|
||||||
|
// Add engine path to the PATH and LD_LIBRARY_PATH
|
||||||
|
process.env.PATH = (process.env.PATH || '').concat(
|
||||||
|
path.delimiter,
|
||||||
|
executableOptions.enginePath
|
||||||
|
)
|
||||||
|
log(`[CORTEX] PATH: ${process.env.PATH}`)
|
||||||
|
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
|
||||||
|
path.delimiter,
|
||||||
|
executableOptions.enginePath
|
||||||
|
)
|
||||||
|
|
||||||
subprocess = spawn(
|
subprocess = spawn(
|
||||||
executableOptions.executablePath,
|
executableOptions.executablePath,
|
||||||
['1', LOCAL_HOST, PORT.toString()],
|
['1', LOCAL_HOST, PORT.toString()],
|
||||||
@ -380,6 +403,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
|
|||||||
cwd: path.join(path.parse(executableOptions.executablePath).dir),
|
cwd: path.join(path.parse(executableOptions.executablePath).dir),
|
||||||
env: {
|
env: {
|
||||||
...process.env,
|
...process.env,
|
||||||
|
ENGINE_PATH: executableOptions.enginePath,
|
||||||
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
|
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
|
||||||
// Vulkan - Support 1 device at a time for now
|
// Vulkan - Support 1 device at a time for now
|
||||||
...(executableOptions.vkVisibleDevices?.length > 0 && {
|
...(executableOptions.vkVisibleDevices?.length > 0 && {
|
||||||
@ -440,12 +464,19 @@ const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const addAdditionalDependencies = (data: { name: string; version: string }) => {
|
const addAdditionalDependencies = (data: { name: string; version: string }) => {
|
||||||
|
log(
|
||||||
|
`[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}`
|
||||||
|
)
|
||||||
const additionalPath = path.delimiter.concat(
|
const additionalPath = path.delimiter.concat(
|
||||||
path.join(getJanDataFolderPath(), 'engines', data.name, data.version)
|
path.join(getJanDataFolderPath(), 'engines', data.name, data.version)
|
||||||
)
|
)
|
||||||
// Set the updated PATH
|
// Set the updated PATH
|
||||||
process.env.PATH = (process.env.PATH || '').concat(additionalPath)
|
process.env.PATH = (process.env.PATH || '').concat(
|
||||||
|
path.delimiter,
|
||||||
|
additionalPath
|
||||||
|
)
|
||||||
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
|
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
|
||||||
|
path.delimiter,
|
||||||
additionalPath
|
additionalPath
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -15,5 +15,6 @@
|
|||||||
"importHelpers": true,
|
"importHelpers": true,
|
||||||
"typeRoots": ["node_modules/@types"]
|
"typeRoots": ["node_modules/@types"]
|
||||||
},
|
},
|
||||||
"include": ["src"]
|
"include": ["src"],
|
||||||
|
"exclude": ["src/**/*.test.ts"]
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user