chore: replace nitro by cortex-cpp (#2912)

This commit is contained in:
Louis 2024-05-16 17:46:49 +07:00 committed by GitHub
parent 218259945f
commit 537ef20a54
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 96 additions and 71 deletions

View File

@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf'
// The URL for the Nitro subprocess // The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}` const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
// The URL for the Nitro subprocess to load a model // The URL for the Nitro subprocess to load a model
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model // The URL for the Nitro subprocess to validate a model
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
// The URL for the Nitro subprocess to kill itself // The URL for the Nitro subprocess to kill itself
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url

View File

@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe
} }
const spawnNitroProcess = async (): Promise<void> => { const spawnNitroProcess = async (): Promise<void> => {
log(`[SERVER]::Debug: Spawning Nitro subprocess...`) log(`[SERVER]::Debug: Spawning cortex subprocess...`)
let binaryFolder = join( let binaryFolder = join(
getJanExtensionsPath(), getJanExtensionsPath(),
'@janhq', '@janhq',
'inference-nitro-extension', 'inference-cortex-extension',
'dist', 'dist',
'bin' 'bin'
) )
@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise<void> => {
const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()] const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
// Execute the binary // Execute the binary
log( log(
`[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` `[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
) )
subprocess = spawn( subprocess = spawn(
executableOptions.executablePath, executableOptions.executablePath,
@ -184,12 +184,12 @@ const spawnNitroProcess = async (): Promise<void> => {
}) })
subprocess.on('close', (code: any) => { subprocess.on('close', (code: any) => {
log(`[SERVER]::Debug: Nitro exited with code: ${code}`) log(`[SERVER]::Debug: cortex exited with code: ${code}`)
subprocess = undefined subprocess = undefined
}) })
tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => { tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
log(`[SERVER]::Debug: Nitro is ready`) log(`[SERVER]::Debug: cortex is ready`)
}) })
} }
@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => {
let binaryFolder = join( let binaryFolder = join(
getJanExtensionsPath(), getJanExtensionsPath(),
'@janhq', '@janhq',
'inference-nitro-extension', 'inference-cortex-extension',
'dist', 'dist',
'bin' 'bin'
) )
let cudaVisibleDevices = '' let cudaVisibleDevices = ''
let binaryName = 'nitro' let binaryName = 'cortex-cpp'
/** /**
* The binary folder is different for each platform. * The binary folder is different for each platform.
*/ */
@ -228,12 +228,16 @@ const executableNitroFile = (): NitroExecutableOptions => {
} }
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram'] cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
} }
binaryName = 'nitro.exe' binaryName = 'cortex-cpp.exe'
} else if (process.platform === 'darwin') { } else if (process.platform === 'darwin') {
/** /**
* For MacOS: mac-universal both Silicon and Intel * For MacOS: mac-arm64 for Apple Silicon, mac-amd64 for Intel
*/ */
binaryFolder = join(binaryFolder, 'mac-universal') if(process.arch === 'arm64') {
binaryFolder = join(binaryFolder, 'mac-arm64')
} else {
binaryFolder = join(binaryFolder, 'mac-amd64')
}
} else { } else {
/** /**
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0 * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
retryDelay: 500, retryDelay: 500,
}) })
.then((res: any) => { .then((res: any) => {
log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`) log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
return Promise.resolve(res) return Promise.resolve(res)
}) })
.catch((err: any) => { .catch((err: any) => {
@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => {
}) })
}, 5000) }, 5000)
const tcpPortUsed = require('tcp-port-used') const tcpPortUsed = require('tcp-port-used')
log(`[SERVER]::Debug: Request to kill Nitro`) log(`[SERVER]::Debug: Request to kill cortex`)
fetch(NITRO_HTTP_KILL_URL, { fetch(NITRO_HTTP_KILL_URL, {
method: 'DELETE', method: 'DELETE',

View File

@ -4,7 +4,7 @@ import { log } from './logger'
export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => { export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
const cpu = await physicalCpuCount() const cpu = await physicalCpuCount()
log(`[NITRO]::CPU information - ${cpu}`) log(`[CORTEX]::CPU information - ${cpu}`)
return { return {
numCpuPhysicalCore: cpu, numCpuPhysicalCore: cpu,

View File

@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter(
} }
export async function toolRetrievalIngestNewDocument( export async function toolRetrievalIngestNewDocument(
file: string, file: string,
model: string,
engine: string engine: string
) { ) {
const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file)) const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file))
const threadPath = path.dirname(filePath.replace('files', '')) const threadPath = path.dirname(filePath.replace('files', ''))
retrieval.updateEmbeddingEngine(engine) retrieval.updateEmbeddingEngine(model, engine)
return retrieval return retrieval
.ingestAgentKnowledge(filePath, `${threadPath}/memory`) .ingestAgentKnowledge(filePath, `${threadPath}/memory`)
.catch((err) => { .catch((err) => {

View File

@ -28,14 +28,14 @@ export class Retrieval {
}) })
} }
public updateEmbeddingEngine(engine: string): void { public updateEmbeddingEngine(model: string, engine: string): void {
// Engine settings are not compatible with the current embedding model params // Engine settings are not compatible with the current embedding model params
// Switch case manually for now // Switch case manually for now
if (engine === 'nitro') { if (engine === 'nitro') {
this.embeddingModel = new OpenAIEmbeddings( this.embeddingModel = new OpenAIEmbeddings(
{ openAIApiKey: 'nitro-embedding' }, { openAIApiKey: 'nitro-embedding', model },
// TODO: Raw settings // TODO: Raw settings
{ basePath: 'http://127.0.0.1:3928/v1' } { basePath: 'http://127.0.0.1:3928/v1' },
) )
} else { } else {
// Fallback to OpenAI Settings // Fallback to OpenAI Settings

View File

@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool {
NODE, NODE,
'toolRetrievalIngestNewDocument', 'toolRetrievalIngestNewDocument',
docFile, docFile,
data.model?.id,
data.model?.engine data.model?.engine
) )
} else { } else {

View File

@ -0,0 +1,2 @@
bin
!version.txt

View File

@ -1 +1 @@
0.3.22 0.4.4

View File

@ -1,3 +1,3 @@
@echo off @echo off
set /p NITRO_VERSION=<./bin/version.txt set /p CORTEX_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan

View File

@ -1,8 +1,8 @@
{ {
"name": "@janhq/inference-nitro-extension", "name": "@janhq/inference-cortex-extension",
"productName": "Nitro Inference Engine", "productName": "Cortex Inference Engine",
"version": "1.0.7", "version": "1.0.7",
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js", "main": "dist/index.js",
"node": "dist/node/index.cjs.js", "node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>", "author": "Jan <service@jan.ai>",
@ -10,8 +10,8 @@
"scripts": { "scripts": {
"test": "jest", "test": "jest",
"build": "tsc --module commonjs && rollup -c rollup.config.ts", "build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro", "downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro", "downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp",
"downloadnitro:win32": "download.bat", "downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os", "downloadnitro": "run-script-os",
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",

View File

@ -80,7 +80,7 @@ export default [
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
INFERENCE_URL: JSON.stringify( INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL || process.env.INFERENCE_URL ||
'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' 'http://127.0.0.1:3928/inferences/server/chat_completion'
), ),
TROUBLESHOOTING_URL: JSON.stringify( TROUBLESHOOTING_URL: JSON.stringify(
'https://jan.ai/guides/troubleshooting' 'https://jan.ai/guides/troubleshooting'

View File

@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([ const executableFolderPath = await joinPath([
janDataFolderPath, janDataFolderPath,
'engines', 'engines',
this.name ?? 'nitro', this.name ?? 'cortex-cpp',
this.version ?? '1.0.0', this.version ?? '1.0.0',
]) ])
@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([ const executableFolderPath = await joinPath([
janDataFolderPath, janDataFolderPath,
'engines', 'engines',
this.name ?? 'nitro', this.name ?? 'cortex-cpp',
this.version ?? '1.0.0', this.version ?? '1.0.0',
]) ])

View File

@ -33,9 +33,22 @@ describe('test executable nitro file', () => {
Object.defineProperty(process, 'platform', { Object.defineProperty(process, 'platform', {
value: 'darwin', value: 'darwin',
}) })
Object.defineProperty(process, 'arch', {
value: 'arm64',
})
expect(executableNitroFile(testSettings)).toEqual( expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`mac-universal${sep}nitro`), executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
Object.defineProperty(process, 'arch', {
value: 'amd64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
@ -56,7 +69,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`), executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
@ -89,7 +102,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`), executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
@ -122,7 +135,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`), executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
@ -139,7 +152,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`linux-cpu${sep}nitro`), executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
@ -172,7 +185,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`), executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
@ -205,7 +218,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`), executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })

View File

@ -1,4 +1,4 @@
import { GpuSetting, SystemInformation } from '@janhq/core' import { GpuSetting } from '@janhq/core'
import * as path from 'path' import * as path from 'path'
export interface NitroExecutableOptions { export interface NitroExecutableOptions {
@ -24,7 +24,7 @@ const os = (): string => {
return process.platform === 'win32' return process.platform === 'win32'
? 'win' ? 'win'
: process.platform === 'darwin' : process.platform === 'darwin'
? 'mac-universal' ? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
: 'linux' : 'linux'
} }
@ -52,7 +52,7 @@ export const executableNitroFile = (
.join('-') .join('-')
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `nitro${extension()}` let binaryName = `cortex-cpp${extension()}`
return { return {
executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName), executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),

View File

@ -34,9 +34,9 @@ const LOCAL_HOST = '127.0.0.1'
// The URL for the Nitro subprocess // The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}` const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
// The URL for the Nitro subprocess to load a model // The URL for the Nitro subprocess to load a model
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model // The URL for the Nitro subprocess to validate a model
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
// The URL for the Nitro subprocess to kill itself // The URL for the Nitro subprocess to kill itself
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
@ -50,7 +50,7 @@ const SUPPORTED_MODEL_FORMAT = '.gguf'
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
// The current model settings // The current model settings
let currentSettings: ModelSettingParams | undefined = undefined let currentSettings: ModelSettingParams & { model?: string } | undefined = undefined
/** /**
* Stops a Nitro subprocess. * Stops a Nitro subprocess.
@ -77,7 +77,7 @@ async function loadModel(
} }
if (params.model.engine !== InferenceEngine.nitro) { if (params.model.engine !== InferenceEngine.nitro) {
return Promise.reject('Not a nitro model') return Promise.reject('Not a cortex model')
} else { } else {
const nitroResourceProbe = await getSystemResourceInfo() const nitroResourceProbe = await getSystemResourceInfo()
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
@ -135,6 +135,7 @@ async function loadModel(
// model.settings can override the default settings // model.settings can override the default settings
...params.model.settings, ...params.model.settings,
llama_model_path, llama_model_path,
model: params.model.id,
// This is critical and requires real CPU physical core count (or performance core) // This is critical and requires real CPU physical core count (or performance core)
...(params.model.settings.mmproj && { ...(params.model.settings.mmproj && {
mmproj: path.isAbsolute(params.model.settings.mmproj) mmproj: path.isAbsolute(params.model.settings.mmproj)
@ -142,7 +143,7 @@ async function loadModel(
: path.join(modelFolder, params.model.settings.mmproj), : path.join(modelFolder, params.model.settings.mmproj),
}), }),
} }
return runNitroAndLoadModel(systemInfo) return runNitroAndLoadModel(params.model.id, systemInfo)
} }
} }
@ -152,7 +153,7 @@ async function loadModel(
* 3. Validate model status * 3. Validate model status
* @returns * @returns
*/ */
async function runNitroAndLoadModel(systemInfo?: SystemInformation) { async function runNitroAndLoadModel(modelId: string, systemInfo?: SystemInformation) {
// Gather system information for CPU physical cores and memory // Gather system information for CPU physical cores and memory
return killSubprocess() return killSubprocess()
.then(() => .then(() =>
@ -160,10 +161,10 @@ async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
) )
.then(() => spawnNitroProcess(systemInfo)) .then(() => spawnNitroProcess(systemInfo))
.then(() => loadLLMModel(currentSettings)) .then(() => loadLLMModel(currentSettings))
.then(validateModelStatus) .then(() => validateModelStatus(modelId))
.catch((err) => { .catch((err) => {
// TODO: Broadcast error so app could display proper error message // TODO: Broadcast error so app could display proper error message
log(`[NITRO]::Error: ${err}`) log(`[CORTEX]::Error: ${err}`)
return { error: err } return { error: err }
}) })
} }
@ -222,7 +223,7 @@ function loadLLMModel(settings: any): Promise<Response> {
if (!settings?.ngl) { if (!settings?.ngl) {
settings.ngl = 100 settings.ngl = 100
} }
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`) log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`)
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: 'POST', method: 'POST',
headers: { headers: {
@ -234,14 +235,14 @@ function loadLLMModel(settings: any): Promise<Response> {
}) })
.then((res) => { .then((res) => {
log( log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify( `[CORTEX]::Debug: Load model success with response ${JSON.stringify(
res res
)}` )}`
) )
return Promise.resolve(res) return Promise.resolve(res)
}) })
.catch((err) => { .catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`) log(`[CORTEX]::Error: Load model failed with error ${err}`)
return Promise.reject(err) return Promise.reject(err)
}) })
} }
@ -252,11 +253,12 @@ function loadLLMModel(settings: any): Promise<Response> {
* If the model is loaded successfully, the object is empty. * If the model is loaded successfully, the object is empty.
* If the model is not loaded successfully, the object contains an error message. * If the model is not loaded successfully, the object contains an error message.
*/ */
async function validateModelStatus(): Promise<void> { async function validateModelStatus(modelId: string): Promise<void> {
// Send a GET request to the validation URL. // Send a POST request (with the model id in the JSON body) to the validation URL.
// Retry the request up to 3 times if it fails, with a delay of 300 milliseconds between retries. // Retry the request up to 3 times if it fails, with a delay of 300 milliseconds between retries.
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: 'GET', method: 'POST',
body: JSON.stringify({ model: modelId }),
headers: { headers: {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}, },
@ -264,7 +266,7 @@ async function validateModelStatus(): Promise<void> {
retryDelay: 300, retryDelay: 300,
}).then(async (res: Response) => { }).then(async (res: Response) => {
log( log(
`[NITRO]::Debug: Validate model state with response ${JSON.stringify( `[CORTEX]::Debug: Validate model state with response ${JSON.stringify(
res.status res.status
)}` )}`
) )
@ -275,7 +277,7 @@ async function validateModelStatus(): Promise<void> {
// Otherwise, return an object with an error message. // Otherwise, return an object with an error message.
if (body.model_loaded) { if (body.model_loaded) {
log( log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify( `[CORTEX]::Debug: Validate model state success with response ${JSON.stringify(
body body
)}` )}`
) )
@ -283,7 +285,7 @@ async function validateModelStatus(): Promise<void> {
} }
} }
log( log(
`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify( `[CORTEX]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText res.statusText
)}` )}`
) )
@ -298,7 +300,7 @@ async function validateModelStatus(): Promise<void> {
async function killSubprocess(): Promise<void> { async function killSubprocess(): Promise<void> {
const controller = new AbortController() const controller = new AbortController()
setTimeout(() => controller.abort(), 5000) setTimeout(() => controller.abort(), 5000)
log(`[NITRO]::Debug: Request to kill Nitro`) log(`[CORTEX]::Debug: Request to kill cortex`)
const killRequest = () => { const killRequest = () => {
return fetch(NITRO_HTTP_KILL_URL, { return fetch(NITRO_HTTP_KILL_URL, {
@ -309,17 +311,17 @@ async function killSubprocess(): Promise<void> {
.then(() => .then(() =>
tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
) )
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) .then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch((err) => { .catch((err) => {
log( log(
`[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}` `[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
) )
throw 'PORT_NOT_AVAILABLE' throw 'PORT_NOT_AVAILABLE'
}) })
} }
if (subprocess?.pid) { if (subprocess?.pid) {
log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`) log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid const pid = subprocess.pid
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
terminate(pid, function (err) { terminate(pid, function (err) {
@ -329,7 +331,7 @@ async function killSubprocess(): Promise<void> {
tcpPortUsed tcpPortUsed
.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
.then(() => resolve()) .then(() => resolve())
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) .then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch(() => { .catch(() => {
killRequest().then(resolve).catch(reject) killRequest().then(resolve).catch(reject)
}) })
@ -346,22 +348,24 @@ async function killSubprocess(): Promise<void> {
* @returns A promise that resolves when the Nitro subprocess is started. * @returns A promise that resolves when the Nitro subprocess is started.
*/ */
function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> { function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
log(`[NITRO]::Debug: Spawning Nitro subprocess...`) log(`[CORTEX]::Debug: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => { return new Promise<void>(async (resolve, reject) => {
let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default
let executableOptions = executableNitroFile(systemInfo?.gpuSetting) let executableOptions = executableNitroFile(systemInfo?.gpuSetting)
const args: string[] = ['1', LOCAL_HOST, PORT.toString()] const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
// Execute the binary // Execute the binary
log( log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` `[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
)
log(
path.parse(executableOptions.executablePath).dir
) )
subprocess = spawn( subprocess = spawn(
executableOptions.executablePath, executableOptions.executablePath,
['1', LOCAL_HOST, PORT.toString()], ['1', LOCAL_HOST, PORT.toString()],
{ {
cwd: binaryFolder, cwd: path.join(path.parse(executableOptions.executablePath).dir),
env: { env: {
...process.env, ...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
@ -375,15 +379,15 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
// Handle subprocess output // Handle subprocess output
subprocess.stdout.on('data', (data: any) => { subprocess.stdout.on('data', (data: any) => {
log(`[NITRO]::Debug: ${data}`) log(`[CORTEX]::Debug: ${data}`)
}) })
subprocess.stderr.on('data', (data: any) => { subprocess.stderr.on('data', (data: any) => {
log(`[NITRO]::Error: ${data}`) log(`[CORTEX]::Error: ${data}`)
}) })
subprocess.on('close', (code: any) => { subprocess.on('close', (code: any) => {
log(`[NITRO]::Debug: Nitro exited with code: ${code}`) log(`[CORTEX]::Debug: cortex exited with code: ${code}`)
subprocess = undefined subprocess = undefined
reject(`child process exited with code ${code}`) reject(`child process exited with code ${code}`)
}) })
@ -391,7 +395,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
tcpPortUsed tcpPortUsed
.waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000) .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
.then(() => { .then(() => {
log(`[NITRO]::Debug: Nitro is ready`) log(`[CORTEX]::Debug: cortex is ready`)
resolve() resolve()
}) })
}) })

View File

@ -97,7 +97,7 @@ function unloadModel(): Promise<void> {
} }
if (subprocess?.pid) { if (subprocess?.pid) {
log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`) log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid const pid = subprocess.pid
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
terminate(pid, function (err) { terminate(pid, function (err) {
@ -107,7 +107,7 @@ function unloadModel(): Promise<void> {
return tcpPortUsed return tcpPortUsed
.waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000) .waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
.then(() => resolve()) .then(() => resolve())
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) .then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch(() => { .catch(() => {
killRequest() killRequest()
}) })