chore: replace nitro by cortex-cpp (#2912)

This commit is contained in:
Louis 2024-05-16 17:46:49 +07:00 committed by GitHub
parent 218259945f
commit 537ef20a54
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 96 additions and 71 deletions

View File

@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
// The URL for the Nitro subprocess to load a model
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
// The URL for the Nitro subprocess to kill itself
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url

View File

@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe
}
const spawnNitroProcess = async (): Promise<void> => {
log(`[SERVER]::Debug: Spawning Nitro subprocess...`)
log(`[SERVER]::Debug: Spawning cortex subprocess...`)
let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'inference-cortex-extension',
'dist',
'bin'
)
@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise<void> => {
const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
// Execute the binary
log(
`[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
`[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
)
subprocess = spawn(
executableOptions.executablePath,
@ -184,12 +184,12 @@ const spawnNitroProcess = async (): Promise<void> => {
})
subprocess.on('close', (code: any) => {
log(`[SERVER]::Debug: Nitro exited with code: ${code}`)
log(`[SERVER]::Debug: cortex exited with code: ${code}`)
subprocess = undefined
})
tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
log(`[SERVER]::Debug: Nitro is ready`)
log(`[SERVER]::Debug: cortex is ready`)
})
}
@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => {
let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'inference-cortex-extension',
'dist',
'bin'
)
let cudaVisibleDevices = ''
let binaryName = 'nitro'
let binaryName = 'cortex-cpp'
/**
* The binary folder is different for each platform.
*/
@ -228,12 +228,16 @@ const executableNitroFile = (): NitroExecutableOptions => {
}
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
}
binaryName = 'nitro.exe'
binaryName = 'cortex-cpp.exe'
} else if (process.platform === 'darwin') {
/**
* For MacOS: mac-arm64 on Apple Silicon (process.arch === 'arm64'), mac-amd64 otherwise (Intel)
*/
binaryFolder = join(binaryFolder, 'mac-universal')
if(process.arch === 'arm64') {
binaryFolder = join(binaryFolder, 'mac-arm64')
} else {
binaryFolder = join(binaryFolder, 'mac-amd64')
}
} else {
/**
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
retryDelay: 500,
})
.then((res: any) => {
log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`)
log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
return Promise.resolve(res)
})
.catch((err: any) => {
@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => {
})
}, 5000)
const tcpPortUsed = require('tcp-port-used')
log(`[SERVER]::Debug: Request to kill Nitro`)
log(`[SERVER]::Debug: Request to kill cortex`)
fetch(NITRO_HTTP_KILL_URL, {
method: 'DELETE',

View File

@ -4,7 +4,7 @@ import { log } from './logger'
export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
const cpu = await physicalCpuCount()
log(`[NITRO]::CPU information - ${cpu}`)
log(`[CORTEX]::CPU information - ${cpu}`)
return {
numCpuPhysicalCore: cpu,

View File

@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter(
}
export async function toolRetrievalIngestNewDocument(
file: string,
model: string,
engine: string
) {
const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file))
const threadPath = path.dirname(filePath.replace('files', ''))
retrieval.updateEmbeddingEngine(engine)
retrieval.updateEmbeddingEngine(model, engine)
return retrieval
.ingestAgentKnowledge(filePath, `${threadPath}/memory`)
.catch((err) => {

View File

@ -28,14 +28,14 @@ export class Retrieval {
})
}
public updateEmbeddingEngine(engine: string): void {
public updateEmbeddingEngine(model: string, engine: string): void {
// Engine settings are not compatible with the current embedding model params
// Switch case manually for now
if (engine === 'nitro') {
this.embeddingModel = new OpenAIEmbeddings(
{ openAIApiKey: 'nitro-embedding' },
{ openAIApiKey: 'nitro-embedding', model },
// TODO: Raw settings
{ basePath: 'http://127.0.0.1:3928/v1' }
{ basePath: 'http://127.0.0.1:3928/v1' },
)
} else {
// Fallback to OpenAI Settings

View File

@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool {
NODE,
'toolRetrievalIngestNewDocument',
docFile,
data.model?.id,
data.model?.engine
)
} else {

View File

@ -0,0 +1,2 @@
bin
!version.txt

View File

@ -1 +1 @@
0.3.22
0.4.4

View File

@ -1,3 +1,3 @@
@echo off
set /p NITRO_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
rem Read the cortex-cpp release version to download from bin/version.txt.
set /p CORTEX_VERSION=<./bin/version.txt
rem Fetch the four Windows builds (CUDA 12.0, CUDA 11.7, CPU/AVX2, Vulkan) into their ./bin/win-* folders.
rem All archives come from the janhq/cortex releases; the && chain stops at the first failed download.
.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan

View File

@ -1,8 +1,8 @@
{
"name": "@janhq/inference-nitro-extension",
"productName": "Nitro Inference Engine",
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
"version": "1.0.7",
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
@ -10,8 +10,8 @@
"scripts": {
"test": "jest",
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro",
"downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp",
"downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",

View File

@ -80,7 +80,7 @@ export default [
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL ||
'http://127.0.0.1:3928/inferences/llamacpp/chat_completion'
'http://127.0.0.1:3928/inferences/server/chat_completion'
),
TROUBLESHOOTING_URL: JSON.stringify(
'https://jan.ai/guides/troubleshooting'

View File

@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([
janDataFolderPath,
'engines',
this.name ?? 'nitro',
this.name ?? 'cortex-cpp',
this.version ?? '1.0.0',
])
@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([
janDataFolderPath,
'engines',
this.name ?? 'nitro',
this.name ?? 'cortex-cpp',
this.version ?? '1.0.0',
])

View File

@ -33,9 +33,22 @@ describe('test executable nitro file', () => {
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
Object.defineProperty(process, 'arch', {
value: 'arm64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-universal${sep}nitro`),
executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
Object.defineProperty(process, 'arch', {
value: 'amd64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -56,7 +69,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`),
executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -89,7 +102,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`),
executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -122,7 +135,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`),
executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -139,7 +152,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cpu${sep}nitro`),
executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -172,7 +185,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`),
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -205,7 +218,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`),
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})

View File

@ -1,4 +1,4 @@
import { GpuSetting, SystemInformation } from '@janhq/core'
import { GpuSetting } from '@janhq/core'
import * as path from 'path'
export interface NitroExecutableOptions {
@ -24,7 +24,7 @@ const os = (): string => {
return process.platform === 'win32'
? 'win'
: process.platform === 'darwin'
? 'mac-universal'
? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
: 'linux'
}
@ -52,7 +52,7 @@ export const executableNitroFile = (
.join('-')
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `nitro${extension()}`
let binaryName = `cortex-cpp${extension()}`
return {
executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),

View File

@ -34,9 +34,9 @@ const LOCAL_HOST = '127.0.0.1'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
// The URL for the Nitro subprocess to load a model
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
// The URL for the Nitro subprocess to kill itself
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
@ -50,7 +50,7 @@ const SUPPORTED_MODEL_FORMAT = '.gguf'
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
// The current model settings
let currentSettings: ModelSettingParams | undefined = undefined
let currentSettings: ModelSettingParams & { model?: string } | undefined = undefined
/**
* Stops a Nitro subprocess.
@ -77,7 +77,7 @@ async function loadModel(
}
if (params.model.engine !== InferenceEngine.nitro) {
return Promise.reject('Not a nitro model')
return Promise.reject('Not a cortex model')
} else {
const nitroResourceProbe = await getSystemResourceInfo()
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
@ -135,6 +135,7 @@ async function loadModel(
// model.settings can override the default settings
...params.model.settings,
llama_model_path,
model: params.model.id,
// This is critical and requires real CPU physical core count (or performance core)
...(params.model.settings.mmproj && {
mmproj: path.isAbsolute(params.model.settings.mmproj)
@ -142,7 +143,7 @@ async function loadModel(
: path.join(modelFolder, params.model.settings.mmproj),
}),
}
return runNitroAndLoadModel(systemInfo)
return runNitroAndLoadModel(params.model.id, systemInfo)
}
}
@ -152,7 +153,7 @@ async function loadModel(
* 3. Validate model status
* @returns
*/
async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
async function runNitroAndLoadModel(modelId: string, systemInfo?: SystemInformation) {
// Gather system information for CPU physical cores and memory
return killSubprocess()
.then(() =>
@ -160,10 +161,10 @@ async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
)
.then(() => spawnNitroProcess(systemInfo))
.then(() => loadLLMModel(currentSettings))
.then(validateModelStatus)
.then(() => validateModelStatus(modelId))
.catch((err) => {
// TODO: Broadcast error so app could display proper error message
log(`[NITRO]::Error: ${err}`)
log(`[CORTEX]::Error: ${err}`)
return { error: err }
})
}
@ -222,7 +223,7 @@ function loadLLMModel(settings: any): Promise<Response> {
if (!settings?.ngl) {
settings.ngl = 100
}
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`)
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: 'POST',
headers: {
@ -234,14 +235,14 @@ function loadLLMModel(settings: any): Promise<Response> {
})
.then((res) => {
log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
`[CORTEX]::Debug: Load model success with response ${JSON.stringify(
res
)}`
)
return Promise.resolve(res)
})
.catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`)
log(`[CORTEX]::Error: Load model failed with error ${err}`)
return Promise.reject(err)
})
}
@ -252,11 +253,12 @@ function loadLLMModel(settings: any): Promise<Response> {
* If the model is loaded successfully, the object is empty.
* If the model is not loaded successfully, the object contains an error message.
*/
async function validateModelStatus(): Promise<void> {
async function validateModelStatus(modelId: string): Promise<void> {
// Send a POST request carrying the model id to the validation URL.
// Retry the request up to 3 times if it fails, with a delay of 300 milliseconds between retries.
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: 'GET',
method: 'POST',
body: JSON.stringify({ model: modelId }),
headers: {
'Content-Type': 'application/json',
},
@ -264,7 +266,7 @@ async function validateModelStatus(): Promise<void> {
retryDelay: 300,
}).then(async (res: Response) => {
log(
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
`[CORTEX]::Debug: Validate model state with response ${JSON.stringify(
res.status
)}`
)
@ -275,7 +277,7 @@ async function validateModelStatus(): Promise<void> {
// Otherwise, return an object with an error message.
if (body.model_loaded) {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
`[CORTEX]::Debug: Validate model state success with response ${JSON.stringify(
body
)}`
)
@ -283,7 +285,7 @@ async function validateModelStatus(): Promise<void> {
}
}
log(
`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
`[CORTEX]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText
)}`
)
@ -298,7 +300,7 @@ async function validateModelStatus(): Promise<void> {
async function killSubprocess(): Promise<void> {
const controller = new AbortController()
setTimeout(() => controller.abort(), 5000)
log(`[NITRO]::Debug: Request to kill Nitro`)
log(`[CORTEX]::Debug: Request to kill cortex`)
const killRequest = () => {
return fetch(NITRO_HTTP_KILL_URL, {
@ -309,17 +311,17 @@ async function killSubprocess(): Promise<void> {
.then(() =>
tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
)
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch((err) => {
log(
`[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
`[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
)
throw 'PORT_NOT_AVAILABLE'
})
}
if (subprocess?.pid) {
log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid
return new Promise((resolve, reject) => {
terminate(pid, function (err) {
@ -329,7 +331,7 @@ async function killSubprocess(): Promise<void> {
tcpPortUsed
.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
.then(() => resolve())
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch(() => {
killRequest().then(resolve).catch(reject)
})
@ -346,22 +348,24 @@ async function killSubprocess(): Promise<void> {
* @returns A promise that resolves when the Nitro subprocess is started.
*/
function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
log(`[NITRO]::Debug: Spawning Nitro subprocess...`)
log(`[CORTEX]::Debug: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => {
let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default
let executableOptions = executableNitroFile(systemInfo?.gpuSetting)
const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
// Execute the binary
log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
`[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
)
log(
path.parse(executableOptions.executablePath).dir
)
subprocess = spawn(
executableOptions.executablePath,
['1', LOCAL_HOST, PORT.toString()],
{
cwd: binaryFolder,
cwd: path.join(path.parse(executableOptions.executablePath).dir),
env: {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
@ -375,15 +379,15 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
// Handle subprocess output
subprocess.stdout.on('data', (data: any) => {
log(`[NITRO]::Debug: ${data}`)
log(`[CORTEX]::Debug: ${data}`)
})
subprocess.stderr.on('data', (data: any) => {
log(`[NITRO]::Error: ${data}`)
log(`[CORTEX]::Error: ${data}`)
})
subprocess.on('close', (code: any) => {
log(`[NITRO]::Debug: Nitro exited with code: ${code}`)
log(`[CORTEX]::Debug: cortex exited with code: ${code}`)
subprocess = undefined
reject(`child process exited with code ${code}`)
})
@ -391,7 +395,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
tcpPortUsed
.waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
.then(() => {
log(`[NITRO]::Debug: Nitro is ready`)
log(`[CORTEX]::Debug: cortex is ready`)
resolve()
})
})

View File

@ -97,7 +97,7 @@ function unloadModel(): Promise<void> {
}
if (subprocess?.pid) {
log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid
return new Promise((resolve, reject) => {
terminate(pid, function (err) {
@ -107,7 +107,7 @@ function unloadModel(): Promise<void> {
return tcpPortUsed
.waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
.then(() => resolve())
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch(() => {
killRequest()
})