chore: replace nitro by cortex-cpp (#2912)
parent 218259945f
commit 537ef20a54
@@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf'
 // The URL for the Nitro subprocess
 const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
 // The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
+export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
 // The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
+export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`

 // The URL for the Nitro subprocess to kill itself
 export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`

-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
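
For reference, a minimal sketch of how a load request against the renamed /inferences/server/loadmodel route might look (hypothetical helper, assuming a Node 18+ global fetch and the default 127.0.0.1:3928 address used elsewhere in this diff; the field names mirror the settings built later in loadModel):

// Hypothetical illustration, not part of the commit.
const LOAD_MODEL_URL = 'http://127.0.0.1:3928/inferences/server/loadmodel'

async function loadModelSketch(modelId: string, ggufPath: string): Promise<void> {
  const res = await fetch(LOAD_MODEL_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    // cortex-cpp is addressed by model id in addition to the GGUF path (see loadModel later in this diff)
    body: JSON.stringify({ model: modelId, llama_model_path: ggufPath, ngl: 100 }),
  })
  if (!res.ok) throw new Error(`Load model failed with status ${res.status}`)
}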
@@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe
 }

 const spawnNitroProcess = async (): Promise<void> => {
-log(`[SERVER]::Debug: Spawning Nitro subprocess...`)
+log(`[SERVER]::Debug: Spawning cortex subprocess...`)

 let binaryFolder = join(
 getJanExtensionsPath(),
 '@janhq',
-'inference-nitro-extension',
+'inference-cortex-extension',
 'dist',
 'bin'
 )
@@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise<void> => {
 const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
 // Execute the binary
 log(
-`[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+`[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
 )
 subprocess = spawn(
 executableOptions.executablePath,
@@ -184,12 +184,12 @@
 })

 subprocess.on('close', (code: any) => {
-log(`[SERVER]::Debug: Nitro exited with code: ${code}`)
+log(`[SERVER]::Debug: cortex exited with code: ${code}`)
 subprocess = undefined
 })

 tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
-log(`[SERVER]::Debug: Nitro is ready`)
+log(`[SERVER]::Debug: cortex is ready`)
 })
 }

@@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => {
 let binaryFolder = join(
 getJanExtensionsPath(),
 '@janhq',
-'inference-nitro-extension',
+'inference-cortex-extension',
 'dist',
 'bin'
 )

 let cudaVisibleDevices = ''
-let binaryName = 'nitro'
+let binaryName = 'cortex-cpp'
 /**
 * The binary folder is different for each platform.
 */
@@ -228,12 +228,16 @@
 }
 cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
 }
-binaryName = 'nitro.exe'
+binaryName = 'cortex-cpp.exe'
 } else if (process.platform === 'darwin') {
 /**
 * For MacOS: mac-universal both Silicon and InteL
 */
-binaryFolder = join(binaryFolder, 'mac-universal')
+if(process.arch === 'arm64') {
+binaryFolder = join(binaryFolder, 'mac-arm64')
+} else {
+binaryFolder = join(binaryFolder, 'mac-amd64')
+}
 } else {
 /**
 * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
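
The darwin branch above now picks a per-architecture folder instead of the old mac-universal bundle. A hedged sketch of the overall folder/binary selection (illustrative helper only; the GPU-specific Windows/Linux folders are collapsed to the CPU variants here):

// Hypothetical illustration, not part of the commit.
import { join } from 'path'

function cortexBinaryPath(binaryFolder: string): string {
  let platformFolder: string
  let binaryName = 'cortex-cpp'
  if (process.platform === 'win32') {
    platformFolder = 'win-cpu' // or win-cuda-11-7 / win-cuda-12-0 / win-vulkan, per GPU settings
    binaryName = 'cortex-cpp.exe'
  } else if (process.platform === 'darwin') {
    platformFolder = process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
  } else {
    platformFolder = 'linux-cpu' // or linux-cuda-11-7 / linux-cuda-12-0 / linux-vulkan
  }
  return join(binaryFolder, platformFolder, binaryName)
}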
@@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
 retryDelay: 500,
 })
 .then((res: any) => {
-log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`)
+log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
 return Promise.resolve(res)
 })
 .catch((err: any) => {
@@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => {
 })
 }, 5000)
 const tcpPortUsed = require('tcp-port-used')
-log(`[SERVER]::Debug: Request to kill Nitro`)
+log(`[SERVER]::Debug: Request to kill cortex`)

 fetch(NITRO_HTTP_KILL_URL, {
 method: 'DELETE',
@@ -4,7 +4,7 @@ import { log } from './logger'

 export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
 const cpu = await physicalCpuCount()
-log(`[NITRO]::CPU information - ${cpu}`)
+log(`[CORTEX]::CPU information - ${cpu}`)

 return {
 numCpuPhysicalCore: cpu,
@@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter(
 }
 export async function toolRetrievalIngestNewDocument(
 file: string,
+model: string,
 engine: string
 ) {
 const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file))
 const threadPath = path.dirname(filePath.replace('files', ''))
-retrieval.updateEmbeddingEngine(engine)
+retrieval.updateEmbeddingEngine(model, engine)
 return retrieval
 .ingestAgentKnowledge(filePath, `${threadPath}/memory`)
 .catch((err) => {
@@ -28,14 +28,14 @@ export class Retrieval {
 })
 }

-public updateEmbeddingEngine(engine: string): void {
+public updateEmbeddingEngine(model: string, engine: string): void {
 // Engine settings are not compatible with the current embedding model params
 // Switch case manually for now
 if (engine === 'nitro') {
 this.embeddingModel = new OpenAIEmbeddings(
-{ openAIApiKey: 'nitro-embedding' },
+{ openAIApiKey: 'nitro-embedding', model },
 // TODO: Raw settings
-{ basePath: 'http://127.0.0.1:3928/v1' }
+{ basePath: 'http://127.0.0.1:3928/v1' },
 )
 } else {
 // Fallback to OpenAI Settings
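
A sketch of what the new two-argument updateEmbeddingEngine amounts to: the model id is forwarded into the OpenAIEmbeddings client that points at the local server. The import path and the fallback branch are assumptions for illustration, not part of the commit:

// Hypothetical illustration, not part of the commit; the import path is an assumption.
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'

function buildEmbeddingModel(model: string, engine: string): OpenAIEmbeddings {
  if (engine === 'nitro') {
    // Local OpenAI-compatible server: pass the model id through and point at cortex-cpp
    return new OpenAIEmbeddings(
      { openAIApiKey: 'nitro-embedding', model },
      { basePath: 'http://127.0.0.1:3928/v1' }
    )
  }
  // Fallback to regular OpenAI settings (API key assumed to come from the environment here)
  return new OpenAIEmbeddings({ openAIApiKey: process.env.OPENAI_API_KEY })
}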
@@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool {
 NODE,
 'toolRetrievalIngestNewDocument',
 docFile,
+data.model?.id,
 data.model?.engine
 )
 } else {
extensions/inference-nitro-extension/.gitignore (vendored, new file, +2)
@@ -0,0 +1,2 @@
+bin
+!version.txt
@@ -1 +1 @@
-0.3.22
+0.4.4
@@ -1,3 +1,3 @@
 @echo off
-set /p NITRO_VERSION=<./bin/version.txt
-.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
+set /p CORTEX_VERSION=<./bin/version.txt
+.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
@@ -1,8 +1,8 @@
 {
-"name": "@janhq/inference-nitro-extension",
-"productName": "Nitro Inference Engine",
+"name": "@janhq/inference-cortex-extension",
+"productName": "Cortex Inference Engine",
 "version": "1.0.7",
-"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
+"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
 "main": "dist/index.js",
 "node": "dist/node/index.cjs.js",
 "author": "Jan <service@jan.ai>",
@@ -10,8 +10,8 @@
 "scripts": {
 "test": "jest",
 "build": "tsc --module commonjs && rollup -c rollup.config.ts",
-"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
-"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro",
+"downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp",
+"downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp",
 "downloadnitro:win32": "download.bat",
 "downloadnitro": "run-script-os",
 "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
@@ -80,7 +80,7 @@ export default [
 DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
 INFERENCE_URL: JSON.stringify(
 process.env.INFERENCE_URL ||
-'http://127.0.0.1:3928/inferences/llamacpp/chat_completion'
+'http://127.0.0.1:3928/inferences/server/chat_completion'
 ),
 TROUBLESHOOTING_URL: JSON.stringify(
 'https://jan.ai/guides/troubleshooting'
@@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
 const executableFolderPath = await joinPath([
 janDataFolderPath,
 'engines',
-this.name ?? 'nitro',
+this.name ?? 'cortex-cpp',
 this.version ?? '1.0.0',
 ])

@@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
 const executableFolderPath = await joinPath([
 janDataFolderPath,
 'engines',
-this.name ?? 'nitro',
+this.name ?? 'cortex-cpp',
 this.version ?? '1.0.0',
 ])

@@ -33,9 +33,22 @@ describe('test executable nitro file', () => {
 Object.defineProperty(process, 'platform', {
 value: 'darwin',
 })
+Object.defineProperty(process, 'arch', {
+value: 'arm64',
+})
 expect(executableNitroFile(testSettings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`mac-universal${sep}nitro`),
+executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
 cudaVisibleDevices: '',
 vkVisibleDevices: '',
 })
+)
+Object.defineProperty(process, 'arch', {
+value: 'amd64',
+})
+expect(executableNitroFile(testSettings)).toEqual(
+expect.objectContaining({
+executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
+cudaVisibleDevices: '',
+vkVisibleDevices: '',
+})
@@ -56,7 +69,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`),
+executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
 cudaVisibleDevices: '',
 vkVisibleDevices: '',
 })
@@ -89,7 +102,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`),
+executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
 cudaVisibleDevices: '0',
 vkVisibleDevices: '0',
 })
@@ -122,7 +135,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`),
+executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
 cudaVisibleDevices: '0',
 vkVisibleDevices: '0',
 })
@@ -139,7 +152,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`linux-cpu${sep}nitro`),
+executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
 cudaVisibleDevices: '',
 vkVisibleDevices: '',
 })
@@ -172,7 +185,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`),
+executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
 cudaVisibleDevices: '0',
 vkVisibleDevices: '0',
 })
@@ -205,7 +218,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`),
+executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
 cudaVisibleDevices: '0',
 vkVisibleDevices: '0',
 })
@@ -1,4 +1,4 @@
-import { GpuSetting, SystemInformation } from '@janhq/core'
+import { GpuSetting } from '@janhq/core'
 import * as path from 'path'

 export interface NitroExecutableOptions {
@@ -24,7 +24,7 @@ const os = (): string => {
 return process.platform === 'win32'
 ? 'win'
 : process.platform === 'darwin'
-? 'mac-universal'
+? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
 : 'linux'
 }

@@ -52,7 +52,7 @@ export const executableNitroFile = (
 .join('-')
 let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
 let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
-let binaryName = `nitro${extension()}`
+let binaryName = `cortex-cpp${extension()}`

 return {
 executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),
@@ -34,9 +34,9 @@ const LOCAL_HOST = '127.0.0.1'
 // The URL for the Nitro subprocess
 const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
 // The URL for the Nitro subprocess to load a model
-const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
+const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
 // The URL for the Nitro subprocess to validate a model
-const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
+const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
 // The URL for the Nitro subprocess to kill itself
 const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`

@@ -50,7 +50,7 @@ const SUPPORTED_MODEL_FORMAT = '.gguf'
 let subprocess: ChildProcessWithoutNullStreams | undefined = undefined

 // The current model settings
-let currentSettings: ModelSettingParams | undefined = undefined
+let currentSettings: ModelSettingParams & { model?: string } | undefined = undefined

 /**
 * Stops a Nitro subprocess.
@@ -77,7 +77,7 @@ async function loadModel(
 }

 if (params.model.engine !== InferenceEngine.nitro) {
-return Promise.reject('Not a nitro model')
+return Promise.reject('Not a cortex model')
 } else {
 const nitroResourceProbe = await getSystemResourceInfo()
 // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
@@ -135,6 +135,7 @@ async function loadModel(
 // model.settings can override the default settings
 ...params.model.settings,
 llama_model_path,
+model: params.model.id,
 // This is critical and requires real CPU physical core count (or performance core)
 ...(params.model.settings.mmproj && {
 mmproj: path.isAbsolute(params.model.settings.mmproj)
@@ -142,7 +143,7 @@
 : path.join(modelFolder, params.model.settings.mmproj),
 }),
 }
-return runNitroAndLoadModel(systemInfo)
+return runNitroAndLoadModel(params.model.id, systemInfo)
 }
 }

@@ -152,7 +153,7 @@
 * 3. Validate model status
 * @returns
 */
-async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
+async function runNitroAndLoadModel(modelId: string, systemInfo?: SystemInformation) {
 // Gather system information for CPU physical cores and memory
 return killSubprocess()
 .then(() =>
@@ -160,10 +161,10 @@ async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
 )
 .then(() => spawnNitroProcess(systemInfo))
 .then(() => loadLLMModel(currentSettings))
-.then(validateModelStatus)
+.then(() => validateModelStatus(modelId))
 .catch((err) => {
 // TODO: Broadcast error so app could display proper error message
-log(`[NITRO]::Error: ${err}`)
+log(`[CORTEX]::Error: ${err}`)
 return { error: err }
 })
 }
@@ -222,7 +223,7 @@ function loadLLMModel(settings: any): Promise<Response> {
 if (!settings?.ngl) {
 settings.ngl = 100
 }
-log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
+log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`)
 return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
 method: 'POST',
 headers: {
@@ -234,14 +235,14 @@ function loadLLMModel(settings: any): Promise<Response> {
 })
 .then((res) => {
 log(
-`[NITRO]::Debug: Load model success with response ${JSON.stringify(
+`[CORTEX]::Debug: Load model success with response ${JSON.stringify(
 res
 )}`
 )
 return Promise.resolve(res)
 })
 .catch((err) => {
-log(`[NITRO]::Error: Load model failed with error ${err}`)
+log(`[CORTEX]::Error: Load model failed with error ${err}`)
 return Promise.reject(err)
 })
 }
@@ -252,11 +253,12 @@ function loadLLMModel(settings: any): Promise<Response> {
 * If the model is loaded successfully, the object is empty.
 * If the model is not loaded successfully, the object contains an error message.
 */
-async function validateModelStatus(): Promise<void> {
+async function validateModelStatus(modelId: string): Promise<void> {
 // Send a GET request to the validation URL.
 // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
 return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
-method: 'GET',
+method: 'POST',
+body: JSON.stringify({ model: modelId }),
 headers: {
 'Content-Type': 'application/json',
 },
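
A minimal sketch of the new validation call: the model-status route is now a POST that carries the model id, and readiness is still reported via model_loaded (hypothetical standalone helper; the extension itself goes through fetchRetry as shown above):

// Hypothetical illustration, not part of the commit.
const VALIDATE_MODEL_URL = 'http://127.0.0.1:3928/inferences/server/modelstatus'

async function isModelLoaded(modelId: string): Promise<boolean> {
  const res = await fetch(VALIDATE_MODEL_URL, {
    method: 'POST',
    body: JSON.stringify({ model: modelId }),
    headers: { 'Content-Type': 'application/json' },
  })
  if (!res.ok) return false
  const body = await res.json()
  // Same readiness flag that validateModelStatus checks further down
  return body.model_loaded === true
}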
@@ -264,7 +266,7 @@ async function validateModelStatus(): Promise<void> {
 retryDelay: 300,
 }).then(async (res: Response) => {
 log(
-`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
+`[CORTEX]::Debug: Validate model state with response ${JSON.stringify(
 res.status
 )}`
 )
@@ -275,7 +277,7 @@ async function validateModelStatus(): Promise<void> {
 // Otherwise, return an object with an error message.
 if (body.model_loaded) {
 log(
-`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
+`[CORTEX]::Debug: Validate model state success with response ${JSON.stringify(
 body
 )}`
 )
@@ -283,7 +285,7 @@ async function validateModelStatus(): Promise<void> {
 }
 }
 log(
-`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
+`[CORTEX]::Debug: Validate model state failed with response ${JSON.stringify(
 res.statusText
 )}`
 )
@@ -298,7 +300,7 @@ async function validateModelStatus(): Promise<void> {
 async function killSubprocess(): Promise<void> {
 const controller = new AbortController()
 setTimeout(() => controller.abort(), 5000)
-log(`[NITRO]::Debug: Request to kill Nitro`)
+log(`[CORTEX]::Debug: Request to kill cortex`)

 const killRequest = () => {
 return fetch(NITRO_HTTP_KILL_URL, {
@@ -309,17 +311,17 @@ async function killSubprocess(): Promise<void> {
 .then(() =>
 tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
 )
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
 .catch((err) => {
 log(
-`[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
+`[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
 )
 throw 'PORT_NOT_AVAILABLE'
 })
 }

 if (subprocess?.pid) {
-log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
+log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
 const pid = subprocess.pid
 return new Promise((resolve, reject) => {
 terminate(pid, function (err) {
@@ -329,7 +331,7 @@ async function killSubprocess(): Promise<void> {
 tcpPortUsed
 .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
 .then(() => resolve())
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
 .catch(() => {
 killRequest().then(resolve).catch(reject)
 })
@@ -346,22 +348,24 @@
 * @returns A promise that resolves when the Nitro subprocess is started.
 */
 function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
-log(`[NITRO]::Debug: Spawning Nitro subprocess...`)
+log(`[CORTEX]::Debug: Spawning cortex subprocess...`)

 return new Promise<void>(async (resolve, reject) => {
-let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default
 let executableOptions = executableNitroFile(systemInfo?.gpuSetting)

 const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
 // Execute the binary
 log(
-`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+`[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
 )
+log(
+path.parse(executableOptions.executablePath).dir
+)
 subprocess = spawn(
 executableOptions.executablePath,
 ['1', LOCAL_HOST, PORT.toString()],
 {
-cwd: binaryFolder,
+cwd: path.join(path.parse(executableOptions.executablePath).dir),
 env: {
 ...process.env,
 CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
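
A sketch of the spawn shape introduced above: the working directory is now derived from the executable path itself rather than a hard-coded bin folder, presumably so files shipped next to the binary resolve correctly (illustrative helper only; names and the rationale are assumptions):

// Hypothetical illustration, not part of the commit.
import { spawn, ChildProcessWithoutNullStreams } from 'child_process'
import * as path from 'path'

function spawnCortex(
  executablePath: string,
  cudaVisibleDevices: string,
  port: number
): ChildProcessWithoutNullStreams {
  // Run the binary from its own folder so resources alongside it resolve (assumption).
  const binaryDir = path.parse(executablePath).dir
  return spawn(executablePath, ['1', '127.0.0.1', port.toString()], {
    cwd: binaryDir,
    env: { ...process.env, CUDA_VISIBLE_DEVICES: cudaVisibleDevices },
  })
}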
@@ -375,15 +379,15 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {

 // Handle subprocess output
 subprocess.stdout.on('data', (data: any) => {
-log(`[NITRO]::Debug: ${data}`)
+log(`[CORTEX]::Debug: ${data}`)
 })

 subprocess.stderr.on('data', (data: any) => {
-log(`[NITRO]::Error: ${data}`)
+log(`[CORTEX]::Error: ${data}`)
 })

 subprocess.on('close', (code: any) => {
-log(`[NITRO]::Debug: Nitro exited with code: ${code}`)
+log(`[CORTEX]::Debug: cortex exited with code: ${code}`)
 subprocess = undefined
 reject(`child process exited with code ${code}`)
 })
@@ -391,7 +395,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
 tcpPortUsed
 .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
 .then(() => {
-log(`[NITRO]::Debug: Nitro is ready`)
+log(`[CORTEX]::Debug: cortex is ready`)
 resolve()
 })
 })
@@ -97,7 +97,7 @@ function unloadModel(): Promise<void> {
 }

 if (subprocess?.pid) {
-log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
+log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
 const pid = subprocess.pid
 return new Promise((resolve, reject) => {
 terminate(pid, function (err) {
@@ -107,7 +107,7 @@ function unloadModel(): Promise<void> {
 return tcpPortUsed
 .waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
 .then(() => resolve())
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
 .catch(() => {
 killRequest()
 })