From 537ef20a548bdf054585ec0adfafa9135da9654c Mon Sep 17 00:00:00 2001 From: Louis Date: Thu, 16 May 2024 17:46:49 +0700 Subject: [PATCH] chore: replace nitro by cortex-cpp (#2912) --- core/src/node/api/restful/helper/consts.ts | 6 +- .../node/api/restful/helper/startStopModel.ts | 26 ++++---- core/src/node/helper/resource.ts | 2 +- .../assistant-extension/src/node/index.ts | 3 +- .../assistant-extension/src/node/retrieval.ts | 6 +- .../src/tools/retrieval.ts | 1 + .../inference-nitro-extension/.gitignore | 2 + .../inference-nitro-extension/bin/version.txt | 2 +- .../inference-nitro-extension/download.bat | 4 +- .../inference-nitro-extension/package.json | 10 +-- .../rollup.config.ts | 2 +- .../inference-nitro-extension/src/index.ts | 4 +- .../src/node/execute.test.ts | 27 +++++--- .../src/node/execute.ts | 6 +- .../src/node/index.ts | 62 ++++++++++--------- .../tensorrt-llm-extension/src/node/index.ts | 4 +- 16 files changed, 96 insertions(+), 71 deletions(-) create mode 100644 extensions/inference-nitro-extension/.gitignore diff --git a/core/src/node/api/restful/helper/consts.ts b/core/src/node/api/restful/helper/consts.ts index bc3cfe300..8d8f8e341 100644 --- a/core/src/node/api/restful/helper/consts.ts +++ b/core/src/node/api/restful/helper/consts.ts @@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf' // The URL for the Nitro subprocess const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}` // The URL for the Nitro subprocess to load a model -export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` +export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel` // The URL for the Nitro subprocess to validate a model -export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` +export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus` // The URL for the Nitro subprocess 
to kill itself export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` -export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url +export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts index bcd182cb5..8665850da 100644 --- a/core/src/node/api/restful/helper/startStopModel.ts +++ b/core/src/node/api/restful/helper/startStopModel.ts @@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe } const spawnNitroProcess = async (): Promise => { - log(`[SERVER]::Debug: Spawning Nitro subprocess...`) + log(`[SERVER]::Debug: Spawning cortex subprocess...`) let binaryFolder = join( getJanExtensionsPath(), '@janhq', - 'inference-nitro-extension', + 'inference-cortex-extension', 'dist', 'bin' ) @@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise => { const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()] // Execute the binary log( - `[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` + `[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}` ) subprocess = spawn( executableOptions.executablePath, @@ -184,12 +184,12 @@ const spawnNitroProcess = async (): Promise => { }) subprocess.on('close', (code: any) => { - log(`[SERVER]::Debug: Nitro exited with code: ${code}`) + log(`[SERVER]::Debug: cortex exited with code: ${code}`) subprocess = undefined }) tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => { - log(`[SERVER]::Debug: Nitro is ready`) + log(`[SERVER]::Debug: cortex is ready`) }) } @@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => { let binaryFolder = join( 
getJanExtensionsPath(), '@janhq', - 'inference-nitro-extension', + 'inference-cortex-extension', 'dist', 'bin' ) let cudaVisibleDevices = '' - let binaryName = 'nitro' + let binaryName = 'cortex-cpp' /** * The binary folder is different for each platform. */ @@ -228,12 +228,16 @@ const executableNitroFile = (): NitroExecutableOptions => { } cudaVisibleDevices = nvidiaInfo['gpu_highest_vram'] } - binaryName = 'nitro.exe' + binaryName = 'cortex-cpp.exe' } else if (process.platform === 'darwin') { /** * For MacOS: mac-universal both Silicon and InteL */ - binaryFolder = join(binaryFolder, 'mac-universal') + if(process.arch === 'arm64') { + binaryFolder = join(binaryFolder, 'mac-arm64') + } else { + binaryFolder = join(binaryFolder, 'mac-amd64') + } } else { /** * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0 @@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise => retryDelay: 500, }) .then((res: any) => { - log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`) + log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`) return Promise.resolve(res) }) .catch((err: any) => { @@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => { }) }, 5000) const tcpPortUsed = require('tcp-port-used') - log(`[SERVER]::Debug: Request to kill Nitro`) + log(`[SERVER]::Debug: Request to kill cortex`) fetch(NITRO_HTTP_KILL_URL, { method: 'DELETE', diff --git a/core/src/node/helper/resource.ts b/core/src/node/helper/resource.ts index 6c4a71478..c7bfbf20c 100644 --- a/core/src/node/helper/resource.ts +++ b/core/src/node/helper/resource.ts @@ -4,7 +4,7 @@ import { log } from './logger' export const getSystemResourceInfo = async (): Promise => { const cpu = await physicalCpuCount() - log(`[NITRO]::CPU information - ${cpu}`) + log(`[CORTEX]::CPU information - ${cpu}`) return { numCpuPhysicalCore: cpu, diff --git a/extensions/assistant-extension/src/node/index.ts 
b/extensions/assistant-extension/src/node/index.ts index f303dd51d..46835614d 100644 --- a/extensions/assistant-extension/src/node/index.ts +++ b/extensions/assistant-extension/src/node/index.ts @@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter( } export async function toolRetrievalIngestNewDocument( file: string, + model: string, engine: string ) { const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file)) const threadPath = path.dirname(filePath.replace('files', '')) - retrieval.updateEmbeddingEngine(engine) + retrieval.updateEmbeddingEngine(model, engine) return retrieval .ingestAgentKnowledge(filePath, `${threadPath}/memory`) .catch((err) => { diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts index e89357d5c..52193f221 100644 --- a/extensions/assistant-extension/src/node/retrieval.ts +++ b/extensions/assistant-extension/src/node/retrieval.ts @@ -28,14 +28,14 @@ export class Retrieval { }) } - public updateEmbeddingEngine(engine: string): void { + public updateEmbeddingEngine(model: string, engine: string): void { // Engine settings are not compatible with the current embedding model params // Switch case manually for now if (engine === 'nitro') { this.embeddingModel = new OpenAIEmbeddings( - { openAIApiKey: 'nitro-embedding' }, + { openAIApiKey: 'nitro-embedding', model }, // TODO: Raw settings - { basePath: 'http://127.0.0.1:3928/v1' } + { basePath: 'http://127.0.0.1:3928/v1' }, ) } else { // Fallback to OpenAI Settings diff --git a/extensions/assistant-extension/src/tools/retrieval.ts b/extensions/assistant-extension/src/tools/retrieval.ts index e58305c60..a1a641941 100644 --- a/extensions/assistant-extension/src/tools/retrieval.ts +++ b/extensions/assistant-extension/src/tools/retrieval.ts @@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool { NODE, 'toolRetrievalIngestNewDocument', docFile, + data.model?.id, data.model?.engine ) } else { 
diff --git a/extensions/inference-nitro-extension/.gitignore b/extensions/inference-nitro-extension/.gitignore new file mode 100644 index 000000000..10780f1d4 --- /dev/null +++ b/extensions/inference-nitro-extension/.gitignore @@ -0,0 +1,2 @@ +bin/* +!bin/version.txt \ No newline at end of file diff --git a/extensions/inference-nitro-extension/bin/version.txt b/extensions/inference-nitro-extension/bin/version.txt index 0c4b45492..6f2743d65 100644 --- a/extensions/inference-nitro-extension/bin/version.txt +++ b/extensions/inference-nitro-extension/bin/version.txt @@ -1 +1 @@ -0.3.22 +0.4.4 diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat index c99162eba..9bd2d4b07 100644 --- a/extensions/inference-nitro-extension/download.bat +++ b/extensions/inference-nitro-extension/download.bat @@ -1,3 +1,3 @@ @echo off -set /p NITRO_VERSION=<./bin/version.txt -.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan +set /p CORTEX_VERSION=<./bin/version.txt +.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download
https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index 7d31a1d38..d396778d9 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,8 +1,8 @@ { - "name": "@janhq/inference-nitro-extension", - "productName": "Nitro Inference Engine", + "name": "@janhq/inference-cortex-extension", + "productName": "Cortex Inference Engine", "version": "1.0.7", - "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", + "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++.
See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", "author": "Jan ", @@ -10,8 +10,8 @@ "scripts": { "test": "jest", "build": "tsc --module commonjs && rollup -c rollup.config.ts", - "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro", - "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro", + "downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download 
https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp", + "downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp", "downloadnitro:win32": "download.bat", "downloadnitro": "run-script-os", "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts index f1d3eb32f..b0707f404 100644 --- a/extensions/inference-nitro-extension/rollup.config.ts +++ 
b/extensions/inference-nitro-extension/rollup.config.ts @@ -80,7 +80,7 @@ export default [ DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), INFERENCE_URL: JSON.stringify( process.env.INFERENCE_URL || - 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' + 'http://127.0.0.1:3928/inferences/server/chat_completion' ), TROUBLESHOOTING_URL: JSON.stringify( 'https://jan.ai/guides/troubleshooting' diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index e6bad64f4..a027e8844 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine { const executableFolderPath = await joinPath([ janDataFolderPath, 'engines', - this.name ?? 'nitro', + this.name ?? 'cortex-cpp', this.version ?? '1.0.0', ]) @@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine { const executableFolderPath = await joinPath([ janDataFolderPath, 'engines', - this.name ?? 'nitro', + this.name ?? 'cortex-cpp', this.version ?? 
'1.0.0', ]) diff --git a/extensions/inference-nitro-extension/src/node/execute.test.ts b/extensions/inference-nitro-extension/src/node/execute.test.ts index dfd26deb8..cf9e84acf 100644 --- a/extensions/inference-nitro-extension/src/node/execute.test.ts +++ b/extensions/inference-nitro-extension/src/node/execute.test.ts @@ -33,9 +33,22 @@ describe('test executable nitro file', () => { Object.defineProperty(process, 'platform', { value: 'darwin', }) + Object.defineProperty(process, 'arch', { + value: 'arm64', + }) expect(executableNitroFile(testSettings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`mac-universal${sep}nitro`), + executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`), + cudaVisibleDevices: '', + vkVisibleDevices: '', + }) + ) + Object.defineProperty(process, 'arch', { + value: 'amd64', + }) + expect(executableNitroFile(testSettings)).toEqual( + expect.objectContaining({ + executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -56,7 +69,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`), + executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -89,7 +102,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`), + executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -122,7 +135,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`), + executablePath: 
expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -139,7 +152,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`linux-cpu${sep}nitro`), + executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -172,7 +185,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`), + executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -205,7 +218,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`), + executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-nitro-extension/src/node/execute.ts index 2cfcfe4f3..417734afa 100644 --- a/extensions/inference-nitro-extension/src/node/execute.ts +++ b/extensions/inference-nitro-extension/src/node/execute.ts @@ -1,4 +1,4 @@ -import { GpuSetting, SystemInformation } from '@janhq/core' +import { GpuSetting } from '@janhq/core' import * as path from 'path' export interface NitroExecutableOptions { @@ -24,7 +24,7 @@ const os = (): string => { return process.platform === 'win32' ? 'win' : process.platform === 'darwin' - ? 'mac-universal' + ? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64' : 'linux' } @@ -52,7 +52,7 @@ export const executableNitroFile = ( .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? 
'' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' - let binaryName = `nitro${extension()}` + let binaryName = `cortex-cpp${extension()}` return { executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName), diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts index fbfdb8761..03e172c63 100644 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ b/extensions/inference-nitro-extension/src/node/index.ts @@ -34,9 +34,9 @@ const LOCAL_HOST = '127.0.0.1' // The URL for the Nitro subprocess const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}` // The URL for the Nitro subprocess to load a model -const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` +const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel` // The URL for the Nitro subprocess to validate a model -const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` +const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus` // The URL for the Nitro subprocess to kill itself const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` @@ -50,7 +50,7 @@ const SUPPORTED_MODEL_FORMAT = '.gguf' let subprocess: ChildProcessWithoutNullStreams | undefined = undefined // The current model settings -let currentSettings: ModelSettingParams | undefined = undefined +let currentSettings: ModelSettingParams & { model?: string } | undefined = undefined /** * Stops a Nitro subprocess. 
@@ -77,7 +77,7 @@ async function loadModel( } if (params.model.engine !== InferenceEngine.nitro) { - return Promise.reject('Not a nitro model') + return Promise.reject('Not a cortex model') } else { const nitroResourceProbe = await getSystemResourceInfo() // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt @@ -135,6 +135,7 @@ async function loadModel( // model.settings can override the default settings ...params.model.settings, llama_model_path, + model: params.model.id, // This is critical and requires real CPU physical core count (or performance core) ...(params.model.settings.mmproj && { mmproj: path.isAbsolute(params.model.settings.mmproj) @@ -142,7 +143,7 @@ async function loadModel( : path.join(modelFolder, params.model.settings.mmproj), }), } - return runNitroAndLoadModel(systemInfo) + return runNitroAndLoadModel(params.model.id, systemInfo) } } @@ -152,7 +153,7 @@ async function loadModel( * 3. Validate model status * @returns */ -async function runNitroAndLoadModel(systemInfo?: SystemInformation) { +async function runNitroAndLoadModel(modelId: string, systemInfo?: SystemInformation) { // Gather system information for CPU physical cores and memory return killSubprocess() .then(() => @@ -160,10 +161,10 @@ async function runNitroAndLoadModel(systemInfo?: SystemInformation) { ) .then(() => spawnNitroProcess(systemInfo)) .then(() => loadLLMModel(currentSettings)) - .then(validateModelStatus) + .then(() => validateModelStatus(modelId)) .catch((err) => { // TODO: Broadcast error so app could display proper error message - log(`[NITRO]::Error: ${err}`) + log(`[CORTEX]::Error: ${err}`) return { error: err } }) } @@ -222,7 +223,7 @@ function loadLLMModel(settings: any): Promise { if (!settings?.ngl) { settings.ngl = 100 } - log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`) + log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`) return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { method: 'POST', 
headers: { @@ -234,14 +235,14 @@ function loadLLMModel(settings: any): Promise { }) .then((res) => { log( - `[NITRO]::Debug: Load model success with response ${JSON.stringify( + `[CORTEX]::Debug: Load model success with response ${JSON.stringify( res )}` ) return Promise.resolve(res) }) .catch((err) => { - log(`[NITRO]::Error: Load model failed with error ${err}`) + log(`[CORTEX]::Error: Load model failed with error ${err}`) return Promise.reject(err) }) } @@ -252,11 +253,12 @@ function loadLLMModel(settings: any): Promise { * If the model is loaded successfully, the object is empty. * If the model is not loaded successfully, the object contains an error message. */ -async function validateModelStatus(): Promise { +async function validateModelStatus(modelId: string): Promise { // Send a GET request to the validation URL. // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { - method: 'GET', + method: 'POST', + body: JSON.stringify({ model: modelId }), headers: { 'Content-Type': 'application/json', }, @@ -264,7 +266,7 @@ async function validateModelStatus(): Promise { retryDelay: 300, }).then(async (res: Response) => { log( - `[NITRO]::Debug: Validate model state with response ${JSON.stringify( + `[CORTEX]::Debug: Validate model state with response ${JSON.stringify( res.status )}` ) @@ -275,7 +277,7 @@ async function validateModelStatus(): Promise { // Otherwise, return an object with an error message. 
if (body.model_loaded) { log( - `[NITRO]::Debug: Validate model state success with response ${JSON.stringify( + `[CORTEX]::Debug: Validate model state success with response ${JSON.stringify( body )}` ) @@ -283,7 +285,7 @@ async function validateModelStatus(): Promise { } } log( - `[NITRO]::Debug: Validate model state failed with response ${JSON.stringify( + `[CORTEX]::Debug: Validate model state failed with response ${JSON.stringify( res.statusText )}` ) @@ -298,7 +300,7 @@ async function validateModelStatus(): Promise { async function killSubprocess(): Promise { const controller = new AbortController() setTimeout(() => controller.abort(), 5000) - log(`[NITRO]::Debug: Request to kill Nitro`) + log(`[CORTEX]::Debug: Request to kill cortex`) const killRequest = () => { return fetch(NITRO_HTTP_KILL_URL, { @@ -309,17 +311,17 @@ async function killSubprocess(): Promise { .then(() => tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) ) - .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) + .then(() => log(`[CORTEX]::Debug: cortex process is terminated`)) .catch((err) => { log( - `[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}` + `[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? 
${err}` ) throw 'PORT_NOT_AVAILABLE' }) } if (subprocess?.pid) { - log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`) + log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`) const pid = subprocess.pid return new Promise((resolve, reject) => { terminate(pid, function (err) { @@ -329,7 +331,7 @@ async function killSubprocess(): Promise { tcpPortUsed .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) .then(() => resolve()) - .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) + .then(() => log(`[CORTEX]::Debug: cortex process is terminated`)) .catch(() => { killRequest().then(resolve).catch(reject) }) @@ -346,22 +348,24 @@ async function killSubprocess(): Promise { * @returns A promise that resolves when the Nitro subprocess is started. */ function spawnNitroProcess(systemInfo?: SystemInformation): Promise { - log(`[NITRO]::Debug: Spawning Nitro subprocess...`) + log(`[CORTEX]::Debug: Spawning cortex subprocess...`) return new Promise(async (resolve, reject) => { - let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default let executableOptions = executableNitroFile(systemInfo?.gpuSetting) const args: string[] = ['1', LOCAL_HOST, PORT.toString()] // Execute the binary log( - `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` + `[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}` + ) + log( + path.parse(executableOptions.executablePath).dir ) subprocess = spawn( executableOptions.executablePath, ['1', LOCAL_HOST, PORT.toString()], { - cwd: binaryFolder, + cwd: path.join(path.parse(executableOptions.executablePath).dir), env: { ...process.env, CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, @@ -375,15 +379,15 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise { // Handle subprocess output subprocess.stdout.on('data', (data: any) => { - log(`[NITRO]::Debug: ${data}`) + log(`[CORTEX]::Debug: ${data}`) }) 
subprocess.stderr.on('data', (data: any) => { - log(`[NITRO]::Error: ${data}`) + log(`[CORTEX]::Error: ${data}`) }) subprocess.on('close', (code: any) => { - log(`[NITRO]::Debug: Nitro exited with code: ${code}`) + log(`[CORTEX]::Debug: cortex exited with code: ${code}`) subprocess = undefined reject(`child process exited with code ${code}`) }) @@ -391,7 +395,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise { tcpPortUsed .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000) .then(() => { - log(`[NITRO]::Debug: Nitro is ready`) + log(`[CORTEX]::Debug: cortex is ready`) resolve() }) }) diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts index eb92c98af..c8bc48459 100644 --- a/extensions/tensorrt-llm-extension/src/node/index.ts +++ b/extensions/tensorrt-llm-extension/src/node/index.ts @@ -97,7 +97,7 @@ function unloadModel(): Promise { } if (subprocess?.pid) { - log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`) + log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`) const pid = subprocess.pid return new Promise((resolve, reject) => { terminate(pid, function (err) { @@ -107,7 +107,7 @@ function unloadModel(): Promise { return tcpPortUsed .waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000) .then(() => resolve()) - .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) + .then(() => log(`[CORTEX]::Debug: cortex process is terminated`)) .catch(() => { killRequest() })