From cafdaaaccdc302312697caf15e91a8e07a6d638f Mon Sep 17 00:00:00 2001
From: hiro
Date: Sat, 9 Dec 2023 22:58:00 +0700
Subject: [PATCH 1/6] feat: Nitro hardware sensing at init

---
 .../inference-nitro-extension/package.json    |  4 +-
 .../src/@types/global.d.ts                    |  5 +++
 .../inference-nitro-extension/src/module.ts   | 40 +++++++++++++++----
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index ef74fff08..ecbbf17a8 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -36,6 +36,7 @@
     "kill-port": "^2.0.1",
     "path-browserify": "^1.0.1",
     "rxjs": "^7.8.1",
+    "systeminformation": "^5.21.20",
     "tcp-port-used": "^1.0.2",
     "ts-loader": "^9.5.0",
     "ulid": "^2.3.0"
@@ -52,6 +53,7 @@
     "tcp-port-used",
     "kill-port",
     "fetch-retry",
-    "electron-log"
+    "electron-log",
+    "systeminformation"
   ]
 }
diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts
index 642f10909..f93a3e4c9 100644
--- a/extensions/inference-nitro-extension/src/@types/global.d.ts
+++ b/extensions/inference-nitro-extension/src/@types/global.d.ts
@@ -24,3 +24,8 @@ interface ModelOperationResponse {
   error?: any;
   modelFile?: string;
 }
+
+interface ResourcesInfo {
+  numCpuPhysicalCore: number;
+  memAvailable: number;
+}
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index d36553f40..64a7393fc 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -4,6 +4,7 @@ const path = require("path");
 const { spawn } = require("child_process");
 const tcpPortUsed = require("tcp-port-used");
 const fetchRetry = require("fetch-retry")(global.fetch);
+const si = require("systeminformation");
 
 const log = require("electron-log");
 
@@ -167,7 +168,7 @@ async function checkAndUnloadNitro() {
  * Should run exactly platform specified Nitro binary version
  */
 async function spawnNitroProcess(): Promise<any> {
-  return new Promise((resolve, reject) => {
+  return new Promise(async (resolve, reject) => {
     let binaryFolder = path.join(__dirname, "bin"); // Current directory by default
     let binaryName;
 
@@ -190,10 +191,20 @@ async function spawnNitroProcess(): Promise<any> {
 
     const binaryPath = path.join(binaryFolder, binaryName);
 
+    // Gather system information for CPU physical cores and memory
+    const nitroResourceProbe = await getResourcesInfo();
+    console.log(
+      "Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
+    );
+
     // Execute the binary
-    subprocess = spawn(binaryPath, [1, "127.0.0.1", PORT], {
-      cwd: binaryFolder,
-    });
+    subprocess = spawn(
+      binaryPath,
+      [nitroResourceProbe.numCpuPhysicalCore, "127.0.0.1", PORT],
+      {
+        cwd: binaryFolder,
+      }
+    );
 
     // Handle subprocess output
     subprocess.stdout.on("data", (data) => {
@@ -263,15 +274,28 @@ function validateModelVersion(): Promise<void> {
   });
 }
 
-/**
- * Cleans up any registered resources. 
- * Its module specific function, should be called when application is closed
- */
+
 function dispose() {
   // clean other registered resources here
   killSubprocess();
 }
 
+/**
+ * Get the system resources information
+ */
+async function getResourcesInfo(): Promise<ResourcesInfo> {
+  return new Promise(async (resolve) => {
+    const cpu = await si.cpu();
+    const mem = await si.mem();
+
+    const response = {
+      numCpuPhysicalCore: cpu.physicalCores,
+      memAvailable: mem.available,
+    };
+    resolve(response);
+  });
+}
+
 module.exports = {
   initModel,
   killSubprocess,

From f528e9ea7739460ddecac357a9be7d0ec60ae9e4 Mon Sep 17 00:00:00 2001
From: hiro
Date: Mon, 11 Dec 2023 21:22:56 +0700
Subject: [PATCH 2/6] fix: Update inference nitro with n_threads equal to
 physical core count

---
 .../inference-nitro-extension/src/module.ts | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index 64a7393fc..80d474b94 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -39,15 +39,21 @@ function stopModel(): Promise<ModelOperationResponse> {
  * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
  * TODO: Should it be startModel instead?
  */
-function initModel(wrapper: any): Promise<ModelOperationResponse> {
+async function initModel(wrapper: any): Promise<ModelOperationResponse> {
   currentModelFile = wrapper.modelFullPath;
   if (wrapper.model.engine !== "nitro") {
     return Promise.resolve({ error: "Not a nitro model" });
   } else {
-    log.info("Started to load model " + wrapper.model.modelFullPath);
+    // Gather system information for CPU physical cores and memory
+    const nitroResourceProbe = await getResourcesInfo();
+    console.log(
+      "Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
+    );
     const settings = {
       llama_model_path: currentModelFile,
       ...wrapper.model.settings,
+      // This is critical and requires real system information
+      n_threads: nitroResourceProbe.numCpuPhysicalCore,
     };
     log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`);
     return (
@@ -55,7 +61,7 @@ function initModel(wrapper: any): Promise<ModelOperationResponse> {
       validateModelVersion()
         .then(checkAndUnloadNitro)
         // 2. Spawn the Nitro subprocess
-        .then(spawnNitroProcess)
+        .then(await spawnNitroProcess(nitroResourceProbe))
         // 4. Load the model into the Nitro subprocess (HTTP POST request)
         .then(() => loadLLMModel(settings))
         // 5. Check if the model is loaded successfully
@@ -167,7 +173,7 @@ async function checkAndUnloadNitro() {
  * Using child-process to spawn the process
  * Should run exactly platform specified Nitro binary version
  */
-async function spawnNitroProcess(): Promise<any> {
+async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
   return new Promise(async (resolve, reject) => {
     let binaryFolder = path.join(__dirname, "bin"); // Current directory by default
     let binaryName;
@@ -191,12 +197,6 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
 
     const binaryPath = path.join(binaryFolder, binaryName);
 
-    // Gather system information for CPU physical cores and memory
-    const nitroResourceProbe = await getResourcesInfo();
-    console.log(
-      "Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
-    );
-
     // Execute the binary
     subprocess = spawn(
       binaryPath,
@@ -222,7 +222,7 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
       reject(`Nitro process exited. ${code ?? 
""}`); }); tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => { - resolve(); + resolve(nitroResourceProbe); }); }); } From 0e63689eae163cab4bb42c7490becace4462d229 Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 11 Dec 2023 21:37:23 +0700 Subject: [PATCH 3/6] fix: inference engine nitro stopModel undefined in module.ts --- extensions/inference-nitro-extension/src/module.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts index 80d474b94..b5ba69f5a 100644 --- a/extensions/inference-nitro-extension/src/module.ts +++ b/extensions/inference-nitro-extension/src/module.ts @@ -298,6 +298,7 @@ async function getResourcesInfo(): Promise { module.exports = { initModel, + stopModel, killSubprocess, dispose, }; From 16c66e968c08167e1891735fce39df9628a1ae94 Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 12 Dec 2023 05:58:08 +0700 Subject: [PATCH 4/6] chore: Update n_threads to cpu_threads --- core/src/types/index.ts | 1 + extensions/inference-nitro-extension/src/@types/global.d.ts | 1 + extensions/inference-nitro-extension/src/index.ts | 2 +- extensions/inference-nitro-extension/src/module.ts | 2 +- extensions/inference-openai-extension/src/index.ts | 1 - 5 files changed, 4 insertions(+), 3 deletions(-) diff --git a/core/src/types/index.ts b/core/src/types/index.ts index d5b51cfc0..81ea7e14e 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -275,6 +275,7 @@ export type ModelSettingParams = { ngl?: number; embedding?: boolean; n_parallel?: number; + cpu_threads: number; system_prompt?: string; user_prompt?: string; ai_prompt?: string; diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts index f93a3e4c9..62eb65e52 100644 --- a/extensions/inference-nitro-extension/src/@types/global.d.ts +++ b/extensions/inference-nitro-extension/src/@types/global.d.ts @@ -12,6 +12,7 @@ declare const INFERENCE_URL: string; interface EngineSettings { ctx_len: number; ngl: number; + cpu_threads: number; cont_batching: boolean; embedding: boolean; } diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index e5f3f4360..f2fbf0d34 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -12,7 +12,6 @@ import { EventName, MessageRequest, MessageStatus, - ModelSettingParams, ExtensionType, ThreadContent, ThreadMessage, @@ -41,6 +40,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension { private static _engineSettings: EngineSettings = { ctx_len: 2048, ngl: 100, + cpu_threads: 1, cont_batching: false, embedding: false, }; diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts index b5ba69f5a..266566e91 100644 --- a/extensions/inference-nitro-extension/src/module.ts +++ b/extensions/inference-nitro-extension/src/module.ts @@ -53,7 +53,7 @@ async function initModel(wrapper: any): Promise { llama_model_path: currentModelFile, ...wrapper.model.settings, // This is critical and requires real system information - n_threads: nitroResourceProbe.numCpuPhysicalCore, + cpu_threads: nitroResourceProbe.numCpuPhysicalCore, }; log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`); return ( diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 
7e3e6e71e..6bab563dd 100644
--- a/extensions/inference-openai-extension/src/index.ts
+++ b/extensions/inference-openai-extension/src/index.ts
@@ -12,7 +12,6 @@ import {
   EventName,
   MessageRequest,
   MessageStatus,
-  ModelSettingParams,
   ExtensionType,
   ThreadContent,
   ThreadMessage,

From 577921f21f56fbd5bd7c64b3d0ce68a2c4d5f357 Mon Sep 17 00:00:00 2001
From: hiro
Date: Tue, 12 Dec 2023 06:12:29 +0700
Subject: [PATCH 5/6] chore: Add cpu_threads to default model settings as 1

---
 core/src/types/index.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/types/index.ts b/core/src/types/index.ts
index 81ea7e14e..7314a4ae3 100644
--- a/core/src/types/index.ts
+++ b/core/src/types/index.ts
@@ -275,7 +275,7 @@ export type ModelSettingParams = {
   ngl?: number;
   embedding?: boolean;
   n_parallel?: number;
-  cpu_threads: number;
+  cpu_threads?: number;
   system_prompt?: string;
   user_prompt?: string;
   ai_prompt?: string;

From 14f83ddb7078ca826130eea67b86504819a77232 Mon Sep 17 00:00:00 2001
From: hiro
Date: Tue, 12 Dec 2023 07:27:25 +0700
Subject: [PATCH 6/6] fix: Revert drogon threads to 1 instead of CPU count

---
 .../inference-nitro-extension/src/module.ts | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index 266566e91..047581dbe 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -179,10 +179,8 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
     let binaryName;
 
     if (process.platform === "win32") {
-      // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
       binaryName = "win-start.bat";
     } else if (process.platform === "darwin") {
-      // Mac OS platform
       if (process.arch === "arm64") {
         binaryFolder = path.join(binaryFolder, "mac-arm64");
       } else {
@@ -190,21 +188,15 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
       }
       binaryName = "nitro";
     } else {
-      // Linux
-      // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
-      binaryName = "linux-start.sh"; // For other platforms
+      binaryName = "linux-start.sh";
     }
 
     const binaryPath = path.join(binaryFolder, binaryName);
 
     // Execute the binary
-    subprocess = spawn(
-      binaryPath,
-      [nitroResourceProbe.numCpuPhysicalCore, "127.0.0.1", PORT],
-      {
-        cwd: binaryFolder,
-      }
-    );
+    subprocess = spawn(binaryPath, [1, LOCAL_HOST, PORT], {
+      cwd: binaryFolder,
+    });
 
     // Handle subprocess output
     subprocess.stdout.on("data", (data) => {
@@ -274,7 +266,6 @@ function validateModelVersion(): Promise<void> {
   });
 }
 
-
 function dispose() {
   // clean other registered resources here
   killSubprocess();
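
Taken together, the series settles on: probe the host once with systeminformation, feed the physical-core count into the model load settings (n_threads, renamed cpu_threads in patch 4), and pin the first CLI argument — the thread count for Nitro's own drogon-based HTTP server — back to 1 (patch 6). The probe itself boils down to two documented systeminformation calls: si.cpu() resolves an object whose physicalCores field is the physical (not logical) core count, and si.mem() resolves one whose available field is memory available to processes, in bytes. A minimal standalone sketch follows; probeResources is a hypothetical name for what the patches call getResourcesInfo, and it drops the new Promise(async ...) wrapper seen in patch 1, which is redundant because a plain async function already returns a promise:

const si = require("systeminformation");

interface ResourcesInfo {
  numCpuPhysicalCore: number;
  memAvailable: number;
}

// One-shot host probe: physical core count and available memory.
async function probeResources(): Promise<ResourcesInfo> {
  const cpu = await si.cpu(); // cpu.physicalCores: physical core count
  const mem = await si.mem(); // mem.available: available memory, in bytes
  return {
    numCpuPhysicalCore: cpu.physicalCores,
    memAvailable: mem.available,
  };
}

// Usage in the spirit of initModel after patch 4: merge the probe into the settings.
probeResources().then((info) => {
  const settings = { cpu_threads: info.numCpuPhysicalCore };
  console.log(`Load model settings: ${JSON.stringify(settings, null, 2)}`);
});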