Merge pull request #931 from janhq/fix/nitro_perf_cpu

feat: Hotfix for Nitro loading on CPU with hyper-threading support
This commit is contained in:
hiro 2023-12-12 08:15:02 +07:00 committed by GitHub
commit fef97f6736
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 43 additions and 19 deletions

View File

@ -275,6 +275,7 @@ export type ModelSettingParams = {
ngl?: number; ngl?: number;
embedding?: boolean; embedding?: boolean;
n_parallel?: number; n_parallel?: number;
cpu_threads?: number;
system_prompt?: string; system_prompt?: string;
user_prompt?: string; user_prompt?: string;
ai_prompt?: string; ai_prompt?: string;

View File

@ -36,6 +36,7 @@
"kill-port": "^2.0.1", "kill-port": "^2.0.1",
"path-browserify": "^1.0.1", "path-browserify": "^1.0.1",
"rxjs": "^7.8.1", "rxjs": "^7.8.1",
"systeminformation": "^5.21.20",
"tcp-port-used": "^1.0.2", "tcp-port-used": "^1.0.2",
"ts-loader": "^9.5.0", "ts-loader": "^9.5.0",
"ulid": "^2.3.0" "ulid": "^2.3.0"
@ -52,6 +53,7 @@
"tcp-port-used", "tcp-port-used",
"kill-port", "kill-port",
"fetch-retry", "fetch-retry",
"electron-log" "electron-log",
"systeminformation"
] ]
} }

View File

@ -12,6 +12,7 @@ declare const INFERENCE_URL: string;
interface EngineSettings { interface EngineSettings {
ctx_len: number; ctx_len: number;
ngl: number; ngl: number;
cpu_threads: number;
cont_batching: boolean; cont_batching: boolean;
embedding: boolean; embedding: boolean;
} }
@ -24,3 +25,8 @@ interface ModelOperationResponse {
error?: any; error?: any;
modelFile?: string; modelFile?: string;
} }
interface ResourcesInfo {
numCpuPhysicalCore: number;
memAvailable: number;
}

View File

@ -12,7 +12,6 @@ import {
EventName, EventName,
MessageRequest, MessageRequest,
MessageStatus, MessageStatus,
ModelSettingParams,
ExtensionType, ExtensionType,
ThreadContent, ThreadContent,
ThreadMessage, ThreadMessage,
@ -41,6 +40,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
private static _engineSettings: EngineSettings = { private static _engineSettings: EngineSettings = {
ctx_len: 2048, ctx_len: 2048,
ngl: 100, ngl: 100,
cpu_threads: 1,
cont_batching: false, cont_batching: false,
embedding: false, embedding: false,
}; };

View File

@ -4,6 +4,7 @@ const path = require("path");
const { spawn } = require("child_process"); const { spawn } = require("child_process");
const tcpPortUsed = require("tcp-port-used"); const tcpPortUsed = require("tcp-port-used");
const fetchRetry = require("fetch-retry")(global.fetch); const fetchRetry = require("fetch-retry")(global.fetch);
const si = require("systeminformation");
const log = require("electron-log"); const log = require("electron-log");
@ -38,15 +39,21 @@ function stopModel(): Promise<ModelOperationResponse> {
* TODO: Should pass the absolute path of the model file instead of just the name - So we can modularize the module.ts to npm package * TODO: Should pass the absolute path of the model file instead of just the name - So we can modularize the module.ts to npm package
* TODO: Should it be startModel instead? * TODO: Should it be startModel instead?
*/ */
function initModel(wrapper: any): Promise<ModelOperationResponse> { async function initModel(wrapper: any): Promise<ModelOperationResponse> {
currentModelFile = wrapper.modelFullPath; currentModelFile = wrapper.modelFullPath;
if (wrapper.model.engine !== "nitro") { if (wrapper.model.engine !== "nitro") {
return Promise.resolve({ error: "Not a nitro model" }); return Promise.resolve({ error: "Not a nitro model" });
} else { } else {
log.info("Started to load model " + wrapper.model.modelFullPath); // Gather system information for CPU physical cores and memory
const nitroResourceProbe = await getResourcesInfo();
console.log(
"Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
);
const settings = { const settings = {
llama_model_path: currentModelFile, llama_model_path: currentModelFile,
...wrapper.model.settings, ...wrapper.model.settings,
// This is critical and requires real system information
cpu_threads: nitroResourceProbe.numCpuPhysicalCore,
}; };
log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`); log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`);
return ( return (
@ -54,7 +61,7 @@ function initModel(wrapper: any): Promise<ModelOperationResponse> {
validateModelVersion() validateModelVersion()
.then(checkAndUnloadNitro) .then(checkAndUnloadNitro)
// 2. Spawn the Nitro subprocess // 2. Spawn the Nitro subprocess
.then(spawnNitroProcess) .then(await spawnNitroProcess(nitroResourceProbe))
// 4. Load the model into the Nitro subprocess (HTTP POST request) // 4. Load the model into the Nitro subprocess (HTTP POST request)
.then(() => loadLLMModel(settings)) .then(() => loadLLMModel(settings))
// 5. Check if the model is loaded successfully // 5. Check if the model is loaded successfully
@ -166,16 +173,14 @@ async function checkAndUnloadNitro() {
* Using child-process to spawn the process * Using child-process to spawn the process
* Should run exactly platform specified Nitro binary version * Should run exactly platform specified Nitro binary version
*/ */
async function spawnNitroProcess(): Promise<void> { async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
return new Promise((resolve, reject) => { return new Promise(async (resolve, reject) => {
let binaryFolder = path.join(__dirname, "bin"); // Current directory by default let binaryFolder = path.join(__dirname, "bin"); // Current directory by default
let binaryName; let binaryName;
if (process.platform === "win32") { if (process.platform === "win32") {
// Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
binaryName = "win-start.bat"; binaryName = "win-start.bat";
} else if (process.platform === "darwin") { } else if (process.platform === "darwin") {
// Mac OS platform
if (process.arch === "arm64") { if (process.arch === "arm64") {
binaryFolder = path.join(binaryFolder, "mac-arm64"); binaryFolder = path.join(binaryFolder, "mac-arm64");
} else { } else {
@ -183,15 +188,13 @@ async function spawnNitroProcess(): Promise<void> {
} }
binaryName = "nitro"; binaryName = "nitro";
} else { } else {
// Linux binaryName = "linux-start.sh";
// Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
binaryName = "linux-start.sh"; // For other platforms
} }
const binaryPath = path.join(binaryFolder, binaryName); const binaryPath = path.join(binaryFolder, binaryName);
// Execute the binary // Execute the binary
subprocess = spawn(binaryPath, [1, "127.0.0.1", PORT], { subprocess = spawn(binaryPath, [1, LOCAL_HOST, PORT], {
cwd: binaryFolder, cwd: binaryFolder,
}); });
@ -211,7 +214,7 @@ async function spawnNitroProcess(): Promise<void> {
reject(`Nitro process exited. ${code ?? ""}`); reject(`Nitro process exited. ${code ?? ""}`);
}); });
tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => { tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
resolve(); resolve(nitroResourceProbe);
}); });
}); });
} }
@ -263,17 +266,30 @@ function validateModelVersion(): Promise<void> {
}); });
} }
/**
* Cleans up any registered resources.
* This is a module-specific function; it should be called when the application is closed.
*/
function dispose() { function dispose() {
// clean other registered resources here // clean other registered resources here
killSubprocess(); killSubprocess();
} }
/**
* Get the system resources information
*/
async function getResourcesInfo(): Promise<ResourcesInfo> {
return new Promise(async (resolve) => {
const cpu = await si.cpu();
const mem = await si.mem();
const response = {
numCpuPhysicalCore: cpu.physicalCores,
memAvailable: mem.available,
};
resolve(response);
});
}
module.exports = { module.exports = {
initModel, initModel,
stopModel,
killSubprocess, killSubprocess,
dispose, dispose,
}; };

View File

@ -12,7 +12,6 @@ import {
EventName, EventName,
MessageRequest, MessageRequest,
MessageStatus, MessageStatus,
ModelSettingParams,
ExtensionType, ExtensionType,
ThreadContent, ThreadContent,
ThreadMessage, ThreadMessage,