From e3c1787d57dcc3f1afadfc46d936acbe89cbf4c8 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 19 Dec 2023 11:10:07 +0700 Subject: [PATCH] fix: app failed to load model sometime due to race condition (#1071) --- .../inference-nitro-extension/package.json | 4 - .../inference-nitro-extension/src/module.ts | 199 ++++++------------ 2 files changed, 60 insertions(+), 143 deletions(-) diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index 795046cbe..01de6a2b9 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -31,9 +31,7 @@ "dependencies": { "@janhq/core": "file:../../core", "download-cli": "^1.1.1", - "electron-log": "^5.0.1", "fetch-retry": "^5.0.6", - "kill-port": "^2.0.1", "path-browserify": "^1.0.1", "rxjs": "^7.8.1", "systeminformation": "^5.21.20", @@ -51,9 +49,7 @@ ], "bundleDependencies": [ "tcp-port-used", - "kill-port", "fetch-retry", - "electron-log", "systeminformation" ] } diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts index ac7b394c6..bc39e8fca 100644 --- a/extensions/inference-nitro-extension/src/module.ts +++ b/extensions/inference-nitro-extension/src/module.ts @@ -1,13 +1,10 @@ const fs = require("fs"); -const kill = require("kill-port"); const path = require("path"); const { spawn } = require("child_process"); const tcpPortUsed = require("tcp-port-used"); const fetchRetry = require("fetch-retry")(global.fetch); const si = require("systeminformation"); -const log = require("electron-log"); - // The PORT to use for the Nitro subprocess const PORT = 3928; const LOCAL_HOST = "127.0.0.1"; @@ -18,19 +15,17 @@ const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llama const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`; // The subprocess instance for Nitro -let subprocess = null; -let currentModelFile = null; +let subprocess = undefined; +let currentModelFile = undefined; +let currentSettings = undefined; /** * Stops a Nitro subprocess. * @param wrapper - The model wrapper. * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. */ -function stopModel(): Promise { - return new Promise((resolve, reject) => { - checkAndUnloadNitro(); - resolve({ error: undefined }); - }); +function stopModel(): Promise { + return killSubprocess(); } /** @@ -45,9 +40,7 @@ async function initModel(wrapper: any): Promise { if (wrapper.model.engine !== "nitro") { return Promise.resolve({ error: "Not a nitro model" }); } else { - // Gather system information for CPU physical cores and memory const nitroResourceProbe = await getResourcesInfo(); - // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt if (wrapper.model.settings.prompt_template) { const promptTemplate = wrapper.model.settings.prompt_template; @@ -60,31 +53,30 @@ async function initModel(wrapper: any): Promise { wrapper.model.settings.ai_prompt = prompt.ai_prompt; } - const settings = { + currentSettings = { llama_model_path: currentModelFile, ...wrapper.model.settings, // This is critical and requires real system information cpu_threads: nitroResourceProbe.numCpuPhysicalCore, }; - log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`); - return ( - // 1. Check if the port is used, if used, attempt to unload model / kill nitro process - validateModelVersion() - .then(checkAndUnloadNitro) - // 2. Spawn the Nitro subprocess - .then(await spawnNitroProcess(nitroResourceProbe)) - // 4. Load the model into the Nitro subprocess (HTTP POST request) - .then(() => loadLLMModel(settings)) - // 5. Check if the model is loaded successfully - .then(validateModelStatus) - .catch((err) => { - log.error("error: " + JSON.stringify(err)); - return { error: err, currentModelFile }; - }) - ); + return loadModel(nitroResourceProbe); } } +async function loadModel(nitroResourceProbe: any | undefined) { + // Gather system information for CPU physical cores and memory + if (!nitroResourceProbe) nitroResourceProbe = await getResourcesInfo(); + return killSubprocess() + .then(() => spawnNitroProcess(nitroResourceProbe)) + .then(() => loadLLMModel(currentSettings)) + .then(validateModelStatus) + .catch((err) => { + console.log("error: ", err); + // TODO: Broadcast error so app could display proper error message + return { error: err, currentModelFile }; + }); +} + function promptTemplateConverter(promptTemplate) { // Split the string using the markers const systemMarker = "{system_message}"; @@ -138,13 +130,8 @@ function loadLLMModel(settings): Promise { "Content-Type": "application/json", }, body: JSON.stringify(settings), - retries: 5, - retryDelay: 1000, - }).catch((err) => { - console.error(err); - log.error("error: " + JSON.stringify(err)); - // Fetch error, Nitro server might not started properly - throw new Error("Model loading failed."); + retries: 3, + retryDelay: 500, }); } @@ -164,67 +151,47 @@ async function validateModelStatus(): Promise { }, retries: 5, retryDelay: 500, - }) - .then(async (res: Response) => { - // If the response is OK, check model_loaded status. - if (res.ok) { - const body = await res.json(); - // If the model is loaded, return an empty object. - // Otherwise, return an object with an error message. - if (body.model_loaded) { - return { error: undefined }; - } + }).then(async (res: Response) => { + // If the response is OK, check model_loaded status. + if (res.ok) { + const body = await res.json(); + // If the model is loaded, return an empty object. + // Otherwise, return an object with an error message. + if (body.model_loaded) { + return { error: undefined }; } - return { error: "Model loading failed" }; - }) - .catch((err) => { - log.error("Model loading failed" + err.toString()); - return { error: `Model loading failed.` }; - }); + } + return { error: "Model loading failed" }; + }); } /** * Terminates the Nitro subprocess. * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. */ -function killSubprocess(): Promise { - fetch(NITRO_HTTP_KILL_URL, { +async function killSubprocess(): Promise { + const controller = new AbortController(); + setTimeout(() => controller.abort(), 5000); + console.log("Start requesting to kill Nitro..."); + return fetch(NITRO_HTTP_KILL_URL, { method: "DELETE", - }).catch((err) => { - console.error(err); - subprocess?.kill(); - kill(PORT, "tcp").then(console.log).catch(console.log); - subprocess = null; - }); - return + signal: controller.signal, + }) + .then(() => { + subprocess?.kill(); + subprocess = undefined; + }) + .catch(() => {}) + .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000)) + .then(() => console.log("Nitro is killed")); } - -/** - * Check port is used or not, if used, attempt to unload model - * If unload failed, kill the port - */ -async function checkAndUnloadNitro() { - return tcpPortUsed.check(PORT, LOCAL_HOST).then(async (inUse) => { - // If inUse - try unload or kill process, otherwise do nothing - if (inUse) { - // Attempt to unload model - return fetch(NITRO_HTTP_UNLOAD_MODEL_URL, { - method: "GET", - }).catch((err) => { - console.error(err); - // Fallback to kill the port - return killSubprocess(); - }); - } - }); -} - /** * Look for the Nitro binary and execute it * Using child-process to spawn the process * Should run exactly platform specified Nitro binary version */ -async function spawnNitroProcess(nitroResourceProbe: any): Promise { +function spawnNitroProcess(nitroResourceProbe: any): Promise { + console.log("Starting Nitro subprocess..."); return new Promise(async (resolve, reject) => { let binaryFolder = path.join(__dirname, "bin"); // Current directory by default let binaryName; @@ -243,7 +210,6 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise { } const binaryPath = path.join(binaryFolder, binaryName); - // Execute the binary subprocess = spawn(binaryPath, [1, LOCAL_HOST, PORT], { cwd: binaryFolder, @@ -255,77 +221,27 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise { }); subprocess.stderr.on("data", (data) => { - log.error("subprocess error:" + data.toString()); + console.log("subprocess error:" + data.toString()); console.error(`stderr: ${data}`); }); subprocess.on("close", (code) => { console.debug(`child process exited with code ${code}`); subprocess = null; - reject(`Nitro process exited. ${code ?? ""}`); + reject(`child process exited with code ${code}`); }); + tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => { resolve(nitroResourceProbe); }); }); } -/** - * Validate the model version, if it is GGUFv1, reject the promise - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. - */ -function validateModelVersion(): Promise { - log.info("validateModelVersion"); - // Read the file - return new Promise((resolve, reject) => { - fs.open(currentModelFile, "r", (err, fd) => { - if (err) { - log.error("validateModelVersion error" + JSON.stringify(err)); - console.error(err.message); - reject(err); - return; - } - - // Buffer to store the byte - const buffer = Buffer.alloc(1); - - // Model version will be the 5th byte of the file - fs.read(fd, buffer, 0, 1, 4, (err, bytesRead, buffer) => { - if (err) { - log.error("validateModelVersion open error" + JSON.stringify(err)); - console.error(err.message); - fs.close(fd, (err) => { - log.error("validateModelVersion close error" + JSON.stringify(err)); - if (err) console.error(err.message); - }); - reject(err); - } else { - // Interpret the byte as ASCII - if (buffer[0] === 0x01) { - // This is GGUFv1, which is deprecated - reject("GGUFv1 model is deprecated, please try another model."); - } - } - - // Close the file descriptor - fs.close(fd, (err) => { - if (err) console.error(err.message); - }); - resolve(); - }); - }); - }); -} - -function dispose() { - // clean other registered resources here - killSubprocess(); -} - /** * Get the system resources information + * TODO: Move to Core so that it can be reused */ -async function getResourcesInfo(): Promise { +function getResourcesInfo(): Promise { return new Promise(async (resolve) => { const cpu = await si.cpu(); const mem = await si.mem(); @@ -338,6 +254,11 @@ async function getResourcesInfo(): Promise { }); } +function dispose() { + // clean other registered resources here + killSubprocess(); +} + module.exports = { initModel, stopModel,