From e3c1787d57dcc3f1afadfc46d936acbe89cbf4c8 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 19 Dec 2023 11:10:07 +0700
Subject: [PATCH] fix: app failed to load model sometime due to race condition
 (#1071)

---
 .../inference-nitro-extension/package.json    |   4 -
 .../inference-nitro-extension/src/module.ts   | 199 ++++++------------
 2 files changed, 60 insertions(+), 143 deletions(-)
diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index 795046cbe..01de6a2b9 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -31,9 +31,7 @@
   "dependencies": {
     "@janhq/core": "file:../../core",
     "download-cli": "^1.1.1",
-    "electron-log": "^5.0.1",
     "fetch-retry": "^5.0.6",
-    "kill-port": "^2.0.1",
     "path-browserify": "^1.0.1",
     "rxjs": "^7.8.1",
     "systeminformation": "^5.21.20",
@@ -51,9 +49,7 @@
   ],
   "bundleDependencies": [
     "tcp-port-used",
-    "kill-port",
     "fetch-retry",
-    "electron-log",
     "systeminformation"
   ]
 }
diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index ac7b394c6..bc39e8fca 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -1,13 +1,10 @@
 const fs = require("fs");
-const kill = require("kill-port");
 const path = require("path");
 const { spawn } = require("child_process");
 const tcpPortUsed = require("tcp-port-used");
 const fetchRetry = require("fetch-retry")(global.fetch);
 const si = require("systeminformation");
 
-const log = require("electron-log");
-
 // The PORT to use for the Nitro subprocess
 const PORT = 3928;
 const LOCAL_HOST = "127.0.0.1";
@@ -18,19 +15,17 @@ const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llama
 const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`;
 
 // The subprocess instance for Nitro
-let subprocess = null;
-let currentModelFile = null;
+let subprocess = undefined;
+let currentModelFile = undefined;
+let currentSettings = undefined;
 
 /**
  * Stops a Nitro subprocess.
  * @param wrapper - The model wrapper.
  * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
  */
-function stopModel(): Promise<ModelOperationResponse> {
-  return new Promise((resolve, reject) => {
-    checkAndUnloadNitro();
-    resolve({ error: undefined });
-  });
+function stopModel(): Promise<void> {
+  return killSubprocess();
 }
 
 /**
@@ -45,9 +40,7 @@ async function initModel(wrapper: any): Promise<ModelOperationResponse> {
   if (wrapper.model.engine !== "nitro") {
     return Promise.resolve({ error: "Not a nitro model" });
   } else {
-    // Gather system information for CPU physical cores and memory
     const nitroResourceProbe = await getResourcesInfo();
-
     // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
     if (wrapper.model.settings.prompt_template) {
       const promptTemplate = wrapper.model.settings.prompt_template;
@@ -60,31 +53,30 @@ async function initModel(wrapper: any): Promise<ModelOperationResponse> {
       wrapper.model.settings.ai_prompt = prompt.ai_prompt;
     }
 
-    const settings = {
+    currentSettings = {
       llama_model_path: currentModelFile,
       ...wrapper.model.settings,
       // This is critical and requires real system information
       cpu_threads: nitroResourceProbe.numCpuPhysicalCore,
     };
-    log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`);
-    return (
-      // 1. Check if the port is used, if used, attempt to unload model / kill nitro process
-      validateModelVersion()
-        .then(checkAndUnloadNitro)
-        // 2. Spawn the Nitro subprocess
-        .then(await spawnNitroProcess(nitroResourceProbe))
-        // 4. Load the model into the Nitro subprocess (HTTP POST request)
-        .then(() => loadLLMModel(settings))
-        // 5. Check if the model is loaded successfully
-        .then(validateModelStatus)
-        .catch((err) => {
-          log.error("error: " + JSON.stringify(err));
-          return { error: err, currentModelFile };
-        })
-    );
+    return loadModel(nitroResourceProbe);
   }
 }
 
+async function loadModel(nitroResourceProbe: any | undefined) {
+  // Gather system information for CPU physical cores and memory
+  if (!nitroResourceProbe) nitroResourceProbe = await getResourcesInfo();
+  return killSubprocess()
+    .then(() => spawnNitroProcess(nitroResourceProbe))
+    .then(() => loadLLMModel(currentSettings))
+    .then(validateModelStatus)
+    .catch((err) => {
+      console.log("error: ", err);
+      // TODO: Broadcast error so app could display proper error message
+      return { error: err, currentModelFile };
+    });
+}
+
 function promptTemplateConverter(promptTemplate) {
   // Split the string using the markers
   const systemMarker = "{system_message}";
@@ -138,13 +130,8 @@ function loadLLMModel(settings): Promise<Response> {
       "Content-Type": "application/json",
     },
     body: JSON.stringify(settings),
-    retries: 5,
-    retryDelay: 1000,
-  }).catch((err) => {
-    console.error(err);
-    log.error("error: " + JSON.stringify(err));
-    // Fetch error, Nitro server might not started properly
-    throw new Error("Model loading failed.");
+    retries: 3,
+    retryDelay: 500,
   });
 }
 
@@ -164,67 +151,47 @@ async function validateModelStatus(): Promise<ModelOperationResponse> {
     },
     retries: 5,
     retryDelay: 500,
-  })
-    .then(async (res: Response) => {
-      // If the response is OK, check model_loaded status.
-      if (res.ok) {
-        const body = await res.json();
-        // If the model is loaded, return an empty object.
-        // Otherwise, return an object with an error message.
-        if (body.model_loaded) {
-          return { error: undefined };
-        }
+  }).then(async (res: Response) => {
+    // If the response is OK, check model_loaded status.
+    if (res.ok) {
+      const body = await res.json();
+      // If the model is loaded, return an empty object.
+      // Otherwise, return an object with an error message.
+      if (body.model_loaded) {
+        return { error: undefined };
       }
-      return { error: "Model loading failed" };
-    })
-    .catch((err) => {
-      log.error("Model loading failed" + err.toString());
-      return { error: `Model loading failed.` };
-    });
+    }
+    return { error: "Model loading failed" };
+  });
 }
 
 /**
  * Terminates the Nitro subprocess.
  * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
  */
-function killSubprocess(): Promise<void> {
-  fetch(NITRO_HTTP_KILL_URL, {
+async function killSubprocess(): Promise<void> {
+  const controller = new AbortController();
+  setTimeout(() => controller.abort(), 5000);
+  console.log("Start requesting to kill Nitro...");
+  return fetch(NITRO_HTTP_KILL_URL, {
     method: "DELETE",
-  }).catch((err) => {
-    console.error(err);
-    subprocess?.kill();
-    kill(PORT, "tcp").then(console.log).catch(console.log);
-    subprocess = null;
-  });
-  return
+    signal: controller.signal,
+  })
+    .then(() => {
+      subprocess?.kill();
+      subprocess = undefined;
+    })
+    .catch(() => {})
+    .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
+    .then(() => console.log("Nitro is killed"));
 }
-
-/**
- * Check port is used or not, if used, attempt to unload model
- * If unload failed, kill the port
- */
-async function checkAndUnloadNitro() {
-  return tcpPortUsed.check(PORT, LOCAL_HOST).then(async (inUse) => {
-    // If inUse - try unload or kill process, otherwise do nothing
-    if (inUse) {
-      // Attempt to unload model
-      return fetch(NITRO_HTTP_UNLOAD_MODEL_URL, {
-        method: "GET",
-      }).catch((err) => {
-        console.error(err);
-        // Fallback to kill the port
-        return killSubprocess();
-      });
-    }
-  });
-}
-
 /**
  * Look for the Nitro binary and execute it
  * Using child-process to spawn the process
  * Should run exactly platform specified Nitro binary version
  */
-async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
+function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
+  console.log("Starting Nitro subprocess...");
   return new Promise(async (resolve, reject) => {
     let binaryFolder = path.join(__dirname, "bin"); // Current directory by default
     let binaryName;
@@ -243,7 +210,6 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
     }
 
     const binaryPath = path.join(binaryFolder, binaryName);
-
     // Execute the binary
     subprocess = spawn(binaryPath, [1, LOCAL_HOST, PORT], {
       cwd: binaryFolder,
@@ -255,77 +221,27 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
     });
 
     subprocess.stderr.on("data", (data) => {
-      log.error("subprocess error:" + data.toString());
+      console.log("subprocess error:" + data.toString());
       console.error(`stderr: ${data}`);
     });
 
     subprocess.on("close", (code) => {
       console.debug(`child process exited with code ${code}`);
       subprocess = null;
-      reject(`Nitro process exited. ${code ?? ""}`);
+      reject(`child process exited with code ${code}`);
     });
+
     tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
       resolve(nitroResourceProbe);
     });
   });
 }
 
-/**
- * Validate the model version, if it is GGUFv1, reject the promise
- * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
- */
-function validateModelVersion(): Promise<void> {
-  log.info("validateModelVersion");
-  // Read the file
-  return new Promise((resolve, reject) => {
-    fs.open(currentModelFile, "r", (err, fd) => {
-      if (err) {
-        log.error("validateModelVersion error" + JSON.stringify(err));
-        console.error(err.message);
-        reject(err);
-        return;
-      }
-
-      // Buffer to store the byte
-      const buffer = Buffer.alloc(1);
-
-      // Model version will be the 5th byte of the file
-      fs.read(fd, buffer, 0, 1, 4, (err, bytesRead, buffer) => {
-        if (err) {
-          log.error("validateModelVersion open error" + JSON.stringify(err));
-          console.error(err.message);
-          fs.close(fd, (err) => {
-            log.error("validateModelVersion close error" + JSON.stringify(err));
-            if (err) console.error(err.message);
-          });
-          reject(err);
-        } else {
-          // Interpret the byte as ASCII
-          if (buffer[0] === 0x01) {
-            // This is GGUFv1, which is deprecated
-            reject("GGUFv1 model is deprecated, please try another model.");
-          }
-        }
-
-        // Close the file descriptor
-        fs.close(fd, (err) => {
-          if (err) console.error(err.message);
-        });
-        resolve();
-      });
-    });
-  });
-}
-
-function dispose() {
-  // clean other registered resources here
-  killSubprocess();
-}
-
 /**
  * Get the system resources information
+ * TODO: Move to Core so that it can be reused
  */
-async function getResourcesInfo(): Promise<ResourcesInfo> {
+function getResourcesInfo(): Promise<ResourcesInfo> {
   return new Promise(async (resolve) => {
     const cpu = await si.cpu();
     const mem = await si.mem();
@@ -338,6 +254,11 @@ async function getResourcesInfo(): Promise<ResourcesInfo> {
   });
 }
 
+function dispose() {
+  // clean other registered resources here
+  killSubprocess();
+}
+
 module.exports = {
   initModel,
   stopModel,