From cafdaaaccdc302312697caf15e91a8e07a6d638f Mon Sep 17 00:00:00 2001
From: hiro
Date: Sat, 9 Dec 2023 22:58:00 +0700
Subject: [PATCH 1/6] feat: Nitro hardware sensing at init

---
 .../inference-nitro-extension/package.json    |  4 +-
 .../src/@types/global.d.ts                    |  5 +++
 .../inference-nitro-extension/src/module.ts   | 40 +++++++++++++++----
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index ef74fff08..ecbbf17a8 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -36,6 +36,7 @@
     "kill-port": "^2.0.1",
     "path-browserify": "^1.0.1",
     "rxjs": "^7.8.1",
+    "systeminformation": "^5.21.20",
     "tcp-port-used": "^1.0.2",
     "ts-loader": "^9.5.0",
     "ulid": "^2.3.0"
@@ -52,6 +53,7 @@
     "tcp-port-used",
     "kill-port",
     "fetch-retry",
-    "electron-log"
+    "electron-log",
+    "systeminformation"
   ]
 }
diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts
index 642f10909..f93a3e4c9 100644
--- a/extensions/inference-nitro-extension/src/@types/global.d.ts
+++ b/extensions/inference-nitro-extension/src/@types/global.d.ts
@@ -24,3 +24,8 @@ interface ModelOperationResponse {
   error?: any;
   modelFile?: string;
 }
+
+interface ResourcesInfo {
+  numCpuPhysicalCore: number;
+  memAvailable: number;
+}
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index d36553f40..64a7393fc 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -4,6 +4,7 @@ const path = require("path");
 const { spawn } = require("child_process");
 const tcpPortUsed = require("tcp-port-used");
 const fetchRetry = require("fetch-retry")(global.fetch);
+const si = require("systeminformation");
 
 const log = require("electron-log");
 
@@ -167,7 +168,7 @@ async function checkAndUnloadNitro() {
  * Should run exactly platform specified Nitro binary version
  */
 async function spawnNitroProcess(): Promise<any> {
-  return new Promise((resolve, reject) => {
+  return new Promise(async (resolve, reject) => {
     let binaryFolder = path.join(__dirname, "bin"); // Current directory by default
     let binaryName;
 
@@ -190,10 +191,20 @@ async function spawnNitroProcess(): Promise<any> {
 
     const binaryPath = path.join(binaryFolder, binaryName);
 
+    // Gather system information for CPU physical cores and memory
+    const nitroResourceProbe = await getResourcesInfo();
+    console.log(
+      "Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
+    );
+
     // Execute the binary
-    subprocess = spawn(binaryPath, [1, "127.0.0.1", PORT], {
-      cwd: binaryFolder,
-    });
+    subprocess = spawn(
+      binaryPath,
+      [nitroResourceProbe.numCpuPhysicalCore, "127.0.0.1", PORT],
+      {
+        cwd: binaryFolder,
+      }
+    );
 
     // Handle subprocess output
     subprocess.stdout.on("data", (data) => {
@@ -263,15 +274,28 @@ function validateModelVersion(): Promise<void> {
   });
 }
 
-/**
- * Cleans up any registered resources. 
- * Its module specific function, should be called when application is closed
- */
+
 function dispose() {
   // clean other registered resources here
   killSubprocess();
 }
 
+/**
+ * Get the system resources information
+ */
+async function getResourcesInfo(): Promise<ResourcesInfo> {
+  return new Promise(async (resolve) => {
+    const cpu = await si.cpu();
+    const mem = await si.mem();
+
+    const response = {
+      numCpuPhysicalCore: cpu.physicalCores,
+      memAvailable: mem.available,
+    };
+    resolve(response);
+  });
+}
+
 module.exports = {
   initModel,
   killSubprocess,

From f528e9ea7739460ddecac357a9be7d0ec60ae9e4 Mon Sep 17 00:00:00 2001
From: hiro
Date: Mon, 11 Dec 2023 21:22:56 +0700
Subject: [PATCH 2/6] fix: Update inference nitro with n_threads equal to
 physical core count

---
 .../inference-nitro-extension/src/module.ts | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index 64a7393fc..80d474b94 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -39,15 +39,21 @@ function stopModel(): Promise<ModelOperationResponse> {
  * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
  * TODO: Should it be startModel instead?
  */
-function initModel(wrapper: any): Promise<ModelOperationResponse> {
+async function initModel(wrapper: any): Promise<ModelOperationResponse> {
   currentModelFile = wrapper.modelFullPath;
   if (wrapper.model.engine !== "nitro") {
     return Promise.resolve({ error: "Not a nitro model" });
   } else {
-    log.info("Started to load model " + wrapper.model.modelFullPath);
+    // Gather system information for CPU physical cores and memory
+    const nitroResourceProbe = await getResourcesInfo();
+    console.log(
+      "Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
+    );
     const settings = {
       llama_model_path: currentModelFile,
       ...wrapper.model.settings,
+      // This is critical and requires real system information
+      n_threads: nitroResourceProbe.numCpuPhysicalCore,
     };
     log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`);
     return (
@@ -55,7 +61,7 @@ function initModel(wrapper: any): Promise<ModelOperationResponse> {
       validateModelVersion()
         .then(checkAndUnloadNitro)
         // 2. Spawn the Nitro subprocess
-        .then(spawnNitroProcess)
+        .then(await spawnNitroProcess(nitroResourceProbe))
         // 4. Load the model into the Nitro subprocess (HTTP POST request)
         .then(() => loadLLMModel(settings))
         // 5. Check if the model is loaded successfully
@@ -167,7 +173,7 @@ async function checkAndUnloadNitro() {
  * Using child-process to spawn the process
  * Should run exactly platform specified Nitro binary version
  */
-async function spawnNitroProcess(): Promise<any> {
+async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
   return new Promise(async (resolve, reject) => {
     let binaryFolder = path.join(__dirname, "bin"); // Current directory by default
     let binaryName;
@@ -191,12 +197,6 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
 
     const binaryPath = path.join(binaryFolder, binaryName);
 
-    // Gather system information for CPU physical cores and memory
-    const nitroResourceProbe = await getResourcesInfo();
-    console.log(
-      "Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
-    );
-
     // Execute the binary
     subprocess = spawn(
       binaryPath,
@@ -222,7 +222,7 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
       reject(`Nitro process exited. ${code ?? 
""}`); }); tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => { - resolve(); + resolve(nitroResourceProbe); }); }); } From 0e63689eae163cab4bb42c7490becace4462d229 Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 11 Dec 2023 21:37:23 +0700 Subject: [PATCH 3/6] fix: inference engine nitro stopModel undefined in module.ts --- extensions/inference-nitro-extension/src/module.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts index 80d474b94..b5ba69f5a 100644 --- a/extensions/inference-nitro-extension/src/module.ts +++ b/extensions/inference-nitro-extension/src/module.ts @@ -298,6 +298,7 @@ async function getResourcesInfo(): Promise { module.exports = { initModel, + stopModel, killSubprocess, dispose, }; From 16c66e968c08167e1891735fce39df9628a1ae94 Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 12 Dec 2023 05:58:08 +0700 Subject: [PATCH 4/6] chore: Update n_threads to cpu_threads --- core/src/types/index.ts | 1 + extensions/inference-nitro-extension/src/@types/global.d.ts | 1 + extensions/inference-nitro-extension/src/index.ts | 2 +- extensions/inference-nitro-extension/src/module.ts | 2 +- extensions/inference-openai-extension/src/index.ts | 1 - 5 files changed, 4 insertions(+), 3 deletions(-) diff --git a/core/src/types/index.ts b/core/src/types/index.ts index d5b51cfc0..81ea7e14e 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -275,6 +275,7 @@ export type ModelSettingParams = { ngl?: number; embedding?: boolean; n_parallel?: number; + cpu_threads: number; system_prompt?: string; user_prompt?: string; ai_prompt?: string; diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts index f93a3e4c9..62eb65e52 100644 --- a/extensions/inference-nitro-extension/src/@types/global.d.ts +++ b/extensions/inference-nitro-extension/src/@types/global.d.ts @@ -12,6 +12,7 @@ declare const INFERENCE_URL: string; interface EngineSettings { ctx_len: number; ngl: number; + cpu_threads: number; cont_batching: boolean; embedding: boolean; } diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index e5f3f4360..f2fbf0d34 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -12,7 +12,6 @@ import { EventName, MessageRequest, MessageStatus, - ModelSettingParams, ExtensionType, ThreadContent, ThreadMessage, @@ -41,6 +40,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension { private static _engineSettings: EngineSettings = { ctx_len: 2048, ngl: 100, + cpu_threads: 1, cont_batching: false, embedding: false, }; diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts index b5ba69f5a..266566e91 100644 --- a/extensions/inference-nitro-extension/src/module.ts +++ b/extensions/inference-nitro-extension/src/module.ts @@ -53,7 +53,7 @@ async function initModel(wrapper: any): Promise { llama_model_path: currentModelFile, ...wrapper.model.settings, // This is critical and requires real system information - n_threads: nitroResourceProbe.numCpuPhysicalCore, + cpu_threads: nitroResourceProbe.numCpuPhysicalCore, }; log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`); return ( diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 
7e3e6e71e..6bab563dd 100644
--- a/extensions/inference-openai-extension/src/index.ts
+++ b/extensions/inference-openai-extension/src/index.ts
@@ -12,7 +12,6 @@ import {
   EventName,
   MessageRequest,
   MessageStatus,
-  ModelSettingParams,
   ExtensionType,
   ThreadContent,
   ThreadMessage,

From 577921f21f56fbd5bd7c64b3d0ce68a2c4d5f357 Mon Sep 17 00:00:00 2001
From: hiro
Date: Tue, 12 Dec 2023 06:12:29 +0700
Subject: [PATCH 5/6] chore: Add cpu_threads to default model settings as 1

---
 core/src/types/index.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/types/index.ts b/core/src/types/index.ts
index 81ea7e14e..7314a4ae3 100644
--- a/core/src/types/index.ts
+++ b/core/src/types/index.ts
@@ -275,7 +275,7 @@ export type ModelSettingParams = {
   ngl?: number;
   embedding?: boolean;
   n_parallel?: number;
-  cpu_threads: number;
+  cpu_threads?: number;
   system_prompt?: string;
   user_prompt?: string;
   ai_prompt?: string;

From 14f83ddb7078ca826130eea67b86504819a77232 Mon Sep 17 00:00:00 2001
From: hiro
Date: Tue, 12 Dec 2023 07:27:25 +0700
Subject: [PATCH 6/6] fix: Revert drogon threads to 1 instead of CPU count

---
 .../inference-nitro-extension/src/module.ts | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index 266566e91..047581dbe 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -179,10 +179,8 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
     let binaryName;
 
     if (process.platform === "win32") {
-      // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
       binaryName = "win-start.bat";
     } else if (process.platform === "darwin") {
-      // Mac OS platform
       if (process.arch === "arm64") {
         binaryFolder = path.join(binaryFolder, "mac-arm64");
       } else {
@@ -190,21 +188,15 @@ async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
       }
       binaryName = "nitro";
     } else {
-      // Linux
-      // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
-      binaryName = "linux-start.sh"; // For other platforms
+      binaryName = "linux-start.sh";
     }
 
     const binaryPath = path.join(binaryFolder, binaryName);
 
     // Execute the binary
-    subprocess = spawn(
-      binaryPath,
-      [nitroResourceProbe.numCpuPhysicalCore, "127.0.0.1", PORT],
-      {
-        cwd: binaryFolder,
-      }
-    );
+    subprocess = spawn(binaryPath, [1, LOCAL_HOST, PORT], {
+      cwd: binaryFolder,
+    });
 
     // Handle subprocess output
     subprocess.stdout.on("data", (data) => {
@@ -274,7 +266,6 @@ function validateModelVersion(): Promise<void> {
   });
 }
 
-
 function dispose() {
   // clean other registered resources here
   killSubprocess();
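
Taken together, the series settles on: probe the host once with systeminformation, feed the physical-core count into the model load settings (n_threads, renamed cpu_threads in patch 4), and pin the first CLI argument — the thread count for Nitro's own drogon-based HTTP server — back to 1 (patch 6). The probe itself boils down to two documented systeminformation calls: si.cpu() resolves an object whose physicalCores field is the physical (not logical) core count, and si.mem() resolves one whose available field is memory available to processes, in bytes. A minimal standalone sketch follows; probeResources is a hypothetical name for what the patches call getResourcesInfo, and it drops the new Promise(async ...) wrapper seen in patch 1, which is redundant because a plain async function already returns a promise:

const si = require("systeminformation");

interface ResourcesInfo {
  numCpuPhysicalCore: number;
  memAvailable: number;
}

// One-shot host probe: physical core count and available memory.
async function probeResources(): Promise<ResourcesInfo> {
  const cpu = await si.cpu(); // cpu.physicalCores: physical core count
  const mem = await si.mem(); // mem.available: available memory, in bytes
  return {
    numCpuPhysicalCore: cpu.physicalCores,
    memAvailable: mem.available,
  };
}

// Usage in the spirit of initModel after patch 4: merge the probe into the settings.
probeResources().then((info) => {
  const settings = { cpu_threads: info.numCpuPhysicalCore };
  console.log(`Load model settings: ${JSON.stringify(settings, null, 2)}`);
});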