refactor: introduce node module in nitro extension (#1630)

commit f4f861d0e9 (parent db987e88f9)
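Summary: this commit moves the nitro extension's main-process logic out of a single webpack-bundled module.ts into a dedicated src/node module (execute.ts, index.ts, nvidia.ts), built by rollup into dist/node/index.cjs.js and exposed through the package's "./main" export. Along the way the injected MODULE constant is renamed to NODE, initModel becomes runModel, and webpack is replaced by rollup. A minimal sketch of the renderer-side call pattern after this change, using only calls that appear in the hunks below (the helper name is hypothetical):

// NODE is a build-time constant inlined by @rollup/plugin-replace
// (see rollup.config.ts below); it names the extension's node bundle.
declare const NODE: string;

import { executeOnMain, joinPath, Model } from "@janhq/core";

// Illustrative only: load a model through the new node module.
async function loadNitroModel(model: Model) {
  const modelFullPath = await joinPath(["models", model.id]);
  // Dispatches to runModel() exported from src/node/index.ts.
  return executeOnMain(NODE, "runModel", { modelFullPath, model });
}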
@@ -15,13 +15,6 @@
     "dist"
   ],
   "author": "Jan <service@jan.ai>",
-  "repository": {
-    "type": "git",
-    "url": ""
-  },
-  "engines": {
-    "node": ">=6.0.0"
-  },
   "exports": {
     ".": "./dist/core.umd.js",
     "./sdk": "./dist/core.umd.js",
@@ -49,53 +42,6 @@
     "build": "tsc --module commonjs && rollup -c rollup.config.ts",
     "start": "rollup -c rollup.config.ts -w"
   },
-  "lint-staged": {
-    "{src,test}/**/*.ts": [
-      "prettier --write",
-      "git add"
-    ]
-  },
-  "config": {
-    "commitizen": {
-      "path": "node_modules/cz-conventional-changelog"
-    }
-  },
-  "jest": {
-    "transform": {
-      ".(ts|tsx)": "ts-jest"
-    },
-    "testEnvironment": "node",
-    "testRegex": "(/__tests__/.*|\\.(test|spec))\\.(ts|tsx|js)$",
-    "moduleFileExtensions": [
-      "ts",
-      "tsx",
-      "js"
-    ],
-    "coveragePathIgnorePatterns": [
-      "/node_modules/",
-      "/test/"
-    ],
-    "coverageThreshold": {
-      "global": {
-        "branches": 90,
-        "functions": 95,
-        "lines": 95,
-        "statements": 95
-      }
-    },
-    "collectCoverageFrom": [
-      "src/*.{js,ts}"
-    ]
-  },
-  "prettier": {
-    "semi": false,
-    "singleQuote": true
-  },
-  "commitlint": {
-    "extends": [
-      "@commitlint/config-conventional"
-    ]
-  },
   "devDependencies": {
     "@types/node": "^12.0.2",
     "rollup": "^2.38.5",
@@ -104,7 +50,6 @@
     "rollup-plugin-node-resolve": "^5.2.0",
     "rollup-plugin-sourcemaps": "^0.6.3",
     "rollup-plugin-typescript2": "^0.36.0",
-    "ts-node": "^7.0.1",
     "tslib": "^2.6.2",
     "typescript": "^5.2.2"
   }
@@ -104,6 +104,9 @@ export type ModelSettingParams = {
   n_parallel?: number
   cpu_threads?: number
   prompt_template?: string
+  system_prompt?: string
+  ai_prompt?: string
+  user_prompt?: string
 }
 
 /**
@@ -3,11 +3,11 @@
   "version": "1.0.0",
   "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See nitro.jan.ai",
   "main": "dist/index.js",
-  "module": "dist/module.js",
+  "node": "dist/node/index.cjs.js",
   "author": "Jan <service@jan.ai>",
   "license": "AGPL-3.0",
   "scripts": {
-    "build": "tsc -b . && webpack --config webpack.config.js",
+    "build": "tsc --module commonjs && rollup -c rollup.config.ts",
     "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro",
     "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
     "downloadnitro:win32": "download.bat",
@@ -19,24 +19,33 @@
   },
   "exports": {
     ".": "./dist/index.js",
-    "./main": "./dist/module.js"
+    "./main": "./dist/node/index.cjs.js"
   },
   "devDependencies": {
+    "@rollup/plugin-commonjs": "^25.0.7",
+    "@rollup/plugin-json": "^6.1.0",
+    "@rollup/plugin-node-resolve": "^15.2.3",
+    "@types/node": "^20.11.4",
+    "@types/tcp-port-used": "^1.0.4",
     "cpx": "^1.5.0",
+    "download-cli": "^1.1.1",
     "rimraf": "^3.0.2",
+    "rollup": "^2.38.5",
+    "rollup-plugin-define": "^1.0.1",
+    "rollup-plugin-sourcemaps": "^0.6.3",
+    "rollup-plugin-typescript2": "^0.36.0",
     "run-script-os": "^1.1.6",
-    "webpack": "^5.88.2",
-    "webpack-cli": "^5.1.4"
+    "typescript": "^5.3.3"
   },
   "dependencies": {
     "@janhq/core": "file:../../core",
-    "download-cli": "^1.1.1",
+    "@rollup/plugin-replace": "^5.0.5",
+    "@types/os-utils": "^0.0.4",
     "fetch-retry": "^5.0.6",
     "os-utils": "^0.0.14",
     "path-browserify": "^1.0.1",
     "rxjs": "^7.8.1",
     "tcp-port-used": "^1.0.2",
-    "ts-loader": "^9.5.0",
     "ulid": "^2.3.0"
   },
   "engines": {
extensions/inference-nitro-extension/rollup.config.ts (new file, 77 lines)
@@ -0,0 +1,77 @@
+import resolve from "@rollup/plugin-node-resolve";
+import commonjs from "@rollup/plugin-commonjs";
+import sourceMaps from "rollup-plugin-sourcemaps";
+import typescript from "rollup-plugin-typescript2";
+import json from "@rollup/plugin-json";
+import replace from "@rollup/plugin-replace";
+const packageJson = require("./package.json");
+
+const pkg = require("./package.json");
+
+export default [
+  {
+    input: `src/index.ts`,
+    output: [{ file: pkg.main, format: "es", sourcemap: true }],
+    // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
+    external: [],
+    watch: {
+      include: "src/**",
+    },
+    plugins: [
+      replace({
+        NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
+        INFERENCE_URL: JSON.stringify(
+          process.env.INFERENCE_URL ||
+            "http://127.0.0.1:3928/inferences/llamacpp/chat_completion"
+        ),
+        TROUBLESHOOTING_URL: JSON.stringify(
+          "https://jan.ai/guides/troubleshooting"
+        ),
+      }),
+      // Allow json resolution
+      json(),
+      // Compile TypeScript files
+      typescript({ useTsconfigDeclarationDir: true }),
+      // Compile TypeScript files
+      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
+      commonjs(),
+      // Allow node_modules resolution, so you can use 'external' to control
+      // which external modules to include in the bundle
+      // https://github.com/rollup/rollup-plugin-node-resolve#usage
+      resolve({
+        extensions: [".js", ".ts", ".svelte"],
+      }),
+
+      // Resolve source maps to the original source
+      sourceMaps(),
+    ],
+  },
+  {
+    input: `src/node/index.ts`,
+    output: [
+      { file: "dist/node/index.cjs.js", format: "cjs", sourcemap: true },
+    ],
+    // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
+    external: ["@janhq/core/node"],
+    watch: {
+      include: "src/node/**",
+    },
+    plugins: [
+      // Allow json resolution
+      json(),
+      // Compile TypeScript files
+      typescript({ useTsconfigDeclarationDir: true }),
+      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
+      commonjs(),
+      // Allow node_modules resolution, so you can use 'external' to control
+      // which external modules to include in the bundle
+      // https://github.com/rollup/rollup-plugin-node-resolve#usage
+      resolve({
+        extensions: [".ts", ".js", ".json"],
+      }),
+
+      // Resolve source maps to the original source
+      sourceMaps(),
+    ],
+  },
+];
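Note: the replace() plugin above inlines NODE, INFERENCE_URL, and TROUBLESHOOTING_URL as string literals at bundle time, so the renderer bundle ships with these values baked in; source files only need ambient declarations, as the next hunk shows. A hedged sketch of consuming one such constant; the helper name is hypothetical and the real request logic lives in src/helpers/sse.ts:

// Ambient declaration; the literal value is substituted during the rollup build.
declare const INFERENCE_URL: string;

// Hypothetical reachability probe against the inlined endpoint.
async function inferenceEndpointReachable(): Promise<boolean> {
  const res = await fetch(INFERENCE_URL, { method: "POST", body: "{}" }).catch(
    () => undefined
  );
  return res?.ok ?? false;
}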
@@ -1,4 +1,4 @@
-declare const MODULE: string;
+declare const NODE: string;
 declare const INFERENCE_URL: string;
 declare const TROUBLESHOOTING_URL: string;
 
@@ -26,7 +26,6 @@ import {
 } from "@janhq/core";
 import { requestInference } from "./helpers/sse";
 import { ulid } from "ulid";
-import { join } from "path";
 
 /**
  * A class that implements the InferenceExtension interface from the @janhq/core package.
@@ -43,7 +42,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
    */
   private static readonly _intervalHealthCheck = 5 * 1000;
 
-  private _currentModel: Model;
+  private _currentModel: Model | undefined;
 
   private _engineSettings: EngineSettings = {
     ctx_len: 2048,
@@ -82,7 +81,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
     if (!(await fs.existsSync(JanInferenceNitroExtension._homeDir))) {
       await fs
         .mkdirSync(JanInferenceNitroExtension._homeDir)
-        .catch((err) => console.debug(err));
+        .catch((err: Error) => console.debug(err));
     }
 
     if (!(await fs.existsSync(JanInferenceNitroExtension._settingsDir)))
@@ -90,7 +89,9 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
       this.writeDefaultEngineSettings();
 
     // Events subscription
-    events.on(EventName.OnMessageSent, (data) => this.onMessageRequest(data));
+    events.on(EventName.OnMessageSent, (data: MessageRequest) =>
+      this.onMessageRequest(data)
+    );
 
     events.on(EventName.OnModelInit, (model: Model) => this.onModelInit(model));
 
@@ -99,7 +100,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
     events.on(EventName.OnInferenceStopped, () => this.onInferenceStopped());
 
     // Attempt to fetch nvidia info
-    await executeOnMain(MODULE, "updateNvidiaInfo", {});
+    await executeOnMain(NODE, "updateNvidiaInfo", {});
   }
 
   /**
@@ -109,10 +110,10 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
 
   private async writeDefaultEngineSettings() {
     try {
-      const engineFile = join(
+      const engineFile = await joinPath([
         JanInferenceNitroExtension._homeDir,
-        JanInferenceNitroExtension._engineMetadataFileName
-      );
+        JanInferenceNitroExtension._engineMetadataFileName,
+      ]);
       if (await fs.existsSync(engineFile)) {
         const engine = await fs.readFileSync(engineFile, "utf-8");
         this._engineSettings =
@@ -133,12 +134,12 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
 
     const modelFullPath = await joinPath(["models", model.id]);
 
-    const nitroInitResult = await executeOnMain(MODULE, "initModel", {
-      modelFullPath: modelFullPath,
-      model: model,
+    const nitroInitResult = await executeOnMain(NODE, "runModel", {
+      modelFullPath,
+      model,
     });
 
-    if (nitroInitResult.error === null) {
+    if (nitroInitResult?.error) {
       events.emit(EventName.OnModelFail, model);
       return;
     }
@@ -155,12 +156,11 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
   private async onModelStop(model: Model) {
     if (model.engine !== "nitro") return;
 
-    await executeOnMain(MODULE, "stopModel");
+    await executeOnMain(NODE, "stopModel");
     events.emit(EventName.OnModelStopped, {});
 
     // stop the periocally health check
     if (this.getNitroProcesHealthIntervalId) {
-      console.debug("Stop calling Nitro process health check");
       clearInterval(this.getNitroProcesHealthIntervalId);
       this.getNitroProcesHealthIntervalId = undefined;
     }
@@ -170,7 +170,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
    * Periodically check for nitro process's health.
    */
   private async periodicallyGetNitroHealth(): Promise<void> {
-    const health = await executeOnMain(MODULE, "getCurrentNitroProcessInfo");
+    const health = await executeOnMain(NODE, "getCurrentNitroProcessInfo");
 
     const isRunning = this.nitroProcessInfo?.isRunning ?? false;
     if (isRunning && health.isRunning === false) {
@@ -204,6 +204,8 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
     };
 
     return new Promise(async (resolve, reject) => {
+      if (!this._currentModel) return Promise.reject("No model loaded");
+
       requestInference(data.messages ?? [], this._currentModel).subscribe({
         next: (_content) => {},
         complete: async () => {
@@ -223,7 +225,9 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
    * @param {MessageRequest} data - The data for the new message request.
    */
   private async onMessageRequest(data: MessageRequest) {
-    if (data.model.engine !== "nitro") return;
+    if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
+      return;
+    }
 
     const timestamp = Date.now();
     const message: ThreadMessage = {
@@ -242,11 +246,12 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
     this.isCancelled = false;
     this.controller = new AbortController();
 
-    requestInference(
-      data.messages ?? [],
-      { ...this._currentModel, ...data.model },
-      this.controller
-    ).subscribe({
+    // @ts-ignore
+    const model: Model = {
+      ...(this._currentModel || {}),
+      ...(data.model || {}),
+    };
+    requestInference(data.messages ?? [], model, this.controller).subscribe({
       next: (content) => {
         const messageContent: ThreadContent = {
           type: ContentType.Text,
@@ -1,514 +0,0 @@
-const fs = require("fs");
-const path = require("path");
-const { exec, spawn } = require("child_process");
-const tcpPortUsed = require("tcp-port-used");
-const fetchRetry = require("fetch-retry")(global.fetch);
-const osUtils = require("os-utils");
-const { readFileSync, writeFileSync, existsSync } = require("fs");
-const { log } = require("@janhq/core/node");
-
-// The PORT to use for the Nitro subprocess
-const PORT = 3928;
-const LOCAL_HOST = "127.0.0.1";
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`;
-const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`;
-const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`;
-const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`;
-const SUPPORTED_MODEL_FORMAT = ".gguf";
-const NVIDIA_INFO_FILE = path.join(
-  require("os").homedir(),
-  "jan",
-  "settings",
-  "settings.json"
-);
-
-// The subprocess instance for Nitro
-let subprocess = undefined;
-let currentModelFile: string = undefined;
-let currentSettings = undefined;
-
-let nitroProcessInfo = undefined;
-
-/**
- * Default GPU settings
- **/
-const DEFALT_SETTINGS = {
-  notify: true,
-  run_mode: "cpu",
-  nvidia_driver: {
-    exist: false,
-    version: "",
-  },
-  cuda: {
-    exist: false,
-    version: "",
-  },
-  gpus: [],
-  gpu_highest_vram: "",
-};
-
-/**
- * Stops a Nitro subprocess.
- * @param wrapper - The model wrapper.
- * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
- */
-function stopModel(): Promise<void> {
-  return killSubprocess();
-}
-
-/**
- * Initializes a Nitro subprocess to load a machine learning model.
- * @param wrapper - The model wrapper.
- * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
- * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
- * TODO: Should it be startModel instead?
- */
-async function initModel(wrapper: any): Promise<ModelOperationResponse> {
-  currentModelFile = wrapper.modelFullPath;
-  const janRoot = path.join(require("os").homedir(), "jan");
-  if (!currentModelFile.includes(janRoot)) {
-    currentModelFile = path.join(janRoot, currentModelFile);
-  }
-  const files: string[] = fs.readdirSync(currentModelFile);
-
-  // Look for GGUF model file
-  const ggufBinFile = files.find(
-    (file) =>
-      file === path.basename(currentModelFile) ||
-      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
-  );
-
-  currentModelFile = path.join(currentModelFile, ggufBinFile);
-
-  if (wrapper.model.engine !== "nitro") {
-    return Promise.resolve({ error: "Not a nitro model" });
-  } else {
-    const nitroResourceProbe = await getResourcesInfo();
-    // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
-    if (wrapper.model.settings.prompt_template) {
-      const promptTemplate = wrapper.model.settings.prompt_template;
-      const prompt = promptTemplateConverter(promptTemplate);
-      if (prompt.error) {
-        return Promise.resolve({ error: prompt.error });
-      }
-      wrapper.model.settings.system_prompt = prompt.system_prompt;
-      wrapper.model.settings.user_prompt = prompt.user_prompt;
-      wrapper.model.settings.ai_prompt = prompt.ai_prompt;
-    }
-
-    currentSettings = {
-      llama_model_path: currentModelFile,
-      ...wrapper.model.settings,
-      // This is critical and requires real system information
-      cpu_threads: nitroResourceProbe.numCpuPhysicalCore,
-    };
-    return loadModel(nitroResourceProbe);
-  }
-}
-
-async function loadModel(nitroResourceProbe: any | undefined) {
-  // Gather system information for CPU physical cores and memory
-  if (!nitroResourceProbe) nitroResourceProbe = await getResourcesInfo();
-  return killSubprocess()
-    .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
-    .then(() => {
-      /**
-       * There is a problem with Windows process manager
-       * Should wait for awhile to make sure the port is free and subprocess is killed
-       * The tested threshold is 500ms
-       **/
-      if (process.platform === "win32") {
-        return new Promise((resolve) => setTimeout(resolve, 500));
-      } else {
-        return Promise.resolve();
-      }
-    })
-    .then(() => spawnNitroProcess(nitroResourceProbe))
-    .then(() => loadLLMModel(currentSettings))
-    .then(validateModelStatus)
-    .catch((err) => {
-      log(`[NITRO]::Error: ${err}`);
-      // TODO: Broadcast error so app could display proper error message
-      return { error: err, currentModelFile };
-    });
-}
-
-function promptTemplateConverter(promptTemplate) {
-  // Split the string using the markers
-  const systemMarker = "{system_message}";
-  const promptMarker = "{prompt}";
-
-  if (
-    promptTemplate.includes(systemMarker) &&
-    promptTemplate.includes(promptMarker)
-  ) {
-    // Find the indices of the markers
-    const systemIndex = promptTemplate.indexOf(systemMarker);
-    const promptIndex = promptTemplate.indexOf(promptMarker);
-
-    // Extract the parts of the string
-    const system_prompt = promptTemplate.substring(0, systemIndex);
-    const user_prompt = promptTemplate.substring(
-      systemIndex + systemMarker.length,
-      promptIndex
-    );
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    );
-
-    // Return the split parts
-    return { system_prompt, user_prompt, ai_prompt };
-  } else if (promptTemplate.includes(promptMarker)) {
-    // Extract the parts of the string for the case where only promptMarker is present
-    const promptIndex = promptTemplate.indexOf(promptMarker);
-    const user_prompt = promptTemplate.substring(0, promptIndex);
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    );
-    const system_prompt = "";
-
-    // Return the split parts
-    return { system_prompt, user_prompt, ai_prompt };
-  }
-
-  // Return an error if none of the conditions are met
-  return { error: "Cannot split prompt template" };
-}
-
-/**
- * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
- * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
- */
-function loadLLMModel(settings): Promise<Response> {
-  log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
-  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
-    },
-    body: JSON.stringify(settings),
-    retries: 3,
-    retryDelay: 500,
-  }).catch((err) => {
-    log(`[NITRO]::Error: Load model failed with error ${err}`);
-  });
-}
-
-/**
- * Validates the status of a model.
- * @returns {Promise<ModelOperationResponse>} A promise that resolves to an object.
- * If the model is loaded successfully, the object is empty.
- * If the model is not loaded successfully, the object contains an error message.
- */
-async function validateModelStatus(): Promise<ModelOperationResponse> {
-  // Send a GET request to the validation URL.
-  // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
-  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
-    method: "GET",
-    headers: {
-      "Content-Type": "application/json",
-    },
-    retries: 5,
-    retryDelay: 500,
-  }).then(async (res: Response) => {
-    // If the response is OK, check model_loaded status.
-    if (res.ok) {
-      const body = await res.json();
-      // If the model is loaded, return an empty object.
-      // Otherwise, return an object with an error message.
-      if (body.model_loaded) {
-        return { error: undefined };
-      }
-    }
-    return { error: "Model loading failed" };
-  });
-}
-
-/**
- * Terminates the Nitro subprocess.
- * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
- */
-async function killSubprocess(): Promise<void> {
-  const controller = new AbortController();
-  setTimeout(() => controller.abort(), 5000);
-  log(`[NITRO]::Debug: Request to kill Nitro`);
-
-  return fetch(NITRO_HTTP_KILL_URL, {
-    method: "DELETE",
-    signal: controller.signal,
-  })
-    .then(() => {
-      subprocess?.kill();
-      subprocess = undefined;
-    })
-    .catch(() => {})
-    .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
-    .then(() => log(`[NITRO]::Debug: Nitro process is terminated`));
-}
-
-/**
- * Spawns a Nitro subprocess.
- * @param nitroResourceProbe - The Nitro resource probe.
- * @returns A promise that resolves when the Nitro subprocess is started.
- */
-function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
-  log(`[NITRO]::Debug: Spawning Nitro subprocess...`);
-
-  return new Promise(async (resolve, reject) => {
-    let binaryFolder = path.join(__dirname, "bin"); // Current directory by default
-    let cudaVisibleDevices = "";
-    let binaryName;
-    if (process.platform === "win32") {
-      let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      if (nvidiaInfo["run_mode"] === "cpu") {
-        binaryFolder = path.join(binaryFolder, "win-cpu");
-      } else {
-        if (nvidiaInfo["cuda"].version === "12") {
-          binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
-        } else {
-          binaryFolder = path.join(binaryFolder, "win-cuda-11-7");
-        }
-        cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
-      }
-      binaryName = "nitro.exe";
-    } else if (process.platform === "darwin") {
-      if (process.arch === "arm64") {
-        binaryFolder = path.join(binaryFolder, "mac-arm64");
-      } else {
-        binaryFolder = path.join(binaryFolder, "mac-x64");
-      }
-      binaryName = "nitro";
-    } else {
-      let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      if (nvidiaInfo["run_mode"] === "cpu") {
-        binaryFolder = path.join(binaryFolder, "linux-cpu");
-      } else {
-        if (nvidiaInfo["cuda"].version === "12") {
-          binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
-        } else {
-          binaryFolder = path.join(binaryFolder, "linux-cuda-11-7");
-        }
-        cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
-      }
-      binaryName = "nitro";
-    }
-
-    const binaryPath = path.join(binaryFolder, binaryName);
-    // Execute the binary
-    subprocess = spawn(binaryPath, ["1", LOCAL_HOST, PORT.toString()], {
-      cwd: binaryFolder,
-      env: {
-        ...process.env,
-        CUDA_VISIBLE_DEVICES: cudaVisibleDevices,
-      },
-    });
-
-    // Handle subprocess output
-    subprocess.stdout.on("data", (data) => {
-      log(`[NITRO]::Debug: ${data}`);
-    });
-
-    subprocess.stderr.on("data", (data) => {
-      log(`[NITRO]::Error: ${data}`);
-    });
-
-    subprocess.on("close", (code) => {
-      log(`[NITRO]::Debug: Nitro exited with code: ${code}`);
-      subprocess = null;
-      reject(`child process exited with code ${code}`);
-    });
-
-    tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
-      resolve(nitroResourceProbe);
-    });
-  });
-}
-
-/**
- * Get the system resources information
- * TODO: Move to Core so that it can be reused
- */
-function getResourcesInfo(): Promise<ResourcesInfo> {
-  return new Promise(async (resolve) => {
-    const cpu = await osUtils.cpuCount();
-    log(`[NITRO]::CPU informations - ${cpu}`);
-    const response: ResourcesInfo = {
-      numCpuPhysicalCore: cpu,
-      memAvailable: 0,
-    };
-    resolve(response);
-  });
-}
-
-/**
- * This will retrive GPU informations and persist settings.json
- * Will be called when the extension is loaded to turn on GPU acceleration if supported
- */
-async function updateNvidiaInfo() {
-  if (process.platform !== "darwin") {
-    await Promise.all([
-      updateNvidiaDriverInfo(),
-      updateCudaExistence(),
-      updateGpuInfo(),
-    ]);
-  }
-}
-
-/**
- * Retrieve current nitro process
- */
-const getCurrentNitroProcessInfo = (): Promise<any> => {
-  nitroProcessInfo = {
-    isRunning: subprocess != null,
-  };
-  return nitroProcessInfo;
-};
-
-/**
- * Every module should have a dispose function
- * This will be called when the extension is unloaded and should clean up any resources
- * Also called when app is closed
- */
-function dispose() {
-  // clean other registered resources here
-  killSubprocess();
-}
-
-/**
- * Validate nvidia and cuda for linux and windows
- */
-async function updateNvidiaDriverInfo(): Promise<void> {
-  exec(
-    "nvidia-smi --query-gpu=driver_version --format=csv,noheader",
-    (error, stdout) => {
-      let data;
-      try {
-        data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      } catch (error) {
-        data = DEFALT_SETTINGS;
-      }
-
-      if (!error) {
-        const firstLine = stdout.split("\n")[0].trim();
-        data["nvidia_driver"].exist = true;
-        data["nvidia_driver"].version = firstLine;
-      } else {
-        data["nvidia_driver"].exist = false;
-      }
-
-      writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
-      Promise.resolve();
-    }
-  );
-}
-
-/**
- * Check if file exists in paths
- */
-function checkFileExistenceInPaths(file: string, paths: string[]): boolean {
-  return paths.some((p) => existsSync(path.join(p, file)));
-}
-
-/**
- * Validate cuda for linux and windows
- */
-function updateCudaExistence() {
-  let filesCuda12: string[];
-  let filesCuda11: string[];
-  let paths: string[];
-  let cudaVersion: string = "";
-
-  if (process.platform === "win32") {
-    filesCuda12 = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"];
-    filesCuda11 = ["cublas64_11.dll", "cudart64_11.dll", "cublasLt64_11.dll"];
-    paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : [];
-  } else {
-    filesCuda12 = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"];
-    filesCuda11 = ["libcudart.so.11.0", "libcublas.so.11", "libcublasLt.so.11"];
-    paths = process.env.LD_LIBRARY_PATH
-      ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
-      : [];
-    paths.push("/usr/lib/x86_64-linux-gnu/");
-  }
-
-  let cudaExists = filesCuda12.every(
-    (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
-  );
-
-  if (!cudaExists) {
-    cudaExists = filesCuda11.every(
-      (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
-    );
-    if (cudaExists) {
-      cudaVersion = "11";
-    }
-  } else {
-    cudaVersion = "12";
-  }
-
-  let data;
-  try {
-    data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-  } catch (error) {
-    data = DEFALT_SETTINGS;
-  }
-
-  data["cuda"].exist = cudaExists;
-  data["cuda"].version = cudaVersion;
-  if (cudaExists) {
-    data.run_mode = "gpu";
-  }
-  writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
-}
-
-/**
- * Get GPU information
- */
-async function updateGpuInfo(): Promise<void> {
-  exec(
-    "nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits",
-    (error, stdout) => {
-      let data;
-      try {
-        data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      } catch (error) {
-        data = DEFALT_SETTINGS;
-      }
-
-      if (!error) {
-        // Get GPU info and gpu has higher memory first
-        let highestVram = 0;
-        let highestVramId = "0";
-        let gpus = stdout
-          .trim()
-          .split("\n")
-          .map((line) => {
-            let [id, vram] = line.split(", ");
-            vram = vram.replace(/\r/g, "");
-            if (parseFloat(vram) > highestVram) {
-              highestVram = parseFloat(vram);
-              highestVramId = id;
-            }
-            return { id, vram };
-          });
-
-        data["gpus"] = gpus;
-        data["gpu_highest_vram"] = highestVramId;
-      } else {
-        data["gpus"] = [];
-      }
-
-      writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
-      Promise.resolve();
-    }
-  );
-}
-
-module.exports = {
-  initModel,
-  stopModel,
-  killSubprocess,
-  dispose,
-  updateNvidiaInfo,
-  getCurrentNitroProcessInfo,
-};
extensions/inference-nitro-extension/src/node/execute.ts (new file, 65 lines)
@@ -0,0 +1,65 @@
+import { readFileSync } from "fs";
+import * as path from "path";
+import { NVIDIA_INFO_FILE } from "./nvidia";
+
+export interface NitroExecutableOptions {
+  executablePath: string;
+  cudaVisibleDevices: string;
+}
+/**
+ * Find which executable file to run based on the current platform.
+ * @returns The name of the executable file to run.
+ */
+export const executableNitroFile = (): NitroExecutableOptions => {
+  let binaryFolder = path.join(__dirname, "..", "bin"); // Current directory by default
+  let cudaVisibleDevices = "";
+  let binaryName = "nitro";
+  /**
+   * The binary folder is different for each platform.
+   */
+  if (process.platform === "win32") {
+    /**
+     * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
+    if (nvidiaInfo["run_mode"] === "cpu") {
+      binaryFolder = path.join(binaryFolder, "win-cpu");
+    } else {
+      if (nvidiaInfo["cuda"].version === "12") {
+        binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
+      } else {
+        binaryFolder = path.join(binaryFolder, "win-cuda-11-7");
+      }
+      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
+    }
+    binaryName = "nitro.exe";
+  } else if (process.platform === "darwin") {
+    /**
+     * For MacOS: mac-arm64 (Silicon), mac-x64 (InteL)
+     */
+    if (process.arch === "arm64") {
+      binaryFolder = path.join(binaryFolder, "mac-arm64");
+    } else {
+      binaryFolder = path.join(binaryFolder, "mac-x64");
+    }
+  } else {
+    /**
+     * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
+    if (nvidiaInfo["run_mode"] === "cpu") {
+      binaryFolder = path.join(binaryFolder, "linux-cpu");
+    } else {
+      if (nvidiaInfo["cuda"].version === "12") {
+        binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
+      } else {
+        binaryFolder = path.join(binaryFolder, "linux-cuda-11-7");
+      }
+      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
+    }
+  }
+  return {
+    executablePath: path.join(binaryFolder, binaryName),
+    cudaVisibleDevices,
+  };
+};
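For orientation, a short sketch of how executableNitroFile() is consumed (mirroring src/node/index.ts below); the spawn arguments ["1", "127.0.0.1", "3928"] come straight from this commit, the rest is illustrative:

import { spawn } from "child_process";
import { executableNitroFile } from "./execute";

// Resolve the platform/CUDA-specific binary once, then spawn Nitro with the
// highest-VRAM GPU pinned (an empty string means CPU mode).
const { executablePath, cudaVisibleDevices } = executableNitroFile();
const nitro = spawn(executablePath, ["1", "127.0.0.1", "3928"], {
  env: { ...process.env, CUDA_VISIBLE_DEVICES: cudaVisibleDevices },
});
nitro.stdout.on("data", (d) => console.log(`[NITRO] ${d}`));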
379
extensions/inference-nitro-extension/src/node/index.ts
Normal file
379
extensions/inference-nitro-extension/src/node/index.ts
Normal file
@ -0,0 +1,379 @@
|
|||||||
|
import fs from "fs";
|
||||||
|
import path from "path";
|
||||||
|
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
|
||||||
|
import tcpPortUsed from "tcp-port-used";
|
||||||
|
import fetchRT from "fetch-retry";
|
||||||
|
import osUtils from "os-utils";
|
||||||
|
import { log } from "@janhq/core/node";
|
||||||
|
import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
|
||||||
|
import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
|
||||||
|
import { executableNitroFile } from "./execute";
|
||||||
|
import { homedir } from "os";
|
||||||
|
// Polyfill fetch with retry
|
||||||
|
const fetchRetry = fetchRT(fetch);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The response object for model init operation.
|
||||||
|
*/
|
||||||
|
interface ModelInitOptions {
|
||||||
|
modelFullPath: string;
|
||||||
|
model: Model;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The response object of Prompt Template parsing.
|
||||||
|
*/
|
||||||
|
interface PromptTemplate {
|
||||||
|
system_prompt?: string;
|
||||||
|
ai_prompt?: string;
|
||||||
|
user_prompt?: string;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Model setting args for Nitro model load.
|
||||||
|
*/
|
||||||
|
interface ModelSettingArgs extends ModelSettingParams {
|
||||||
|
llama_model_path: string;
|
||||||
|
cpu_threads: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The PORT to use for the Nitro subprocess
|
||||||
|
const PORT = 3928;
|
||||||
|
// The HOST address to use for the Nitro subprocess
|
||||||
|
const LOCAL_HOST = "127.0.0.1";
|
||||||
|
// The URL for the Nitro subprocess
|
||||||
|
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`;
|
||||||
|
// The URL for the Nitro subprocess to load a model
|
||||||
|
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`;
|
||||||
|
// The URL for the Nitro subprocess to validate a model
|
||||||
|
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`;
|
||||||
|
// The URL for the Nitro subprocess to kill itself
|
||||||
|
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`;
|
||||||
|
|
||||||
|
// The supported model format
|
||||||
|
// TODO: Should be an array to support more models
|
||||||
|
const SUPPORTED_MODEL_FORMAT = ".gguf";
|
||||||
|
|
||||||
|
// The subprocess instance for Nitro
|
||||||
|
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined;
|
||||||
|
// The current model file url
|
||||||
|
let currentModelFile: string = "";
|
||||||
|
// The current model settings
|
||||||
|
let currentSettings: ModelSettingArgs | undefined = undefined;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stops a Nitro subprocess.
|
||||||
|
* @param wrapper - The model wrapper.
|
||||||
|
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
|
||||||
|
*/
|
||||||
|
function stopModel(): Promise<void> {
|
||||||
|
return killSubprocess();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes a Nitro subprocess to load a machine learning model.
|
||||||
|
* @param wrapper - The model wrapper.
|
||||||
|
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
|
||||||
|
* TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
|
||||||
|
*/
|
||||||
|
async function runModel(
|
||||||
|
wrapper: ModelInitOptions
|
||||||
|
): Promise<ModelOperationResponse | void> {
|
||||||
|
if (wrapper.model.engine !== InferenceEngine.nitro) {
|
||||||
|
// Not a nitro model
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
|
||||||
|
currentModelFile = wrapper.modelFullPath;
|
||||||
|
const janRoot = path.join(homedir(), "jan");
|
||||||
|
if (!currentModelFile.includes(janRoot)) {
|
||||||
|
currentModelFile = path.join(janRoot, currentModelFile);
|
||||||
|
}
|
||||||
|
const files: string[] = fs.readdirSync(currentModelFile);
|
||||||
|
|
||||||
|
// Look for GGUF model file
|
||||||
|
const ggufBinFile = files.find(
|
||||||
|
(file) =>
|
||||||
|
file === path.basename(currentModelFile) ||
|
||||||
|
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!ggufBinFile) return Promise.reject("No GGUF model file found");
|
||||||
|
|
||||||
|
currentModelFile = path.join(currentModelFile, ggufBinFile);
|
||||||
|
|
||||||
|
if (wrapper.model.engine !== InferenceEngine.nitro) {
|
||||||
|
return Promise.reject("Not a nitro model");
|
||||||
|
} else {
|
||||||
|
const nitroResourceProbe = await getResourcesInfo();
|
||||||
|
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
|
||||||
|
if (wrapper.model.settings.prompt_template) {
|
||||||
|
const promptTemplate = wrapper.model.settings.prompt_template;
|
||||||
|
const prompt = promptTemplateConverter(promptTemplate);
|
||||||
|
if (prompt?.error) {
|
||||||
|
return Promise.reject(prompt.error);
|
||||||
|
}
|
||||||
|
wrapper.model.settings.system_prompt = prompt.system_prompt;
|
||||||
|
wrapper.model.settings.user_prompt = prompt.user_prompt;
|
||||||
|
wrapper.model.settings.ai_prompt = prompt.ai_prompt;
|
||||||
|
}
|
||||||
|
|
||||||
|
currentSettings = {
|
||||||
|
llama_model_path: currentModelFile,
|
||||||
|
...wrapper.model.settings,
|
||||||
|
// This is critical and requires real system information
|
||||||
|
cpu_threads: nitroResourceProbe.numCpuPhysicalCore,
|
||||||
|
};
|
||||||
|
return runNitroAndLoadModel();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 1. Spawn Nitro process
|
||||||
|
* 2. Load model into Nitro subprocess
|
||||||
|
* 3. Validate model status
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
|
async function runNitroAndLoadModel() {
|
||||||
|
// Gather system information for CPU physical cores and memory
|
||||||
|
return killSubprocess()
|
||||||
|
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
|
||||||
|
.then(() => {
|
||||||
|
/**
|
||||||
|
* There is a problem with Windows process manager
|
||||||
|
* Should wait for awhile to make sure the port is free and subprocess is killed
|
||||||
|
* The tested threshold is 500ms
|
||||||
|
**/
|
||||||
|
if (process.platform === "win32") {
|
||||||
|
return new Promise((resolve) => setTimeout(resolve, 500));
|
||||||
|
} else {
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.then(spawnNitroProcess)
|
||||||
|
.then(() => loadLLMModel(currentSettings))
|
||||||
|
.then(validateModelStatus)
|
||||||
|
.catch((err) => {
|
||||||
|
// TODO: Broadcast error so app could display proper error message
|
||||||
|
log(`[NITRO]::Error: ${err}`);
|
||||||
|
return { error: err };
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse prompt template into agrs settings
|
||||||
|
* @param promptTemplate Template as string
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
|
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
|
||||||
|
// Split the string using the markers
|
||||||
|
const systemMarker = "{system_message}";
|
||||||
|
const promptMarker = "{prompt}";
|
||||||
|
|
||||||
|
if (
|
||||||
|
promptTemplate.includes(systemMarker) &&
|
||||||
|
promptTemplate.includes(promptMarker)
|
||||||
|
) {
|
||||||
|
// Find the indices of the markers
|
||||||
|
const systemIndex = promptTemplate.indexOf(systemMarker);
|
||||||
|
const promptIndex = promptTemplate.indexOf(promptMarker);
|
||||||
|
|
||||||
|
// Extract the parts of the string
|
||||||
|
const system_prompt = promptTemplate.substring(0, systemIndex);
|
||||||
|
const user_prompt = promptTemplate.substring(
|
||||||
|
systemIndex + systemMarker.length,
|
||||||
|
promptIndex
|
||||||
|
);
|
||||||
|
const ai_prompt = promptTemplate.substring(
|
||||||
|
promptIndex + promptMarker.length
|
||||||
|
);
|
||||||
|
|
||||||
|
// Return the split parts
|
||||||
|
return { system_prompt, user_prompt, ai_prompt };
|
||||||
|
} else if (promptTemplate.includes(promptMarker)) {
|
||||||
|
// Extract the parts of the string for the case where only promptMarker is present
|
||||||
|
const promptIndex = promptTemplate.indexOf(promptMarker);
|
||||||
|
const user_prompt = promptTemplate.substring(0, promptIndex);
|
||||||
|
const ai_prompt = promptTemplate.substring(
|
||||||
|
promptIndex + promptMarker.length
|
||||||
|
);
|
||||||
|
|
||||||
|
// Return the split parts
|
||||||
|
return { user_prompt, ai_prompt };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return an error if none of the conditions are met
|
||||||
|
return { error: "Cannot split prompt template" };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
|
||||||
|
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
|
||||||
|
*/
|
||||||
|
function loadLLMModel(settings: any): Promise<Response> {
|
||||||
|
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
|
||||||
|
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
body: JSON.stringify(settings),
|
||||||
|
retries: 3,
|
||||||
|
retryDelay: 500,
|
||||||
|
})
|
||||||
|
.then((res) => {
|
||||||
|
log(
|
||||||
|
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
|
||||||
|
res
|
||||||
|
)}`
|
||||||
|
);
|
||||||
|
return Promise.resolve(res);
|
||||||
|
})
|
||||||
|
.catch((err) => {
|
||||||
|
log(`[NITRO]::Error: Load model failed with error ${err}`);
|
||||||
|
return Promise.reject();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates the status of a model.
|
||||||
|
* @returns {Promise<ModelOperationResponse>} A promise that resolves to an object.
|
||||||
|
* If the model is loaded successfully, the object is empty.
|
||||||
|
* If the model is not loaded successfully, the object contains an error message.
|
||||||
|
*/
|
||||||
|
async function validateModelStatus(): Promise<void> {
|
||||||
|
// Send a GET request to the validation URL.
|
||||||
|
// Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
|
||||||
|
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
|
||||||
|
method: "GET",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
retries: 5,
|
||||||
|
retryDelay: 500,
|
||||||
|
}).then(async (res: Response) => {
|
||||||
|
log(
|
||||||
|
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
|
||||||
|
res
|
||||||
|
)}`
|
||||||
|
);
|
||||||
|
// If the response is OK, check model_loaded status.
|
||||||
|
if (res.ok) {
|
||||||
|
const body = await res.json();
|
||||||
|
// If the model is loaded, return an empty object.
|
||||||
|
// Otherwise, return an object with an error message.
|
||||||
|
if (body.model_loaded) {
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Promise.reject("Validate model status failed");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Terminates the Nitro subprocess.
|
||||||
|
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
|
||||||
|
*/
|
||||||
|
async function killSubprocess(): Promise<void> {
|
||||||
|
const controller = new AbortController();
|
||||||
|
setTimeout(() => controller.abort(), 5000);
|
||||||
|
log(`[NITRO]::Debug: Request to kill Nitro`);
|
||||||
|
|
||||||
|
return fetch(NITRO_HTTP_KILL_URL, {
|
||||||
|
method: "DELETE",
|
||||||
|
signal: controller.signal,
|
||||||
|
})
|
||||||
|
.then(() => {
|
||||||
|
subprocess?.kill();
|
||||||
|
subprocess = undefined;
|
||||||
|
})
|
||||||
|
.catch(() => {})
|
||||||
|
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
|
||||||
|
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Spawns a Nitro subprocess.
|
||||||
|
* @returns A promise that resolves when the Nitro subprocess is started.
|
||||||
|
*/
|
||||||
|
function spawnNitroProcess(): Promise<any> {
|
||||||
|
log(`[NITRO]::Debug: Spawning Nitro subprocess...`);
|
||||||
|
|
||||||
|
return new Promise<void>(async (resolve, reject) => {
|
||||||
|
let binaryFolder = path.join(__dirname, "..", "bin"); // Current directory by default
|
||||||
|
let executableOptions = executableNitroFile();
|
||||||
|
|
||||||
|
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
|
||||||
|
// Execute the binary
|
||||||
|
log(
|
||||||
|
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
|
||||||
|
);
|
||||||
|
subprocess = spawn(
|
||||||
|
executableOptions.executablePath,
|
||||||
|
["1", LOCAL_HOST, PORT.toString()],
|
||||||
|
{
|
||||||
|
cwd: binaryFolder,
|
||||||
|
env: {
|
||||||
|
...process.env,
|
||||||
|
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
// Handle subprocess output
|
||||||
|
subprocess.stdout.on("data", (data: any) => {
|
||||||
|
log(`[NITRO]::Debug: ${data}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
subprocess.stderr.on("data", (data: any) => {
|
||||||
|
log(`[NITRO]::Error: ${data}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
subprocess.on("close", (code: any) => {
|
||||||
|
log(`[NITRO]::Debug: Nitro exited with code: ${code}`);
|
||||||
|
subprocess = undefined;
|
||||||
|
reject(`child process exited with code ${code}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
|
||||||
|
log(`[NITRO]::Debug: Nitro is ready`);
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
/**
 * Get system resource information
 * TODO: Move to Core so that it can be reused
 */
async function getResourcesInfo(): Promise<ResourcesInfo> {
  const cpu = await osUtils.cpuCount();
  log(`[NITRO]::CPU information - ${cpu}`);
  return {
    numCpuPhysicalCore: cpu,
    memAvailable: 0,
  };
}

/**
 * Every module should have a dispose function.
 * It is called when the extension is unloaded, and also when the app is closed,
 * and should clean up any resources the module holds.
 */
function dispose() {
  // clean up other registered resources here
  killSubprocess();
}

export default {
  runModel,
  stopModel,
  killSubprocess,
  dispose,
  updateNvidiaInfo,
  getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess),
};

extensions/inference-nitro-extension/src/node/nvidia.ts
Normal file, 201 lines
@ -0,0 +1,201 @@
import { writeFileSync, existsSync, readFileSync } from "fs";
import { exec } from "child_process";
import path from "path";
import { homedir } from "os";

/**
 * Default GPU settings
 **/
const DEFAULT_SETTINGS = {
  notify: true,
  run_mode: "cpu",
  nvidia_driver: {
    exist: false,
    version: "",
  },
  cuda: {
    exist: false,
    version: "",
  },
  gpus: [],
  gpu_highest_vram: "",
};

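For reference, the object above is what gets persisted to settings.json. A hypothetical TypeScript shape for that file, inferred from the defaults and the update functions below (the interface name is illustrative):

// Hypothetical shape of ~/jan/settings/settings.json, inferred from
// DEFAULT_SETTINGS and the update functions in this module.
interface GpuSettings {
  notify: boolean;
  run_mode: "cpu" | "gpu";
  nvidia_driver: { exist: boolean; version: string };
  cuda: { exist: boolean; version: string };
  gpus: { id: string; vram: string }[];
  gpu_highest_vram: string; // id of the GPU with the most VRAM
}
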
/**
 * Path to the settings file
 **/
export const NVIDIA_INFO_FILE = path.join(
  homedir(),
  "jan",
  "settings",
  "settings.json"
);

/**
 * Current nitro process
 */
let nitroProcessInfo: NitroProcessInfo | undefined = undefined;

/**
 * Nitro process info
 */
export interface NitroProcessInfo {
  isRunning: boolean;
}

/**
 * Retrieves GPU information and persists it to settings.json.
 * Called when the extension is loaded to turn on GPU acceleration if supported.
 */
export async function updateNvidiaInfo() {
  if (process.platform !== "darwin") {
    await Promise.all([
      updateNvidiaDriverInfo(),
      updateCudaExistence(),
      updateGpuInfo(),
    ]);
  }
}

/**
 * Retrieve current nitro process
 */
export const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => {
  nitroProcessInfo = {
    isRunning: subprocess != null,
  };
  return nitroProcessInfo;
};

/**
 * Validate nvidia and cuda for linux and windows
 */
export async function updateNvidiaDriverInfo(): Promise<void> {
  // Wrap exec in a promise so callers can actually await the callback;
  // a bare Promise.resolve() inside the callback would be discarded.
  return new Promise((resolve) => {
    exec(
      "nvidia-smi --query-gpu=driver_version --format=csv,noheader",
      (error, stdout) => {
        let data;
        try {
          data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
        } catch (error) {
          data = DEFAULT_SETTINGS;
        }

        if (!error) {
          const firstLine = stdout.split("\n")[0].trim();
          data["nvidia_driver"].exist = true;
          data["nvidia_driver"].version = firstLine;
        } else {
          data["nvidia_driver"].exist = false;
        }

        writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
        resolve();
      }
    );
  });
}

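With those flags, nvidia-smi prints one driver version per GPU (e.g. a line like "535.154.05"). An alternative sketch of the same query using Node's built-in util.promisify instead of a hand-rolled wrapper, so failures surface as rejections (the function name is illustrative):

import { exec } from "child_process";
import { promisify } from "util";

const execAsync = promisify(exec);

// Sketch: query the driver version, returning undefined when
// nvidia-smi is missing or there is no NVIDIA GPU.
async function queryDriverVersion(): Promise<string | undefined> {
  try {
    const { stdout } = await execAsync(
      "nvidia-smi --query-gpu=driver_version --format=csv,noheader"
    );
    return stdout.split("\n")[0].trim();
  } catch {
    return undefined;
  }
}
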
/**
 * Check if a file exists in any of the given paths
 */
export function checkFileExistenceInPaths(
  file: string,
  paths: string[]
): boolean {
  return paths.some((p) => existsSync(path.join(p, file)));
}

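A quick usage sketch of the helper above (the directories shown are illustrative, not values from the codebase):

// e.g. look for a CUDA 12 runtime somewhere on a library search path
const found = checkFileExistenceInPaths("libcudart.so.12", [
  "/usr/local/cuda/lib64",
  "/usr/lib/x86_64-linux-gnu/",
]);
console.log(found ? "CUDA runtime found" : "CUDA runtime missing");
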
/**
 * Validate cuda for linux and windows
 */
export function updateCudaExistence() {
  let filesCuda12: string[];
  let filesCuda11: string[];
  let paths: string[];
  let cudaVersion: string = "";

  if (process.platform === "win32") {
    filesCuda12 = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"];
    filesCuda11 = ["cublas64_11.dll", "cudart64_11.dll", "cublasLt64_11.dll"];
    paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : [];
  } else {
    filesCuda12 = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"];
    filesCuda11 = ["libcudart.so.11.0", "libcublas.so.11", "libcublasLt.so.11"];
    paths = process.env.LD_LIBRARY_PATH
      ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
      : [];
    paths.push("/usr/lib/x86_64-linux-gnu/");
  }

  // Prefer CUDA 12; fall back to CUDA 11 if the newer runtime is absent.
  let cudaExists = filesCuda12.every(
    (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
  );

  if (!cudaExists) {
    cudaExists = filesCuda11.every(
      (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
    );
    if (cudaExists) {
      cudaVersion = "11";
    }
  } else {
    cudaVersion = "12";
  }

  let data;
  try {
    data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
  } catch (error) {
    data = DEFAULT_SETTINGS;
  }

  data["cuda"].exist = cudaExists;
  data["cuda"].version = cudaVersion;
  if (cudaExists) {
    data.run_mode = "gpu";
  }
  writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
}

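Condensed, the version-resolution logic above reduces to the following (a sketch for illustration only; the function name is hypothetical):

// CUDA 12 wins over CUDA 11; an empty string means no usable runtime.
function resolveCudaVersion(has12: boolean, has11: boolean): "" | "11" | "12" {
  if (has12) return "12";
  if (has11) return "11";
  return "";
}
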
/**
 * Get GPU information
 */
export async function updateGpuInfo(): Promise<void> {
  // As above, wrap exec in a promise so the function is awaitable.
  return new Promise((resolve) => {
    exec(
      "nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits",
      (error, stdout) => {
        let data;
        try {
          data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
        } catch (error) {
          data = DEFAULT_SETTINGS;
        }

        if (!error) {
          // Record every GPU and remember the one with the most VRAM.
          let highestVram = 0;
          let highestVramId = "0";
          let gpus = stdout
            .trim()
            .split("\n")
            .map((line) => {
              let [id, vram] = line.split(", ");
              vram = vram.replace(/\r/g, "");
              if (parseFloat(vram) > highestVram) {
                highestVram = parseFloat(vram);
                highestVramId = id;
              }
              return { id, vram };
            });

          data["gpus"] = gpus;
          data["gpu_highest_vram"] = highestVramId;
        } else {
          data["gpus"] = [];
        }

        writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
        resolve();
      }
    );
  });
}

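With --query-gpu=index,memory.total --format=csv,noheader,nounits, nvidia-smi emits lines like "0, 24576". A standalone demo of the parsing logic above with made-up output:

// Demo input: two GPUs, 24 GiB and 8 GiB of VRAM (values are invented).
const sample = "0, 24576\n1, 8192\n";
let highestVram = 0;
let highestVramId = "0";
const gpus = sample
  .trim()
  .split("\n")
  .map((line) => {
    const [id, vram] = line.split(", ").map((s) => s.replace(/\r/g, ""));
    if (parseFloat(vram) > highestVram) {
      highestVram = parseFloat(vram);
      highestVramId = id;
    }
    return { id, vram };
  });
console.log(gpus, highestVramId);
// [{ id: "0", vram: "24576" }, { id: "1", vram: "8192" }] "0"
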
extensions/inference-nitro-extension/tsconfig.json
@ -1,15 +1,19 @@
 {
   "compilerOptions": {
-    "target": "es2016",
-    "module": "ES6",
     "moduleResolution": "node",
-    "outDir": "./dist",
-    "esModuleInterop": true,
-    "forceConsistentCasingInFileNames": true,
-    "strict": false,
-    "skipLibCheck": true,
-    "rootDir": "./src"
+    "target": "es5",
+    "module": "ES2020",
+    "lib": ["es2015", "es2016", "es2017", "dom"],
+    "strict": true,
+    "sourceMap": true,
+    "declaration": true,
+    "allowSyntheticDefaultImports": true,
+    "experimentalDecorators": true,
+    "emitDecoratorMetadata": true,
+    "declarationDir": "dist/types",
+    "outDir": "dist",
+    "importHelpers": true,
+    "typeRoots": ["node_modules/@types"]
   },
-  "include": ["./src"]
+  "include": ["src"]
 }
extensions/inference-nitro-extension/webpack.config.js (deleted)
@ -1,43 +0,0 @@
const path = require("path");
const webpack = require("webpack");
const packageJson = require("./package.json");

module.exports = {
  experiments: { outputModule: true },
  entry: "./src/index.ts", // Adjust the entry point to match your project's main file
  mode: "production",
  module: {
    rules: [
      {
        test: /\.tsx?$/,
        use: "ts-loader",
        exclude: /node_modules/,
      },
    ],
  },
  plugins: [
    new webpack.DefinePlugin({
      MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
      INFERENCE_URL: JSON.stringify(
        process.env.INFERENCE_URL ||
          "http://127.0.0.1:3928/inferences/llamacpp/chat_completion"
      ),
      TROUBLESHOOTING_URL: JSON.stringify("https://jan.ai/guides/troubleshooting")
    }),
  ],
  output: {
    filename: "index.js", // Adjust the output file name as needed
    path: path.resolve(__dirname, "dist"),
    library: { type: "module" }, // Specify ESM output format
  },
  resolve: {
    extensions: [".ts", ".js"],
    fallback: {
      path: require.resolve("path-browserify"),
    },
  },
  optimization: {
    minimize: false,
  },
  // Add loaders and other configuration as needed for your project
};