const fs = require("fs"); const fsPromises = fs.promises; const path = require("path"); const { exec, spawn } = require("child_process"); const tcpPortUsed = require("tcp-port-used"); const fetchRetry = require("fetch-retry")(global.fetch); const osUtils = require("os-utils"); const { readFileSync, writeFileSync, existsSync } = require("fs"); // The PORT to use for the Nitro subprocess const PORT = 3928; const LOCAL_HOST = "127.0.0.1"; const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`; const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`; const NITRO_HTTP_UNLOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/unloadModel`; const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`; const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`; const SUPPORTED_MODEL_FORMAT = ".gguf"; const NVIDIA_INFO_FILE = path.join( require("os").homedir(), "jan", "settings", "settings.json" ); const DEFALT_SETTINGS = { "notify": true, "run_mode": "cpu", "nvidia_driver": { "exist": false, "version": "" }, "cuda": { "exist": false, "version": "" }, "gpus": [], "gpu_highest_vram": "" } // The subprocess instance for Nitro let subprocess = undefined; let currentModelFile: string = undefined; let currentSettings = undefined; /** * Stops a Nitro subprocess. * @param wrapper - The model wrapper. * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. */ function stopModel(): Promise { return killSubprocess(); } /** * Validate nvidia and cuda for linux and windows */ async function updateNvidiaDriverInfo(): Promise { exec( "nvidia-smi --query-gpu=driver_version --format=csv,noheader", (error, stdout) => { let data; try { data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf8")); } catch (error) { data = DEFALT_SETTINGS; } if (!error) { const firstLine = stdout.split("\n")[0].trim(); data["nvidia_driver"].exist = true; data["nvidia_driver"].version = firstLine; } else { data["nvidia_driver"].exist = false; } writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)); Promise.resolve(); } ); } function checkFileExistenceInPaths(file: string, paths: string[]): boolean { return paths.some((p) => existsSync(path.join(p, file))); } function updateCudaExistence() { let files: string[]; let paths: string[]; if (process.platform === "win32") { files = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"]; paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []; const nitro_cuda_path = path.join(__dirname, "bin", "win-cuda"); paths.push(nitro_cuda_path); } else { files = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"]; paths = process.env.LD_LIBRARY_PATH ? process.env.LD_LIBRARY_PATH.split(path.delimiter) : []; const nitro_cuda_path = path.join(__dirname, "bin", "linux-cuda"); paths.push(nitro_cuda_path); paths.push("/usr/lib/x86_64-linux-gnu/"); } let cudaExists = files.every( (file) => existsSync(file) || checkFileExistenceInPaths(file, paths) ); let data; try { data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf8")); } catch (error) { data = DEFALT_SETTINGS; } data["cuda"].exist = cudaExists; if (cudaExists) { data.run_mode = "gpu"; } writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)); } async function updateGpuInfo(): Promise { exec( "nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits", (error, stdout) => { let data; try { data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf8")); } catch (error) { data = DEFALT_SETTINGS; } if (!error) { // Get GPU info and gpu has higher memory first let highestVram = 0; let highestVramId = "0"; let gpus = stdout .trim() .split("\n") .map((line) => { let [id, vram] = line.split(", "); vram = vram.replace(/\r/g, ""); if (parseFloat(vram) > highestVram) { highestVram = parseFloat(vram); highestVramId = id; } return { id, vram }; }); data["gpus"] = gpus; data["gpu_highest_vram"] = highestVramId; } else { data["gpus"] = []; } writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)); Promise.resolve(); } ); } async function updateNvidiaInfo() { if (process.platform !== "darwin") { await Promise.all([ updateNvidiaDriverInfo(), updateCudaExistence(), updateGpuInfo(), ]); } } /** * Initializes a Nitro subprocess to load a machine learning model. * @param wrapper - The model wrapper. * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package * TODO: Should it be startModel instead? */ async function initModel(wrapper: any): Promise { currentModelFile = wrapper.modelFullPath; const janRoot = path.join(require("os").homedir(), "jan"); if (!currentModelFile.includes(janRoot)) { currentModelFile = path.join(janRoot, currentModelFile); } const files: string[] = fs.readdirSync(currentModelFile); // Look for GGUF model file const ggufBinFile = files.find( (file) => file === path.basename(currentModelFile) || file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) ); currentModelFile = path.join(currentModelFile, ggufBinFile); if (wrapper.model.engine !== "nitro") { return Promise.resolve({ error: "Not a nitro model" }); } else { const nitroResourceProbe = await getResourcesInfo(); // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt if (wrapper.model.settings.prompt_template) { const promptTemplate = wrapper.model.settings.prompt_template; const prompt = promptTemplateConverter(promptTemplate); if (prompt.error) { return Promise.resolve({ error: prompt.error }); } wrapper.model.settings.system_prompt = prompt.system_prompt; wrapper.model.settings.user_prompt = prompt.user_prompt; wrapper.model.settings.ai_prompt = prompt.ai_prompt; } currentSettings = { llama_model_path: currentModelFile, ...wrapper.model.settings, // This is critical and requires real system information cpu_threads: nitroResourceProbe.numCpuPhysicalCore, }; return loadModel(nitroResourceProbe); } } async function loadModel(nitroResourceProbe: any | undefined) { // Gather system information for CPU physical cores and memory if (!nitroResourceProbe) nitroResourceProbe = await getResourcesInfo(); return ( killSubprocess() .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000)) // wait for 500ms to make sure the port is free for windows platform .then(() => { if (process.platform === "win32") { return sleep(500); } else { return sleep(0); } }) .then(() => spawnNitroProcess(nitroResourceProbe)) .then(() => loadLLMModel(currentSettings)) .then(validateModelStatus) .catch((err) => { console.error("error: ", err); // TODO: Broadcast error so app could display proper error message return { error: err, currentModelFile }; }) ); } // Add function sleep function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } function promptTemplateConverter(promptTemplate) { // Split the string using the markers const systemMarker = "{system_message}"; const promptMarker = "{prompt}"; if ( promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker) ) { // Find the indices of the markers const systemIndex = promptTemplate.indexOf(systemMarker); const promptIndex = promptTemplate.indexOf(promptMarker); // Extract the parts of the string const system_prompt = promptTemplate.substring(0, systemIndex); const user_prompt = promptTemplate.substring( systemIndex + systemMarker.length, promptIndex ); const ai_prompt = promptTemplate.substring( promptIndex + promptMarker.length ); // Return the split parts return { system_prompt, user_prompt, ai_prompt }; } else if (promptTemplate.includes(promptMarker)) { // Extract the parts of the string for the case where only promptMarker is present const promptIndex = promptTemplate.indexOf(promptMarker); const user_prompt = promptTemplate.substring(0, promptIndex); const ai_prompt = promptTemplate.substring( promptIndex + promptMarker.length ); const system_prompt = ""; // Return the split parts return { system_prompt, user_prompt, ai_prompt }; } // Return an error if none of the conditions are met return { error: "Cannot split prompt template" }; } /** * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request. * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. */ function loadLLMModel(settings): Promise { return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(settings), retries: 3, retryDelay: 500, }); } /** * Validates the status of a model. * @returns {Promise} A promise that resolves to an object. * If the model is loaded successfully, the object is empty. * If the model is not loaded successfully, the object contains an error message. */ async function validateModelStatus(): Promise { // Send a GET request to the validation URL. // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { method: "GET", headers: { "Content-Type": "application/json", }, retries: 5, retryDelay: 500, }).then(async (res: Response) => { // If the response is OK, check model_loaded status. if (res.ok) { const body = await res.json(); // If the model is loaded, return an empty object. // Otherwise, return an object with an error message. if (body.model_loaded) { return { error: undefined }; } } return { error: "Model loading failed" }; }); } /** * Terminates the Nitro subprocess. * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. */ async function killSubprocess(): Promise { const controller = new AbortController(); setTimeout(() => controller.abort(), 5000); console.debug("Start requesting to kill Nitro..."); return fetch(NITRO_HTTP_KILL_URL, { method: "DELETE", signal: controller.signal, }) .then(() => { subprocess?.kill(); subprocess = undefined; }) .catch(() => {}) .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000)) .then(() => console.debug("Nitro is killed")); } /** * Look for the Nitro binary and execute it * Using child-process to spawn the process * Should run exactly platform specified Nitro binary version */ /** * Spawns a Nitro subprocess. * @param nitroResourceProbe - The Nitro resource probe. * @returns A promise that resolves when the Nitro subprocess is started. */ function spawnNitroProcess(nitroResourceProbe: any): Promise { console.debug("Starting Nitro subprocess..."); return new Promise(async (resolve, reject) => { let binaryFolder = path.join(__dirname, "bin"); // Current directory by default let cudaVisibleDevices = ""; let binaryName; if (process.platform === "win32") { let nvida_info = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf8")); if (nvida_info["run_mode"] === "cpu") { binaryFolder = path.join(binaryFolder, "win-cpu"); } else { binaryFolder = path.join(binaryFolder, "win-cuda"); cudaVisibleDevices = nvida_info["gpu_highest_vram"]; } binaryName = "nitro.exe"; } else if (process.platform === "darwin") { if (process.arch === "arm64") { binaryFolder = path.join(binaryFolder, "mac-arm64"); } else { binaryFolder = path.join(binaryFolder, "mac-x64"); } binaryName = "nitro"; } else { let nvida_info = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf8")); if (nvida_info["run_mode"] === "cpu") { binaryFolder = path.join(binaryFolder, "linux-cpu"); } else { binaryFolder = path.join(binaryFolder, "linux-cuda"); cudaVisibleDevices = nvida_info["gpu_highest_vram"]; } binaryName = "nitro"; } const binaryPath = path.join(binaryFolder, binaryName); // Execute the binary subprocess = spawn(binaryPath, [1, LOCAL_HOST, PORT], { cwd: binaryFolder, env: { ...process.env, CUDA_VISIBLE_DEVICES: cudaVisibleDevices, }, }); // Handle subprocess output subprocess.stdout.on("data", (data) => { console.debug(`stdout: ${data}`); }); subprocess.stderr.on("data", (data) => { console.error("subprocess error:" + data.toString()); console.error(`stderr: ${data}`); }); subprocess.on("close", (code) => { console.debug(`child process exited with code ${code}`); subprocess = null; reject(`child process exited with code ${code}`); }); tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => { resolve(nitroResourceProbe); }); }); } /** * Get the system resources information * TODO: Move to Core so that it can be reused */ function getResourcesInfo(): Promise { return new Promise(async (resolve) => { const cpu = await osUtils.cpuCount(); console.log("cpu: ", cpu); const response: ResourcesInfo = { numCpuPhysicalCore: cpu, memAvailable: 0, }; resolve(response); }); } function dispose() { // clean other registered resources here killSubprocess(); } module.exports = { initModel, stopModel, killSubprocess, dispose, updateNvidiaInfo, };