NamH 52d56a8ae1
refactor: move file to jan root (#598)
* feat: move necessary files to jan root

Signed-off-by: James <james@jan.ai>

* chore: check model dir

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: Louis <louis@jan.ai>
2023-11-16 12:09:09 +07:00

256 lines
7.9 KiB
TypeScript

const fs = require("fs");
const kill = require("kill-port");
const path = require("path");
const { spawn } = require("child_process");
const tcpPortUsed = require("tcp-port-used");
const fetchRetry = require("fetch-retry")(global.fetch);
// The TCP port the Nitro subprocess listens on
const PORT = 3928;
const LOCAL_HOST = "127.0.0.1";
// Base URL of the local Nitro HTTP server and its llamacpp model-management endpoints
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`;
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`;
const NITRO_HTTP_UNLOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/unloadModel`;
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`;
// The subprocess instance for Nitro; null when no subprocess is tracked
let subprocess = null;
// Model file most recently passed to initModel (read by loadLLMModel / validateModelVersion)
let currentModelFile = null;
/**
 * The response shape returned by initModel and validateModelStatus.
 * @property error - Set when the model fails to load (a message string or
 * the caught error); undefined on success.
 */
interface InitModelResponse {
  error?: any;
}
/**
 * Initializes a Nitro subprocess and loads a machine learning model into it.
 * @param modelFile - The name of the machine learning model file.
 * @returns A Promise resolving to an empty InitModelResponse on success, or
 * to one carrying an `error` field when any step fails.
 * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
 * TODO: Should it be startModel instead?
 */
async function initModel(modelFile: string): Promise<InitModelResponse> {
  currentModelFile = modelFile;
  try {
    // Reject early if the model file is a deprecated GGUFv1 model
    await validateModelVersion();
    // If the port is already used, attempt to unload model / kill nitro process
    await checkAndUnloadNitro();
    // Spawn the platform-specific Nitro binary
    spawnNitroProcess();
    // Wait until the port is used (Nitro http server is up)
    await tcpPortUsed.waitUntilUsed(PORT, 300, 30000);
    // Load the model into the Nitro subprocess (HTTP POST request)
    await loadLLMModel();
    // Check if the model is loaded successfully
    return await validateModelStatus();
  } catch (err) {
    return { error: err };
  }
}
/**
 * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
 * Reads the model path from the module-level `currentModelFile`, which
 * initModel sets before this runs.
 * Retries the POST up to 3 times with a 500 ms delay between attempts.
 * @returns A Promise that resolves with the server response when the request
 * succeeds, or rejects with an error if the Nitro server cannot be reached.
 */
function loadLLMModel(): Promise<Response> {
  const config = {
    llama_model_path: currentModelFile,
    ctx_len: 2048, // context length
    ngl: 100, // presumably number of GPU layers to offload — TODO confirm against Nitro docs
    embedding: false, // embedding mode is disabled (the old comment claiming it was "always enabled" was wrong)
  };
  // Load model config
  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(config),
    retries: 3,
    retryDelay: 500,
  }).catch((err) => {
    console.error(err);
    // Fetch error, Nitro server might not started properly
    throw new Error("Model loading failed.");
  });
}
/**
 * Validates the status of a model by polling the Nitro /modelstatus endpoint.
 * @returns {Promise<InitModelResponse>} A promise that resolves to an object.
 * If the model is loaded successfully, the object has `error: undefined`.
 * If the model is not loaded successfully, the object contains an error message.
 * Note: this never rejects — failures are folded into the `error` field.
 */
async function validateModelStatus(): Promise<InitModelResponse> {
  // Send a GET request to the validation URL.
  // Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
    method: "GET",
    headers: {
      "Content-Type": "application/json",
    },
    retries: 5,
    retryDelay: 500,
  })
    .then(async (res: Response) => {
      // If the response is OK, check model_loaded status.
      if (res.ok) {
        const body = await res.json();
        // If the model is loaded, return an empty object.
        // Otherwise, return an object with an error message.
        if (body.model_loaded) {
          return { error: undefined };
        }
      }
      return { error: "Model is not loaded successfully" };
    })
    .catch((err) => {
      // Network/retry exhaustion also surfaces as an error field, not a rejection.
      return { error: `Model is not loaded successfully. ${err.message}` };
    });
}
/**
 * Terminates the Nitro subprocess.
 * If a live subprocess handle is tracked, kill it directly; otherwise fall
 * back to freeing the TCP port in case an untracked process still holds it.
 * @returns A Promise that resolves when the subprocess is terminated
 * (or the port kill attempt completes).
 */
function killSubprocess(): Promise<void> {
  if (subprocess) {
    subprocess.kill();
    subprocess = null;
    console.debug("Subprocess terminated.");
    // Fix: this branch previously returned undefined despite the declared
    // Promise<void> return type, so callers chaining .then() would crash.
    return Promise.resolve();
  }
  // No tracked subprocess — kill whatever occupies the Nitro port.
  return kill(PORT, "tcp").then(console.log).catch(console.log);
}
/**
 * Check whether the Nitro port is already in use; if it is, attempt to
 * unload the model from the running server.
 * If the unload request fails, fall back to killing the port's process.
 */
async function checkAndUnloadNitro(): Promise<void> {
  const portTaken = await tcpPortUsed.check(PORT, LOCAL_HOST);
  if (!portTaken) {
    // Port is free — nothing to unload or kill.
    return;
  }
  try {
    // Ask the already-running Nitro server to unload its model
    await fetch(NITRO_HTTP_UNLOAD_MODEL_URL, {
      method: "GET",
      headers: {
        "Content-Type": "application/json",
      },
    });
  } catch (err) {
    console.log(err);
    // Fallback to kill the port
    await killSubprocess();
  }
}
/**
 * Look for the platform-specific Nitro binary and execute it via
 * child_process.spawn, storing the handle in the module-level `subprocess`.
 * The server is started on 0.0.0.0:PORT.
 */
function spawnNitroProcess() {
  let binaryFolder = path.join(__dirname, "nitro"); // Current directory by default
  let binaryName;
  if (process.platform === "win32") {
    // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
    binaryName = "win-start.bat";
  } else if (process.platform === "darwin") {
    // Mac OS platform: pick the architecture-specific build folder
    if (process.arch === "arm64") {
      binaryFolder = path.join(binaryFolder, "mac-arm64");
    } else {
      binaryFolder = path.join(binaryFolder, "mac-x64");
    }
    binaryName = "nitro";
  } else {
    // Linux
    // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
    binaryName = "linux-start.sh"; // For other platforms
  }
  const binaryPath = path.join(binaryFolder, binaryName);
  // Execute the binary.
  // Fix: spawn() requires string arguments — passing the numbers 1 and PORT
  // throws ERR_INVALID_ARG_TYPE on current Node versions.
  subprocess = spawn(binaryPath, ["1", "0.0.0.0", String(PORT)], {
    cwd: binaryFolder,
  });
  // Handle subprocess output
  subprocess.stdout.on("data", (data) => {
    console.log(`stdout: ${data}`);
  });
  subprocess.stderr.on("data", (data) => {
    console.error(`stderr: ${data}`);
  });
  subprocess.on("close", (code) => {
    console.log(`child process exited with code ${code}`);
    // Clear the handle so killSubprocess falls back to killing by port.
    subprocess = null;
  });
}
/**
 * Validate the model version by inspecting the version byte at offset 4 of
 * the model file; GGUFv1 models are rejected as deprecated.
 * @param modelPath - Path of the model file to inspect. Defaults to the
 * module-level `currentModelFile` set by initModel (backward compatible).
 * @returns A Promise that resolves when the model version is acceptable
 * (or the byte cannot be read — best-effort, matching original behavior),
 * and rejects when the file cannot be opened or the model is GGUFv1.
 */
function validateModelVersion(modelPath = currentModelFile): Promise<void> {
  return new Promise((resolve, reject) => {
    fs.open(modelPath, "r", (err, fd) => {
      if (err) {
        // Fix: previously this branch returned without settling the promise,
        // so initModel would hang forever when the model file was missing.
        console.error(err.message);
        reject(err);
        return;
      }
      // Buffer to receive the single version byte
      const buffer = Buffer.alloc(1);
      // Model version is the 5th byte of the file (offset 4)
      fs.read(fd, buffer, 0, 1, 4, (readErr, _bytesRead, buf) => {
        // Always release the file descriptor
        fs.close(fd, (closeErr) => {
          if (closeErr) console.error(closeErr.message);
        });
        if (readErr) {
          // Best-effort: log and accept if the byte cannot be read
          console.error(readErr.message);
          resolve();
          return;
        }
        if (buf[0] === 0x01) {
          // This is GGUFv1, which is deprecated.
          // Fix: return after rejecting instead of also calling resolve().
          reject("GGUFv1 model is deprecated, please try another model.");
          return;
        }
        resolve();
      });
    });
  });
}
/**
 * Releases resources registered by this module.
 * Module-specific teardown; invoke it when the application closes.
 */
function dispose() {
  // The Nitro subprocess is currently the only registered resource.
  killSubprocess();
}
// Public CommonJS API of this module.
module.exports = {
  initModel,
  killSubprocess,
  dispose,
};