fix: Update inference nitro with n_threads equal to the physical core count

This commit is contained in:
hiro 2023-12-11 21:22:56 +07:00
parent cafdaaaccd
commit f528e9ea77

View File

@ -39,15 +39,21 @@ function stopModel(): Promise<ModelOperationResponse> {
* TODO: Should pass the absolute path of the model file instead of just the name - So we can modularize the module.ts into an npm package * TODO: Should pass the absolute path of the model file instead of just the name - So we can modularize the module.ts into an npm package
* TODO: Should it be startModel instead? * TODO: Should it be startModel instead?
*/ */
function initModel(wrapper: any): Promise<ModelOperationResponse> { async function initModel(wrapper: any): Promise<ModelOperationResponse> {
currentModelFile = wrapper.modelFullPath; currentModelFile = wrapper.modelFullPath;
if (wrapper.model.engine !== "nitro") { if (wrapper.model.engine !== "nitro") {
return Promise.resolve({ error: "Not a nitro model" }); return Promise.resolve({ error: "Not a nitro model" });
} else { } else {
log.info("Started to load model " + wrapper.model.modelFullPath); // Gather system information for CPU physical cores and memory
const nitroResourceProbe = await getResourcesInfo();
console.log(
"Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
);
const settings = { const settings = {
llama_model_path: currentModelFile, llama_model_path: currentModelFile,
...wrapper.model.settings, ...wrapper.model.settings,
// This is critical and requires real system information
n_threads: nitroResourceProbe.numCpuPhysicalCore,
}; };
log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`); log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`);
return ( return (
@ -55,7 +61,7 @@ function initModel(wrapper: any): Promise<ModelOperationResponse> {
validateModelVersion() validateModelVersion()
.then(checkAndUnloadNitro) .then(checkAndUnloadNitro)
// 2. Spawn the Nitro subprocess // 2. Spawn the Nitro subprocess
.then(spawnNitroProcess) .then(await spawnNitroProcess(nitroResourceProbe))
// 4. Load the model into the Nitro subprocess (HTTP POST request) // 4. Load the model into the Nitro subprocess (HTTP POST request)
.then(() => loadLLMModel(settings)) .then(() => loadLLMModel(settings))
// 5. Check if the model is loaded successfully // 5. Check if the model is loaded successfully
@ -167,7 +173,7 @@ async function checkAndUnloadNitro() {
* Using child-process to spawn the process * Using child-process to spawn the process
* Should run exactly platform specified Nitro binary version * Should run exactly platform specified Nitro binary version
*/ */
async function spawnNitroProcess(): Promise<void> { async function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
return new Promise(async (resolve, reject) => { return new Promise(async (resolve, reject) => {
let binaryFolder = path.join(__dirname, "bin"); // Current directory by default let binaryFolder = path.join(__dirname, "bin"); // Current directory by default
let binaryName; let binaryName;
@ -191,12 +197,6 @@ async function spawnNitroProcess(): Promise<void> {
const binaryPath = path.join(binaryFolder, binaryName); const binaryPath = path.join(binaryFolder, binaryName);
// Gather system information for CPU physical cores and memory
const nitroResourceProbe = await getResourcesInfo();
console.log(
"Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
);
// Execute the binary // Execute the binary
subprocess = spawn( subprocess = spawn(
binaryPath, binaryPath,
@ -222,7 +222,7 @@ async function spawnNitroProcess(): Promise<void> {
reject(`Nitro process exited. ${code ?? ""}`); reject(`Nitro process exited. ${code ?? ""}`);
}); });
tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => { tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
resolve(); resolve(nitroResourceProbe);
}); });
}); });
} }