import fs from 'fs'
import path from 'path'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import {
  log,
  getSystemResourceInfo,
  Model,
  InferenceEngine,
  ModelSettingParams,
  PromptTemplate,
  SystemInformation,
  getJanDataFolderPath,
} from '@janhq/core/node'
import { executableNitroFile } from './execute'
import terminate from 'terminate'
import decompress from 'decompress'

// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch)

/**
 * The options object for a model init operation.
 */
interface ModelInitOptions {
  modelFolder: string
  model: Model
}

// The PORT to use for the Nitro subprocess
const PORT = 3928
// The HOST address to use for the Nitro subprocess
const LOCAL_HOST = '127.0.0.1'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
// The URL for the Nitro subprocess to load a model
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
// The URL for the Nitro subprocess to kill itself
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`

const NITRO_PORT_FREE_CHECK_INTERVAL = 100

// The supported model format
// TODO: Should be an array to support more formats
const SUPPORTED_MODEL_FORMAT = '.gguf'

// The subprocess instance for Nitro
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined

// The current model settings
let currentSettings: (ModelSettingParams & { model?: string }) | undefined =
  undefined

/**
 * Stops the Nitro subprocess.
 * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
 */
function unloadModel(): Promise<void> {
  return killSubprocess()
}
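// Illustrative usage sketch (hypothetical caller, not part of this module):
// the extension runtime is expected to drive the default export roughly like
//
//   import nitroModule from './module'
//   await nitroModule.loadModel({ modelFolder: '/abs/path/to/llama-2', model })
//   // ... inference requests then go to the Nitro HTTP server on PORT ...
//   await nitroModule.unloadModel()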
/**
 * Initializes a Nitro subprocess to load a machine learning model.
 * @param params - The model init options (the model and its folder).
 * @param systemInfo - Optional system information used to pick the executable.
 * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
 * TODO: Should pass the absolute path of the model file instead of just the name, so module.ts can be modularized into an npm package.
 */
async function loadModel(
  params: ModelInitOptions,
  systemInfo?: SystemInformation
): Promise<void> {
  if (params.model.engine !== InferenceEngine.nitro) {
    // Not a nitro model - nothing to do here
    return Promise.resolve()
  }

  const nitroResourceProbe = await getSystemResourceInfo()

  // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
  if (params.model.settings.prompt_template) {
    const promptTemplate = params.model.settings.prompt_template
    const prompt = promptTemplateConverter(promptTemplate)
    if (prompt?.error) {
      return Promise.reject(prompt.error)
    }
    params.model.settings.system_prompt = prompt.system_prompt
    params.model.settings.user_prompt = prompt.user_prompt
    params.model.settings.ai_prompt = prompt.ai_prompt
  }

  // modelFolder is the absolute path to the running model folder
  // e.g. ~/jan/models/llama-2
  const modelFolder = params.modelFolder

  let llama_model_path = params.model.settings.llama_model_path

  // Absolute model path support: if every source already points to an
  // existing local file, use the matching source URL directly
  if (
    params.model?.sources.length &&
    params.model.sources.every((e) => fs.existsSync(e.url))
  ) {
    llama_model_path =
      params.model.sources.length === 1
        ? params.model.sources[0].url
        : params.model.sources.find((e) =>
            e.url.includes(llama_model_path ?? params.model.id)
          )?.url
  }

  if (!llama_model_path || !path.isAbsolute(llama_model_path)) {
    // Look for a GGUF model file in the model folder
    const modelFiles: string[] = fs.readdirSync(modelFolder)
    const ggufBinFile = modelFiles.find(
      (file) =>
        // 1. Prioritize llama_model_path (predefined)
        (llama_model_path && file === llama_model_path) ||
        // 2. Prioritize a GGUF file (manual import)
        file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) ||
        // 3. Fall back to the model ID (for backward compatibility)
        file === params.model.id
    )
    if (ggufBinFile) llama_model_path = path.join(modelFolder, ggufBinFile)
  }

  if (!llama_model_path) return Promise.reject('No GGUF model file found')

  currentSettings = {
    // This is critical and requires the real CPU physical core count
    // (or performance core count)
    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
    // model.settings can override the default settings
    ...params.model.settings,
    llama_model_path,
    model: params.model.id,
    ...(params.model.settings.mmproj && {
      mmproj: path.isAbsolute(params.model.settings.mmproj)
        ? params.model.settings.mmproj
        : path.join(modelFolder, params.model.settings.mmproj),
    }),
  }
  return runNitroAndLoadModel(params.model.id, systemInfo)
}

/**
 * 1. Spawn the Nitro process
 * 2. Load the model into the Nitro subprocess
 * 3. Validate the model status
 * @returns
 */
async function runNitroAndLoadModel(
  modelId: string,
  systemInfo?: SystemInformation
) {
  // Kill any leftover subprocess and wait for the port to be freed
  // before spawning a fresh one
  return killSubprocess()
    .then(() =>
      tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
    )
    .then(() => spawnNitroProcess(systemInfo))
    .then(() => loadLLMModel(currentSettings))
    .then(() => validateModelStatus(modelId))
    .catch((err) => {
      // TODO: Broadcast the error so the app can display a proper error message
      log(`[CORTEX]::Error: ${err}`)
      return { error: err }
    })
}

/**
 * Parses a prompt template into prompt settings.
 * @param promptTemplate - The template as a string.
 * @returns The extracted system_prompt, user_prompt and ai_prompt parts, or an error.
 */
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
  // Split the string using the markers
  const systemMarker = '{system_message}'
  const promptMarker = '{prompt}'

  if (
    promptTemplate.includes(systemMarker) &&
    promptTemplate.includes(promptMarker)
  ) {
    // Find the indices of the markers
    const systemIndex = promptTemplate.indexOf(systemMarker)
    const promptIndex = promptTemplate.indexOf(promptMarker)

    // Extract the parts of the string
    const system_prompt = promptTemplate.substring(0, systemIndex)
    const user_prompt = promptTemplate.substring(
      systemIndex + systemMarker.length,
      promptIndex
    )
    const ai_prompt = promptTemplate.substring(
      promptIndex + promptMarker.length
    )

    // Return the split parts
    return { system_prompt, user_prompt, ai_prompt }
  } else if (promptTemplate.includes(promptMarker)) {
    // Extract the parts of the string for the case where only promptMarker is present
    const promptIndex = promptTemplate.indexOf(promptMarker)
    const user_prompt = promptTemplate.substring(0, promptIndex)
    const ai_prompt = promptTemplate.substring(
      promptIndex + promptMarker.length
    )

    // Return the split parts
    return { user_prompt, ai_prompt }
  }

  // Return an error if none of the conditions are met
  return { error: 'Cannot split prompt template' }
}
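// Worked example for promptTemplateConverter (illustrative; the ChatML-style
// template below is an assumption, not something shipped with this module):
//
//   promptTemplateConverter(
//     '<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
//   )
//   // => {
//   //      system_prompt: '<|im_start|>system\n',
//   //      user_prompt: '<|im_end|>\n<|im_start|>user\n',
//   //      ai_prompt: '<|im_end|>\n<|im_start|>assistant\n',
//   //    }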
/**
 * Loads an LLM model into the Nitro subprocess by sending an HTTP POST request.
 * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
 */
function loadLLMModel(settings: any): Promise<Response> {
  // Default to full GPU offload when ngl is not set
  if (settings && !settings.ngl) {
    settings.ngl = 100
  }
  log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`)
  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(settings),
    retries: 3,
    retryDelay: 300,
  })
    .then((res) => {
      log(
        `[CORTEX]::Debug: Load model success with response ${JSON.stringify(
          res
        )}`
      )
      return Promise.resolve(res)
    })
    .catch((err) => {
      log(`[CORTEX]::Error: Load model failed with error ${err}`)
      return Promise.reject(err)
    })
}

/**
 * Validates the status of a model.
 * @returns {Promise<void>} A promise that resolves if the model is loaded
 * successfully, or rejects with an error message otherwise.
 */
async function validateModelStatus(modelId: string): Promise<void> {
  // Send a POST request to the validation URL.
  // Retry the request up to 5 times if it fails, with a delay of 300 milliseconds between retries.
  log(`[CORTEX]::Debug: Validating model ${modelId}`)
  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
    method: 'POST',
    body: JSON.stringify({
      model: modelId,
      // TODO: force to use cortex llamacpp by default
      engine: 'cortex.llamacpp',
    }),
    headers: {
      'Content-Type': 'application/json',
    },
    retries: 5,
    retryDelay: 300,
  }).then(async (res: Response) => {
    log(
      `[CORTEX]::Debug: Validate model state with response ${JSON.stringify(
        res.status
      )}`
    )
    // If the response is OK, check the model_loaded status.
    if (res.ok) {
      const body = await res.json()
      // If the model is loaded, resolve with no value.
      if (body.model_loaded) {
        log(
          `[CORTEX]::Debug: Validate model state success with response ${JSON.stringify(
            body
          )}`
        )
        return Promise.resolve()
      }
      // The body stream was already consumed by res.json() above,
      // so log the parsed body instead of reading it again as text
      log(
        `[CORTEX]::Debug: Validate model state failed with response ${JSON.stringify(
          body
        )} and status is ${JSON.stringify(res.statusText)}`
      )
      return Promise.reject('Validate model status failed')
    }
    const errorBody = await res.text()
    log(
      `[CORTEX]::Debug: Validate model state failed with response ${errorBody} and status is ${JSON.stringify(
        res.statusText
      )}`
    )
    return Promise.reject('Validate model status failed')
  })
}
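// For reference, a successful modelstatus response is expected to contain a
// model_loaded flag (shape inferred from the check above; other fields may
// also be present), e.g.
//
//   { "model_loaded": true }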
/**
 * Terminates the Nitro subprocess.
 * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
 */
async function killSubprocess(): Promise<void> {
  const controller = new AbortController()
  setTimeout(() => controller.abort(), 5000)
  log(`[CORTEX]::Debug: Request to kill cortex`)

  // Ask the server to shut itself down, then wait for the port to be freed
  const killRequest = () => {
    return fetch(NITRO_HTTP_KILL_URL, {
      method: 'DELETE',
      signal: controller.signal,
    })
      .catch(() => {}) // Do nothing with this attempt
      .then(() =>
        tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
      )
      .then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
      .catch((err) => {
        log(
          `[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
        )
        throw 'PORT_NOT_AVAILABLE'
      })
  }

  if (subprocess?.pid && process.platform !== 'darwin') {
    log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
    const pid = subprocess.pid
    return new Promise<void>((resolve, reject) => {
      terminate(pid, function (err) {
        if (err) {
          log('[CORTEX]::Failed to kill PID - sending request to kill')
          killRequest().then(resolve).catch(reject)
        } else {
          tcpPortUsed
            .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
            .then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
            .then(() => resolve())
            .catch(() => {
              log(
                '[CORTEX]::Failed to kill PID (Port check timeout) - sending request to kill'
              )
              killRequest().then(resolve).catch(reject)
            })
        }
      })
    })
  } else {
    return killRequest()
  }
}

/**
 * Spawns a Nitro subprocess.
 * @returns A promise that resolves when the Nitro subprocess is started.
 */
function spawnNitroProcess(systemInfo?: SystemInformation): Promise<void> {
  log(`[CORTEX]::Debug: Spawning cortex subprocess...`)

  return new Promise<void>(async (resolve, reject) => {
    const executableOptions = executableNitroFile(
      // If ngl is not set or equal to 0, run on CPU with the correct instruction set
      systemInfo?.gpuSetting
        ? {
            ...systemInfo.gpuSetting,
            run_mode:
              currentSettings?.ngl === undefined || currentSettings.ngl === 0
                ? 'cpu'
                : systemInfo.gpuSetting.run_mode,
          }
        : undefined
    )
    const args: string[] = ['1', LOCAL_HOST, PORT.toString()]

    // Execute the binary
    log(
      `[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
    )
    log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)

    // Add the engine path to PATH and LD_LIBRARY_PATH
    process.env.PATH = (process.env.PATH || '').concat(
      path.delimiter,
      executableOptions.enginePath
    )
    log(`[CORTEX] PATH: ${process.env.PATH}`)
    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
      path.delimiter,
      executableOptions.enginePath
    )

    subprocess = spawn(executableOptions.executablePath, args, {
      cwd: path.join(path.parse(executableOptions.executablePath).dir),
      env: {
        ...process.env,
        ENGINE_PATH: executableOptions.enginePath,
        CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
        // Vulkan - Support 1 device at a time for now
        ...(executableOptions.vkVisibleDevices?.length > 0 && {
          GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
        }),
      },
    })

    // Handle subprocess output
    subprocess.stdout.on('data', (data: any) => {
      log(`[CORTEX]::Debug: ${data}`)
    })
    subprocess.stderr.on('data', (data: any) => {
      log(`[CORTEX]::Error: ${data}`)
    })
    subprocess.on('close', (code: any) => {
      log(`[CORTEX]::Debug: cortex exited with code: ${code}`)
      subprocess = undefined
      reject(`child process exited with code ${code}`)
    })

    tcpPortUsed
      .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
      .then(() => {
        log(`[CORTEX]::Debug: cortex is ready`)
        resolve()
      })
  })
}

/**
 * Every module should have a dispose function.
 * This will be called when the extension is unloaded and should clean up any resources.
 * Also called when the app is closed.
 */
function dispose() {
  // Clean up other registered resources here
  killSubprocess()
}

/**
 * Nitro process info
 */
export interface NitroProcessInfo {
  isRunning: boolean
}

/**
 * Retrieves info about the current nitro process
 */
const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
  return {
    isRunning: subprocess != null,
  }
}
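// Illustrative call (hypothetical version string; the engine name appears in
// validateModelStatus above): this would append
// <janDataFolder>/engines/cortex.llamacpp/0.1.25 to PATH and LD_LIBRARY_PATH.
//
//   addAdditionalDependencies({ name: 'cortex.llamacpp', version: '0.1.25' })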
const addAdditionalDependencies = (data: { name: string; version: string }) => {
  log(
    `[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}`
  )
  const additionalPath = path.join(
    getJanDataFolderPath(),
    'engines',
    data.name,
    data.version
  )

  // Set the updated PATH
  process.env.PATH = (process.env.PATH || '').concat(
    path.delimiter,
    additionalPath
  )
  process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
    path.delimiter,
    additionalPath
  )
}

const decompressRunner = async (zipPath: string, output: string) => {
  console.debug(`Decompressing ${zipPath} to ${output}...`)
  try {
    const files = await decompress(zipPath, output)
    console.debug('Decompress finished!', files)
  } catch (err) {
    console.error(`Decompress ${zipPath} failed: ${err}`)
  }
}

export default {
  loadModel,
  unloadModel,
  dispose,
  getCurrentNitroProcessInfo,
  addAdditionalDependencies,
  decompressRunner,
}
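// Illustrative usage sketch for decompressRunner (hypothetical paths):
//
//   await decompressRunner('/tmp/cortex-engine.zip', '/tmp/cortex-engine')
//   // extracts the archive into /tmp/cortex-engine; failures are logged, not thrown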