Louis ff7ec39915
fix: incompatible browser dependency (#2439)
* fix: incompatible browser dependency

* fix: update model extension to use rollup

* fix: test timeout
2024-03-21 16:54:42 +07:00

import path from 'path'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import {
  log,
  getJanDataFolderPath,
  SystemInformation,
  PromptTemplate,
} from '@janhq/core/node'
import decompress from 'decompress'
// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch)
const supportedPlatform = (): string[] => ['win32', 'linux']
const supportedGpuArch = (): string[] => ['ampere', 'ada']
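// NOTE (assumption): ENGINE_HOST, ENGINE_PORT, LOAD_MODEL_URL, TERMINATE_ENGINE_URL,
// TENSORRT_VERSION and PROVIDER are referenced below without being imported; they are
// presumably injected as build-time constants (e.g. by the rollup build mentioned in
// the commit message).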
/**
 * The settings object sent to the engine when loading a model.
 */
interface ModelLoadParams {
  engine_path: string
  ctx_len: number
}
// The running engine subprocess, if any
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
/**
 * Initializes an engine subprocess to load a machine learning model.
 * @param params - The model load settings.
 */
async function loadModel(
  params: any,
  systemInfo?: SystemInformation
): Promise<{ error: Error | undefined }> {
  // modelFolder is the absolute path to the running model folder
  // e.g. ~/jan/models/llama-2
  let modelFolder = params.modelFolder
  if (params.model.settings.prompt_template) {
    const promptTemplate = params.model.settings.prompt_template
    const prompt = promptTemplateConverter(promptTemplate)
    if (prompt?.error) {
      return Promise.reject(prompt.error)
    }
    params.model.settings.system_prompt = prompt.system_prompt
    params.model.settings.user_prompt = prompt.user_prompt
    params.model.settings.ai_prompt = prompt.ai_prompt
  }
  const settings: ModelLoadParams = {
    engine_path: modelFolder,
    ctx_len: params.model.settings.ctx_len ?? 2048,
    ...params.model.settings,
  }
  if (!systemInfo) {
    throw new Error('Cannot get system info. Unable to start nitro x tensorrt.')
  }
  return runEngineAndLoadModel(settings, systemInfo)
}
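// Illustrative only: a hedged sketch of the `params` shape loadModel expects,
// inferred from the property accesses above. The concrete values below are
// made-up examples, not real defaults:
//
//   await loadModel(
//     {
//       modelFolder: '/home/user/jan/models/llama-2',
//       model: {
//         settings: {
//           prompt_template: '{system_message}{prompt}',
//           ctx_len: 2048,
//         },
//       },
//     },
//     systemInfo
//   )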
/**
 * Stops the engine subprocess.
 */
function unloadModel(): Promise<any> {
  const controller = new AbortController()
  setTimeout(() => controller.abort(), 5000)
  debugLog(`Request to kill engine`)
  subprocess?.kill()
  return fetch(TERMINATE_ENGINE_URL, {
    method: 'DELETE',
    signal: controller.signal,
  })
    .then(() => {
      subprocess = undefined
    })
    .catch(() => {}) // Do nothing with this attempt
    .then(() => tcpPortUsed.waitUntilFree(parseInt(ENGINE_PORT), 300, 5000)) // Wait for the port to become available
    .then(() => debugLog(`Engine process is terminated`))
    .catch((err) => {
      debugLog(
        `Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
      )
      throw 'PORT_NOT_AVAILABLE'
    })
}
/**
 * 1. Spawn engine process
 * 2. Load model into engine subprocess
 * @returns A promise that resolves to an object with an optional error.
 */
async function runEngineAndLoadModel(
  settings: ModelLoadParams,
  systemInfo: SystemInformation
) {
  return unloadModel()
    .then(() => runEngine(systemInfo))
    .then(() => loadModelRequest(settings))
    .catch((err) => {
      // TODO: Broadcast error so the app can display a proper error message
      debugLog(`${err}`, 'Error')
      return { error: err }
    })
}
/**
 * Loads an LLM model into the engine subprocess by sending an HTTP POST request.
 */
async function loadModelRequest(
  settings: ModelLoadParams
): Promise<{ error: Error | undefined }> {
  debugLog(`Loading model with params ${JSON.stringify(settings)}`)
  return fetchRetry(LOAD_MODEL_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(settings),
    retries: 3,
    retryDelay: 500,
  })
    .then((res) => {
      debugLog(`Load model success with response ${JSON.stringify(res)}`)
      return Promise.resolve({ error: undefined })
    })
    .catch((err) => {
      debugLog(`Load model failed with error ${err}`, 'Error')
      return Promise.resolve({ error: err })
    })
}
/**
 * Spawns engine subprocess.
 */
async function runEngine(systemInfo: SystemInformation): Promise<void> {
  debugLog(`Spawning engine subprocess...`)
  if (systemInfo.gpuSetting == null) {
    return Promise.reject(
      'No GPU information found. Please check your GPU setting.'
    )
  }
  if (systemInfo.gpuSetting.gpus.length === 0) {
    return Promise.reject('No GPU found. Please check your GPU setting.')
  }
  if (systemInfo.osInfo == null) {
    return Promise.reject(
      'No OS information found. Please check your OS setting.'
    )
  }
  const platform = systemInfo.osInfo.platform
  if (platform == null || supportedPlatform().includes(platform) === false) {
    return Promise.reject(
      'Your OS is not supported. Only Windows and Linux are supported.'
    )
  }
  const gpu = systemInfo.gpuSetting.gpus[0]
  if (gpu.name.toLowerCase().includes('nvidia') === false) {
    return Promise.reject('No Nvidia GPU found. Please check your GPU setting.')
  }
  const gpuArch = gpu.arch
  if (gpuArch == null || supportedGpuArch().includes(gpuArch) === false) {
    return Promise.reject(
      `Your GPU: ${gpu.name} is not supported. Only ${supportedGpuArch().join(
        ', '
      )} series are supported.`
    )
  }
  const janDataFolderPath = await getJanDataFolderPath()
  const tensorRtVersion = TENSORRT_VERSION
  const provider = PROVIDER
  return new Promise<void>((resolve, reject) => {
    // Current directory by default
    const executableFolderPath = path.join(
      janDataFolderPath,
      'engines',
      provider,
      tensorRtVersion,
      gpuArch
    )
    const nitroExecutablePath = path.join(
      executableFolderPath,
      platform === 'win32' ? 'nitro.exe' : 'nitro'
    )
    const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
    // Execute the binary
    debugLog(`Spawn nitro at path: ${nitroExecutablePath}, and args: ${args}`)
    subprocess = spawn(nitroExecutablePath, args, {
      cwd: executableFolderPath,
      env: {
        ...process.env,
      },
    })
    // Handle subprocess output
    subprocess.stdout.on('data', (data: any) => {
      debugLog(`${data}`)
    })
    subprocess.stderr.on('data', (data: any) => {
      debugLog(`${data}`)
    })
    subprocess.on('close', (code: any) => {
      debugLog(`Engine exited with code: ${code}`)
      subprocess = undefined
      reject(`child process exited with code ${code}`)
    })
    // Resolve once the engine is listening; reject if the port never becomes used
    tcpPortUsed
      .waitUntilUsed(parseInt(ENGINE_PORT), 300, 30000)
      .then(() => {
        debugLog(`Engine is ready`)
        resolve()
      })
      .catch(reject)
  })
}
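// Illustrative only: with hypothetical values, the spawn above amounts to running
// something like
//   <janDataFolderPath>/engines/<PROVIDER>/<TENSORRT_VERSION>/ada/nitro.exe 1 <ENGINE_HOST> <ENGINE_PORT>
// from the engine folder, i.e. the binary is resolved per GPU architecture and handed
// the host/port it should serve on (the leading '1' is passed through as-is).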
function debugLog(message: string, level: string = 'Debug') {
  log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
}
const decompressRunner = async (zipPath: string, output: string) => {
  console.debug(`Decompressing ${zipPath} to ${output}...`)
  try {
    const files = await decompress(zipPath, output)
    console.debug('Decompress finished!', files)
  } catch (err) {
    console.error(`Decompress ${zipPath} failed: ${err}`)
  }
}
/**
 * Parse a prompt template into system/user/ai prompt settings.
 * @param promptTemplate Template as string
 * @returns The split prompt parts, or an error if the template cannot be split.
 */
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
  // Split the string using the markers
  const systemMarker = '{system_message}'
  const promptMarker = '{prompt}'
  if (
    promptTemplate.includes(systemMarker) &&
    promptTemplate.includes(promptMarker)
  ) {
    // Find the indices of the markers
    const systemIndex = promptTemplate.indexOf(systemMarker)
    const promptIndex = promptTemplate.indexOf(promptMarker)
    // Extract the parts of the string
    const system_prompt = promptTemplate.substring(0, systemIndex)
    const user_prompt = promptTemplate.substring(
      systemIndex + systemMarker.length,
      promptIndex
    )
    const ai_prompt = promptTemplate.substring(
      promptIndex + promptMarker.length
    )
    // Return the split parts
    return { system_prompt, user_prompt, ai_prompt }
  } else if (promptTemplate.includes(promptMarker)) {
    // Extract the parts of the string for the case where only promptMarker is present
    const promptIndex = promptTemplate.indexOf(promptMarker)
    const user_prompt = promptTemplate.substring(0, promptIndex)
    const ai_prompt = promptTemplate.substring(
      promptIndex + promptMarker.length
    )
    // Return the split parts
    return { user_prompt, ai_prompt }
  }
  // Return an error if none of the conditions are met
  return { error: 'Cannot split prompt template' }
}
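// Illustrative only: given a (hypothetical) Llama-style template such as
//   '[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt} [/INST]'
// promptTemplateConverter would return
//   system_prompt: '[INST] <<SYS>>\n'
//   user_prompt:   '\n<</SYS>>\n'
//   ai_prompt:     ' [/INST]'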
export default {
  supportedPlatform,
  supportedGpuArch,
  decompressRunner,
  loadModel,
  unloadModel,
  dispose: unloadModel,
}