This reverts commit ad6fbea22df6deaba31e146dddb456e4a5d5dd75 Revert "chore: add engine logo from local instead of metadata logo (#3363)" This reverts commit ad6fbea22df6deaba31e146dddb456e4a5d5dd75. Revert "fix: LaTex formula render issue (#3353)" This reverts commit 3b2c84c4fee61b886c883c68801be3bc5a8584ad. Revert "chore: minor ui improvement (#3352)" This reverts commit 6dd387db2b5b9890f19d0c3505cf9cb770fd492f. Revert "fix: failed to relaunch app to update (#3351)" This reverts commit fcaf98a2fa4e674799602e8093914bcc04ced153. Revert "chore: add back GPU information to system monitoring bar (#3350)" This reverts commit 03455a91807c7af6c6325901997c6d7231d2cd0d. Revert "fix: empty model page not shown when delete all threads and models (#3343)" This reverts commit 9e29fcd69eb9085843896686806fd453a1285723. Revert "feat: allow user configure remote model from my model (#3348)" This reverts commit fdab8af057f80cf1ccaae0dc42c4e5161925f51e. Revert "chore: ui fix button outline for configure cloud model (#3347)" This reverts commit fe8ed1f26dc86ead92ffea4f36e2989caf7dad88. Revert "feat: move icon create new thread into top panel (#3346)" This reverts commit 46cb1b45b997181e2188f8dafb2fc0d0cc12ddcd. Revert "chore(UI): update experience model dropdown (#3342)" This reverts commit 8b44613015a907dc491113aeb99c963080424892. Revert "Chore/simple bug template and correct a copy (#3344)" This reverts commit 23cd5fd3979e7529811045da5c4912369bcc7532. Revert "chore(ui): fix alignment loader starter screen (#3338)" This reverts commit e9f5d2f837ce323b0851ea04cded913ab433388c. Revert "Increase retry upload to R2 to 5 times (#3337)" This reverts commit dcfb497934edc795955d971b6d391ee1e6309a03. Revert "fix: broken jan build - add log trace (jan.log) (#3336)" This reverts commit 77422c3a7ed240909942ac0d8c4b259af8d87a28. Revert "chore: disable quick ask (#3334)" This reverts commit 6e4b6b09ae009149f262d86d5b19bb8096267c19. 
Revert "fix: update legacy path (#3328)" This reverts commit 5eb112142c6431cfe0cdf11ce28810ca650a5427. Revert "chore: add cortex version (#3318)" This reverts commit 60587649c56a1f24272e763f25aa5b4042f7719a. Revert "fix: broken app due to incorrect api path (#3316)" This reverts commit 3de4eab2a0dfbf9f593d73b9dde6bca1d9df2279. Revert "feat: modal waiting cortex (#3306)" This reverts commit 1f5168d4af9080b867c19d334c398bf32e4f54b8. Revert "fix: refresh should not create new thread (#3314)" This reverts commit 624d07703c50ea332ed4eeac9dc3a26bc8190d08. Revert "fix: avoid lose title threads (#3307)" This reverts commit a4f5fda104c2d1e01ea72798f055e5b4e3cfd616. Revert "feat: change data folder (#3309)" This reverts commit b43242b9b24352c7f90995eccab753dede679616. Revert "feat: embed cortex into jan as a js module (#3305)" This reverts commit b348110fb73bd5f13c69f1b915168687dea776d0. Revert "fix: migration item in setting detail omit buttons (#3298)" This reverts commit 709204b2bc9d9ed08e2245cbb084482f5908ab3a. Revert "fix: merge gpu arch and os tensorrt models (#3299)" This reverts commit aa7dbdc9fa701debeee28d9c7eb4af6258685321. Revert "chore: update cortex new version (#3300)" This reverts commit 602097909d38b4874db8b9f19a729c65a0ac9619. Revert "fix: engine logo on model dropdown (#3291)" This reverts commit 8eb8611c28f6c4cdf1ab142a6e18c82bcc4c2073. Revert "fix: icon setting can close and open right panel (#3295)" This reverts commit be31e9315e2df5c483de3f46bd37740d277cfccd. Revert "fix: error while importing local model is not shown (#3294)" This reverts commit 26be941e8426462e1e3a28e5b9bf1f834f462f82. Revert "fix: add lower case quantization support (#3293)" This reverts commit 3135ccc27e894a4056f882cd25f0bf7e10e56f49. Revert "fix: onnx can't be selected in download model modal (#3283)" This reverts commit 2521e1db518e9e01493e89dcc98c181ccd2b48a2. Revert "feat: add chunk count (#3290)" This reverts commit bad481bf05aa38edcf553e1273f5d692a65c9225. 
Revert "fix: RAM always show 0% (#3287)" This reverts commit 2201e6c5f87538b953503937fe6b135fe1aa2d94. Revert "fix: remote engine should not allow reinit (#3284)" This reverts commit 98abff0da3467c090618233db12a25bfa4c1db69. Revert "chore": update minor UI (#3281)" This reverts commit 105a9aa1a1830648a32ae285f751b4078c8ac2b2. Revert "chore: update z-index tooltip (#3280)" This reverts commit 5a81865508c205ed8c54df209092553a0c40054f. Revert "feat: add nvidia engine (#3279)" This reverts commit 8372f30f0ee99606b123351e7bb62636c62c8b23. Revert "fix: migration wrong directory (#3278)" This reverts commit 7fb1354287677f577070ccb065ed3a5f9e5b9882. Revert "fix: clearer app loading prompt (#3275)" This reverts commit 44a6401000334b79b225ab6fd6afb79f9da4bd51. Revert "fix: allow user to reinit engine from settings page (#3277)" This reverts commit 57cf3c7b3d5bface785763d06813906ba6eab7c9. Revert "feat: enable copy over instructions (#3266)" This reverts commit 2074511067201f0addb9d274cc90d1e782f2bc1d. Revert "chore: toast message on model import fail with reason (#3276)" This reverts commit 3bebdfe67e1571c7414065a36d16eb5941115ee0. Revert "fix: should not let second instance terminate cortex (#3274)" This reverts commit d074a5a445b73ca195a49814a935300f9e895aaa. Revert "chore: remnove focus button (#3272)" This reverts commit 07fa79e71a401becdbc0f474c27b860654a8bd62. Revert "chore: update hub search result (#3273)" This reverts commit 10b4a9087af709d147b34f6c3ee63d2d3b75c77a. Revert "chore: temporary hidden import model (#3270)" This reverts commit db5d8aba454fd4cc1e07253ca4805d4b1b3e7fb2. Revert "fix: set cortex data folder path when starting jan (#3252)" This reverts commit 91c77eda78ecd251d480e58b853fe7b261f6de50. Revert "fix: remote model added manually does not shown in model drop down (#3261)" This reverts commit 224ca3f7cc25b2577ab123829907964b78b78aa8. 
Revert "feat: add more options for cortex popup (#3236)" This reverts commit 5e06ed8a122aaed9d68fbd04ce42b65bf8987e58. Revert "feat: manage cloud models from threads screen (#3223)" This reverts commit 37a3c4f844419e66cfe3f2a9ff79ba688538241f. Revert "chore: check the legacy incompatible message type (#3248)" This reverts commit c10caf8d7f1f9cf68551e41de5d54cd4450cf44a. Revert "chore: minor copy for grammar (#3235)" This reverts commit f0f23078f31f58e01ba27787d6926f5c1eb2ff0b. Revert "fix: add back normalize message function (#3234)" This reverts commit 83579df3a40ff61eac25975da8295fceaec679dc. Revert "chore: update conditional starter screen after cortex load (#3227)" This reverts commit 4d3a97f1dca9e6c3ea746586e8607541f2d1c0b3. Revert "fix: broken status parse due to empty category (#3233)" This reverts commit 68714eeaf9212a6fdacd5c6a48d8691db9cc99eb. Revert "feat: make scroll area type auto for make default visible scrollbar (#3220)" This reverts commit 13428d60e7d3ea6a24c0df8871ea13e2dec0d5fd. Revert "fix: update new api from cortex to support 0.5.0 (#3221)" This reverts commit ec9b5bf682a8676e132a08075b6ae03cf9e23132. Revert "feat: new starter screen (#3217)" This reverts commit e8ee694abd33b34112d2c7d09f8c03370c2d22cc. Revert "bump-cortex-0.5.0-1 (#3218)" This reverts commit 5369da78f5b83b1c8761cb48820ccf3111728a90. Revert "Deprecate Docker and K8s (#3219)" This reverts commit 7611a05c44982d07465bec57658d5bf965f30ad5. Revert "chore: set container max width for chat message and new hub screen (#3213)" This reverts commit 007daa71616268b0e741e7a890b319401e49a81e. Revert "feat: integrating cortex (#3001)" This reverts commit 101268f6f36df96b62982a9eeb8581ebe103a909.
326 lines
9.1 KiB
TypeScript
326 lines
9.1 KiB
TypeScript
import path from 'path'
|
|
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
|
|
import tcpPortUsed from 'tcp-port-used'
|
|
import fetchRT from 'fetch-retry'
|
|
import {
|
|
log,
|
|
getJanDataFolderPath,
|
|
SystemInformation,
|
|
PromptTemplate,
|
|
} from '@janhq/core/node'
|
|
import decompress from 'decompress'
|
|
import terminate from 'terminate'
|
|
|
|
// Polyfill fetch with retry: wraps the global fetch so callers can pass
// `retries` / `retryDelay` options and have transient failures retried.
const fetchRetry = fetchRT(fetch)
|
|
const supportedPlatform = (): string[] => ['win32', 'linux']
|
|
const supportedGpuArch = (): string[] => ['ampere', 'ada']
|
|
const PORT_CHECK_INTERVAL = 100
|
|
|
|
/**
 * Parameters sent to the engine when loading a model
 * (serialized as the POST body of the load-model request).
 */
interface ModelLoadParams {
  engine_path: string
  ctx_len: number
}

// The subprocess instance for Engine
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
|
|
|
|
/**
|
|
* Initializes a engine subprocess to load a machine learning model.
|
|
* @param params - The model load settings.
|
|
*/
|
|
async function loadModel(
|
|
params: any,
|
|
systemInfo?: SystemInformation
|
|
): Promise<{ error: Error | undefined }> {
|
|
// modelFolder is the absolute path to the running model folder
|
|
// e.g. ~/jan/models/llama-2
|
|
let modelFolder = params.modelFolder
|
|
|
|
if (params.model.settings.prompt_template) {
|
|
const promptTemplate = params.model.settings.prompt_template
|
|
const prompt = promptTemplateConverter(promptTemplate)
|
|
if (prompt?.error) {
|
|
return Promise.reject(prompt.error)
|
|
}
|
|
params.model.settings.system_prompt = prompt.system_prompt
|
|
params.model.settings.user_prompt = prompt.user_prompt
|
|
params.model.settings.ai_prompt = prompt.ai_prompt
|
|
}
|
|
|
|
const settings: ModelLoadParams = {
|
|
engine_path: modelFolder,
|
|
ctx_len: params.model.settings.ctx_len ?? 2048,
|
|
...params.model.settings,
|
|
}
|
|
if (!systemInfo) {
|
|
throw new Error('Cannot get system info. Unable to start nitro x tensorrt.')
|
|
}
|
|
return runEngineAndLoadModel(settings, systemInfo)
|
|
}
|
|
|
|
/**
|
|
* Stops a Engine subprocess.
|
|
*/
|
|
function unloadModel(): Promise<void> {
|
|
const controller = new AbortController()
|
|
setTimeout(() => controller.abort(), 5000)
|
|
debugLog(`Request to kill engine`)
|
|
|
|
const killRequest = () => {
|
|
return fetch(TERMINATE_ENGINE_URL, {
|
|
method: 'DELETE',
|
|
signal: controller.signal,
|
|
})
|
|
.then(() => {
|
|
subprocess = undefined
|
|
})
|
|
.catch(() => {}) // Do nothing with this attempt
|
|
.then(() =>
|
|
tcpPortUsed.waitUntilFree(
|
|
parseInt(ENGINE_PORT),
|
|
PORT_CHECK_INTERVAL,
|
|
5000
|
|
)
|
|
) // Wait for port available
|
|
.then(() => debugLog(`Engine process is terminated`))
|
|
.catch((err) => {
|
|
debugLog(
|
|
`Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
|
|
)
|
|
throw 'PORT_NOT_AVAILABLE'
|
|
})
|
|
}
|
|
|
|
if (subprocess?.pid) {
|
|
log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
|
|
const pid = subprocess.pid
|
|
return new Promise((resolve, reject) => {
|
|
terminate(pid, function (err) {
|
|
if (err) {
|
|
return killRequest()
|
|
} else {
|
|
return tcpPortUsed
|
|
.waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
|
|
.then(() => resolve())
|
|
.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
|
|
.catch(() => {
|
|
killRequest()
|
|
})
|
|
}
|
|
})
|
|
})
|
|
} else {
|
|
return killRequest()
|
|
}
|
|
}
|
|
/**
|
|
* 1. Spawn engine process
|
|
* 2. Load model into engine subprocess
|
|
* @returns
|
|
*/
|
|
async function runEngineAndLoadModel(
|
|
settings: ModelLoadParams,
|
|
systemInfo: SystemInformation
|
|
) {
|
|
return unloadModel()
|
|
.then(() => runEngine(systemInfo))
|
|
.then(() => loadModelRequest(settings))
|
|
.catch((err) => {
|
|
// TODO: Broadcast error so app could display proper error message
|
|
debugLog(`${err}`, 'Error')
|
|
return { error: err }
|
|
})
|
|
}
|
|
|
|
/**
|
|
* Loads a LLM model into the Engine subprocess by sending a HTTP POST request.
|
|
*/
|
|
async function loadModelRequest(
|
|
settings: ModelLoadParams
|
|
): Promise<{ error: Error | undefined }> {
|
|
debugLog(`Loading model with params ${JSON.stringify(settings)}`)
|
|
return fetchRetry(LOAD_MODEL_URL, {
|
|
method: 'POST',
|
|
headers: {
|
|
'Content-Type': 'application/json',
|
|
},
|
|
body: JSON.stringify(settings),
|
|
retries: 3,
|
|
retryDelay: 500,
|
|
})
|
|
.then((res) => {
|
|
debugLog(`Load model success with response ${JSON.stringify(res)}`)
|
|
return Promise.resolve({ error: undefined })
|
|
})
|
|
.catch((err) => {
|
|
debugLog(`Load model failed with error ${err}`, 'Error')
|
|
return Promise.resolve({ error: err })
|
|
})
|
|
}
|
|
|
|
/**
|
|
* Spawns engine subprocess.
|
|
*/
|
|
async function runEngine(systemInfo: SystemInformation): Promise<void> {
|
|
debugLog(`Spawning engine subprocess...`)
|
|
if (systemInfo.gpuSetting == null) {
|
|
return Promise.reject(
|
|
'No GPU information found. Please check your GPU setting.'
|
|
)
|
|
}
|
|
|
|
if (systemInfo.gpuSetting?.gpus.length === 0) {
|
|
return Promise.reject('No GPU found. Please check your GPU setting.')
|
|
}
|
|
|
|
if (systemInfo.osInfo == null) {
|
|
return Promise.reject(
|
|
'No OS information found. Please check your OS setting.'
|
|
)
|
|
}
|
|
const platform = systemInfo.osInfo.platform
|
|
if (platform == null || supportedPlatform().includes(platform) === false) {
|
|
return Promise.reject(
|
|
'No OS architecture found. Please check your OS setting.'
|
|
)
|
|
}
|
|
|
|
const gpu = systemInfo.gpuSetting?.gpus[0]
|
|
if (gpu.name.toLowerCase().includes('nvidia') === false) {
|
|
return Promise.reject('No Nvidia GPU found. Please check your GPU setting.')
|
|
}
|
|
const gpuArch = gpu.arch
|
|
if (gpuArch == null || supportedGpuArch().includes(gpuArch) === false) {
|
|
return Promise.reject(
|
|
`Your GPU: ${gpu.name} is not supported. Only ${supportedGpuArch().join(
|
|
', '
|
|
)} series are supported.`
|
|
)
|
|
}
|
|
const janDataFolderPath = await getJanDataFolderPath()
|
|
const tensorRtVersion = TENSORRT_VERSION
|
|
const provider = PROVIDER
|
|
|
|
return new Promise<void>((resolve, reject) => {
|
|
// Current directory by default
|
|
|
|
const executableFolderPath = path.join(
|
|
janDataFolderPath,
|
|
'engines',
|
|
provider,
|
|
tensorRtVersion,
|
|
gpuArch
|
|
)
|
|
const nitroExecutablePath = path.join(
|
|
executableFolderPath,
|
|
platform === 'win32' ? 'nitro.exe' : 'nitro'
|
|
)
|
|
|
|
const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
|
|
// Execute the binary
|
|
debugLog(`Spawn nitro at path: ${nitroExecutablePath}, and args: ${args}`)
|
|
subprocess = spawn(nitroExecutablePath, args, {
|
|
cwd: executableFolderPath,
|
|
env: {
|
|
...process.env,
|
|
},
|
|
})
|
|
|
|
// Handle subprocess output
|
|
subprocess.stdout.on('data', (data: any) => {
|
|
debugLog(`${data}`)
|
|
})
|
|
|
|
subprocess.stderr.on('data', (data: any) => {
|
|
debugLog(`${data}`)
|
|
})
|
|
|
|
subprocess.on('close', (code: any) => {
|
|
debugLog(`Engine exited with code: ${code}`)
|
|
subprocess = undefined
|
|
reject(`child process exited with code ${code}`)
|
|
})
|
|
|
|
tcpPortUsed
|
|
.waitUntilUsed(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 30000)
|
|
.then(() => {
|
|
debugLog(`Engine is ready`)
|
|
resolve()
|
|
})
|
|
})
|
|
}
|
|
|
|
function debugLog(message: string, level: string = 'Debug') {
|
|
log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
|
|
}
|
|
|
|
const decompressRunner = async (zipPath: string, output: string) => {
|
|
console.debug(`Decompressing ${zipPath} to ${output}...`)
|
|
try {
|
|
const files = await decompress(zipPath, output)
|
|
console.debug('Decompress finished!', files)
|
|
} catch (err) {
|
|
console.error(`Decompress ${zipPath} failed: ${err}`)
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse prompt template into agrs settings
|
|
* @param promptTemplate Template as string
|
|
* @returns
|
|
*/
|
|
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
|
|
// Split the string using the markers
|
|
const systemMarker = '{system_message}'
|
|
const promptMarker = '{prompt}'
|
|
|
|
if (
|
|
promptTemplate.includes(systemMarker) &&
|
|
promptTemplate.includes(promptMarker)
|
|
) {
|
|
// Find the indices of the markers
|
|
const systemIndex = promptTemplate.indexOf(systemMarker)
|
|
const promptIndex = promptTemplate.indexOf(promptMarker)
|
|
|
|
// Extract the parts of the string
|
|
const system_prompt = promptTemplate.substring(0, systemIndex)
|
|
const user_prompt = promptTemplate.substring(
|
|
systemIndex + systemMarker.length,
|
|
promptIndex
|
|
)
|
|
const ai_prompt = promptTemplate.substring(
|
|
promptIndex + promptMarker.length
|
|
)
|
|
|
|
// Return the split parts
|
|
return { system_prompt, user_prompt, ai_prompt }
|
|
} else if (promptTemplate.includes(promptMarker)) {
|
|
// Extract the parts of the string for the case where only promptMarker is present
|
|
const promptIndex = promptTemplate.indexOf(promptMarker)
|
|
const user_prompt = promptTemplate.substring(0, promptIndex)
|
|
const ai_prompt = promptTemplate.substring(
|
|
promptIndex + promptMarker.length
|
|
)
|
|
|
|
// Return the split parts
|
|
return { user_prompt, ai_prompt }
|
|
}
|
|
|
|
// Return an error if none of the conditions are met
|
|
return { error: 'Cannot split prompt template' }
|
|
}
|
|
|
|
// Public surface of this engine extension's node module.
export default {
  supportedPlatform,
  supportedGpuArch,
  decompressRunner,
  loadModel,
  unloadModel,
  // Disposing the extension tears the engine down the same way as unload.
  dispose: unloadModel,
}
|