/**
 * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module inference-extension/src/index
 */
import {
  ChatCompletionRole,
  ContentType,
  MessageRequest,
  MessageRequestType,
  MessageStatus,
  ThreadContent,
  ThreadMessage,
  events,
  executeOnMain,
  fs,
  Model,
  joinPath,
  InferenceExtension,
  log,
  InferenceEngine,
  MessageEvent,
  ModelEvent,
  InferenceEvent,
  ModelSettingParams,
  getJanDataFolderPath,
} from '@janhq/core'
import { requestInference } from './helpers/sse'
import { ulid } from 'ulid'
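// Note: INFERENCE_URL and NODE (used below) are not imported or defined in
// this file. The assumption here is that they are ambient build-time
// constants injected by the bundler (e.g. a rollup replace/define step) or
// declared in a global .d.ts shipped with the extension.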
/**
 * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
export default class JanInferenceNitroExtension extends InferenceExtension {
  private static readonly _homeDir = 'file://engines'
  private static readonly _settingsDir = 'file://settings'
  private static readonly _engineMetadataFileName = 'nitro.json'
  /**
   * Interval, in milliseconds, between health checks of the Nitro process (every 5 seconds).
   */
  private static readonly _intervalHealthCheck = 5 * 1000
  private _currentModel: Model | undefined

  private _engineSettings: ModelSettingParams = {
    ctx_len: 2048, // context window size, in tokens
    ngl: 100, // number of model layers to offload to the GPU
    cpu_threads: 1,
    cont_batching: false, // continuous batching
    embedding: true,
  }
  controller = new AbortController()
  isCancelled = false

  /**
   * The interval id for the health check. Used to stop the health check.
   */
  private getNitroProcessHealthIntervalId: NodeJS.Timeout | undefined = undefined

  /**
   * Tracks the current state of the nitro process.
   */
  private nitroProcessInfo: any = undefined

  private inferenceUrl = ''
  /**
   * Subscribes to events emitted by the @janhq/core package.
   */
  async onLoad() {
    if (!(await fs.existsSync(JanInferenceNitroExtension._homeDir))) {
      try {
        await fs.mkdirSync(JanInferenceNitroExtension._homeDir)
      } catch (e) {
        console.debug(e)
      }
    }

    // init inference url
    // @ts-ignore
    const electronApi = window?.electronAPI
    this.inferenceUrl = INFERENCE_URL
    if (!electronApi) {
      this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions`
    }
    console.debug('Inference url: ', this.inferenceUrl)

    if (!(await fs.existsSync(JanInferenceNitroExtension._settingsDir)))
      await fs.mkdirSync(JanInferenceNitroExtension._settingsDir)
    this.writeDefaultEngineSettings()

    // Events subscription
    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.onMessageRequest(data)
    )

    events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model))

    events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model))

    events.on(InferenceEvent.OnInferenceStopped, () =>
      this.onInferenceStopped()
    )
  }
  /**
   * Called when the extension is unloaded.
   */
  onUnload(): void {}
  private async writeDefaultEngineSettings() {
    try {
      const engineFile = await joinPath([
        JanInferenceNitroExtension._homeDir,
        JanInferenceNitroExtension._engineMetadataFileName,
      ])
      if (await fs.existsSync(engineFile)) {
        const engine = await fs.readFileSync(engineFile, 'utf-8')
        this._engineSettings =
          typeof engine === 'object' ? engine : JSON.parse(engine)
      } else {
        await fs.writeFileSync(
          engineFile,
          JSON.stringify(this._engineSettings, null, 2)
        )
      }
    } catch (err) {
      console.error(err)
    }
  }
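  // For reference, with the defaults declared above, the generated
  // engines/nitro.json file would contain:
  //
  //   {
  //     "ctx_len": 2048,
  //     "ngl": 100,
  //     "cpu_threads": 1,
  //     "cont_batching": false,
  //     "embedding": true
  //   }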
  private async onModelInit(model: Model) {
    if (model.engine !== InferenceEngine.nitro) return

    const modelFolder = await joinPath([
      await getJanDataFolderPath(),
      'models',
      model.id,
    ])
    this._currentModel = model
    const nitroInitResult = await executeOnMain(NODE, 'runModel', {
      modelFolder,
      model,
    })

    if (nitroInitResult?.error) {
      events.emit(ModelEvent.OnModelFail, {
        ...model,
        error: nitroInitResult.error,
      })
      return
    }

    events.emit(ModelEvent.OnModelReady, model)

    this.getNitroProcessHealthIntervalId = setInterval(
      () => this.periodicallyGetNitroHealth(),
      JanInferenceNitroExtension._intervalHealthCheck
    )
  }
  private async onModelStop(model: Model) {
    if (model.engine !== InferenceEngine.nitro) return

    await executeOnMain(NODE, 'stopModel')
    events.emit(ModelEvent.OnModelStopped, {})

    // stop the periodic health check
    if (this.getNitroProcessHealthIntervalId) {
      clearInterval(this.getNitroProcessHealthIntervalId)
      this.getNitroProcessHealthIntervalId = undefined
    }
  }
  /**
   * Periodically checks the nitro process's health.
   */
  private async periodicallyGetNitroHealth(): Promise<void> {
    const health = await executeOnMain(NODE, 'getCurrentNitroProcessInfo')

    const isRunning = this.nitroProcessInfo?.isRunning ?? false
    if (isRunning && health.isRunning === false) {
      console.debug('Nitro process is stopped')
      events.emit(ModelEvent.OnModelStopped, {})
    }
    this.nitroProcessInfo = health
  }

  private async onInferenceStopped() {
    this.isCancelled = true
    this.controller?.abort()
  }
  /**
   * Makes a single response inference request.
   * @param {MessageRequest} data - The data for the inference request.
   * @returns {Promise<ThreadMessage>} A promise that resolves with the inference response.
   */
  async inference(data: MessageRequest): Promise<ThreadMessage> {
    const timestamp = Date.now()
    const message: ThreadMessage = {
      thread_id: data.threadId,
      created: timestamp,
      updated: timestamp,
      status: MessageStatus.Ready,
      id: '',
      role: ChatCompletionRole.Assistant,
      object: 'thread.message',
      content: [],
    }

    return new Promise((resolve, reject) => {
      // Reject the outer promise directly; returning Promise.reject() from
      // inside the executor would leave this promise pending forever.
      if (!this._currentModel) {
        reject('No model loaded')
        return
      }

      requestInference(
        this.inferenceUrl,
        data.messages ?? [],
        this._currentModel
      ).subscribe({
        next: (_content: any) => {},
        complete: async () => {
          resolve(message)
        },
        error: async (err: any) => {
          reject(err)
        },
      })
    })
  }
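  // A minimal, hypothetical usage sketch for `inference` (field values are
  // illustrative; it assumes a model was already loaded via onModelInit):
  //
  //   const reply = await extension.inference({
  //     threadId: 'thread-1',
  //     messages: [{ role: ChatCompletionRole.User, content: 'Hello' }],
  //   } as MessageRequest)
  //   console.log(reply.status)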
  /**
   * Handles a new message request by making an inference request and emitting events.
   * Registered with the event manager in `onLoad` through an arrow function,
   * which keeps `this` bound to the extension instance.
   * @param {MessageRequest} data - The data for the new message request.
   */
  private async onMessageRequest(data: MessageRequest) {
    if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
      return
    }

    const timestamp = Date.now()
    const message: ThreadMessage = {
      id: ulid(),
      thread_id: data.threadId,
      type: data.type,
      assistant_id: data.assistantId,
      role: ChatCompletionRole.Assistant,
      content: [],
      status: MessageStatus.Pending,
      created: timestamp,
      updated: timestamp,
      object: 'thread.message',
    }

    if (data.type !== MessageRequestType.Summary) {
      events.emit(MessageEvent.OnMessageResponse, message)
    }

    this.isCancelled = false
    this.controller = new AbortController()

    // @ts-ignore
    const model: Model = {
      ...(this._currentModel || {}),
      ...(data.model || {}),
    }
    requestInference(
      this.inferenceUrl,
      data.messages ?? [],
      model,
      this.controller
    ).subscribe({
      next: (content: any) => {
        const messageContent: ThreadContent = {
          type: ContentType.Text,
          text: {
            value: content.trim(),
            annotations: [],
          },
        }
        message.content = [messageContent]
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      complete: async () => {
        message.status = message.content.length
          ? MessageStatus.Ready
          : MessageStatus.Error
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      error: async (err: any) => {
        if (this.isCancelled || message.content.length) {
          message.status = MessageStatus.Stopped
          events.emit(MessageEvent.OnMessageUpdate, message)
          return
        }
        message.status = MessageStatus.Error
        events.emit(MessageEvent.OnMessageUpdate, message)
        log(`[APP]::Error: ${err.message}`)
      },
    })
  }
}
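// A minimal, hypothetical sketch of how this extension is driven through the
// @janhq/core event bus. The model fields shown are assumptions beyond what
// this file defines; in practice the Jan app emits these events itself:
//
//   const extension = new JanInferenceNitroExtension()
//   await extension.onLoad()
//
//   // Ask the extension to spawn nitro with a model, then send a message
//   // once ModelEvent.OnModelReady has been observed. Streaming output
//   // arrives as MessageEvent.OnMessageUpdate events.
//   events.emit(ModelEvent.OnModelInit, {
//     id: 'my-model',
//     engine: InferenceEngine.nitro,
//   } as Model)
//   events.emit(MessageEvent.OnMessageSent, {
//     threadId: 'thread-1',
//     model: { id: 'my-model', engine: InferenceEngine.nitro },
//     messages: [{ role: ChatCompletionRole.User, content: 'Hello' }],
//   } as MessageRequest)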