Louis d85d02693b
feat: Nitro-Tensorrt-LLM Extension (#2280)
* feat: tensorrt-llm-extension

* fix: loading

* feat: add download tensorrt llm runner

Signed-off-by: James <james@jan.ai>

* feat: update to rollupjs instead of webpack for monitoring extension

Signed-off-by: James <james@jan.ai>

* feat: move update nvidia info to monitor extension

Signed-off-by: James <james@jan.ai>

* allow download tensorrt

Signed-off-by: James <james@jan.ai>

* update

Signed-off-by: James <james@jan.ai>

* allow download tensor rt based on gpu setting

Signed-off-by: James <james@jan.ai>

* update downloaded models

Signed-off-by: James <james@jan.ai>

* feat: add extension compatibility

* dynamic tensor rt engines

Signed-off-by: James <james@jan.ai>

* update models

Signed-off-by: James <james@jan.ai>

* chore: remove ts-ignore

* feat: getting installation state from extension

Signed-off-by: James <james@jan.ai>

* chore: adding type for decompress

Signed-off-by: James <james@jan.ai>

* feat: update according Louis's comment

Signed-off-by: James <james@jan.ai>

* feat: add progress for installing extension

Signed-off-by: James <james@jan.ai>

* chore: remove args from extension installation

* fix: model download does not work properly

* fix: do not allow user to stop tensorrtllm inference

* fix: extension installed style

* fix: download tensorrt does not update state

Signed-off-by: James <james@jan.ai>

* chore: replace int4 by fl16

* feat: modal for installing extension

Signed-off-by: James <james@jan.ai>

* fix: start download immediately after press install

Signed-off-by: James <james@jan.ai>

* fix: error switching between engines

* feat: rename inference provider to ai engine and refactor to core

* fix: missing ulid

* fix: core bundler

* feat: add cancel extension installing

Signed-off-by: James <james@jan.ai>

* remove mocking for mac

Signed-off-by: James <james@jan.ai>

* fix: show models only when extension is ready

* add tensorrt badge for model

Signed-off-by: James <james@jan.ai>

* fix: copy

* fix: add compatible check (#2342)

* fix: add compatible check

Signed-off-by: James <james@jan.ai>

* fix: copy

* fix: font

* fix: copy

* fix: broken monitoring extension

* chore: bump engine

* fix: copy

* fix: model copy

* fix: copy

* fix: model json

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: Louis <louis@jan.ai>

* fix: vulkan support

* fix: installation button padding

* fix: empty script

* fix: remove hard code string

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: NamH <NamNh0122@gmail.com>
2024-03-14 14:07:22 +07:00

311 lines
8.6 KiB
TypeScript

/**
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
* @version 1.0.0
* @module inference-extension/src/index
*/
import {
ChatCompletionRole,
ContentType,
MessageRequest,
MessageRequestType,
MessageStatus,
ThreadContent,
ThreadMessage,
events,
executeOnMain,
fs,
Model,
joinPath,
InferenceExtension,
log,
InferenceEngine,
MessageEvent,
ModelEvent,
InferenceEvent,
ModelSettingParams,
getJanDataFolderPath,
} from '@janhq/core'
import { requestInference } from './helpers/sse'
import { ulid } from 'ulid'
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferenceNitroExtension extends InferenceExtension {
private static readonly _homeDir = 'file://engines'
private static readonly _settingsDir = 'file://settings'
private static readonly _engineMetadataFileName = 'nitro.json'
/**
* Checking the health for Nitro's process each 5 secs.
*/
private static readonly _intervalHealthCheck = 5 * 1000
private _currentModel: Model | undefined
private _engineSettings: ModelSettingParams = {
ctx_len: 2048,
ngl: 100,
cpu_threads: 1,
cont_batching: false,
embedding: true,
}
controller = new AbortController()
isCancelled = false
/**
* The interval id for the health check. Used to stop the health check.
*/
private getNitroProcesHealthIntervalId: NodeJS.Timeout | undefined = undefined
/**
* Tracking the current state of nitro process.
*/
private nitroProcessInfo: any = undefined
private inferenceUrl = ''
/**
* Subscribes to events emitted by the @janhq/core package.
*/
async onLoad() {
if (!(await fs.existsSync(JanInferenceNitroExtension._homeDir))) {
try {
await fs.mkdirSync(JanInferenceNitroExtension._homeDir)
} catch (e) {
console.debug(e)
}
}
// init inference url
// @ts-ignore
const electronApi = window?.electronAPI
this.inferenceUrl = INFERENCE_URL
if (!electronApi) {
this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions`
}
console.debug('Inference url: ', this.inferenceUrl)
if (!(await fs.existsSync(JanInferenceNitroExtension._settingsDir)))
await fs.mkdirSync(JanInferenceNitroExtension._settingsDir)
this.writeDefaultEngineSettings()
// Events subscription
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.onMessageRequest(data)
)
events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model))
events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model))
events.on(InferenceEvent.OnInferenceStopped, () =>
this.onInferenceStopped()
)
}
/**
* Stops the model inference.
*/
onUnload(): void {}
private async writeDefaultEngineSettings() {
try {
const engineFile = await joinPath([
JanInferenceNitroExtension._homeDir,
JanInferenceNitroExtension._engineMetadataFileName,
])
if (await fs.existsSync(engineFile)) {
const engine = await fs.readFileSync(engineFile, 'utf-8')
this._engineSettings =
typeof engine === 'object' ? engine : JSON.parse(engine)
} else {
await fs.writeFileSync(
engineFile,
JSON.stringify(this._engineSettings, null, 2)
)
}
} catch (err) {
console.error(err)
}
}
private async onModelInit(model: Model) {
if (model.engine !== InferenceEngine.nitro) return
const modelFolder = await joinPath([
await getJanDataFolderPath(),
'models',
model.id,
])
this._currentModel = model
const nitroInitResult = await executeOnMain(NODE, 'runModel', {
modelFolder,
model,
})
if (nitroInitResult?.error) {
events.emit(ModelEvent.OnModelFail, {
...model,
error: nitroInitResult.error,
})
return
}
events.emit(ModelEvent.OnModelReady, model)
this.getNitroProcesHealthIntervalId = setInterval(
() => this.periodicallyGetNitroHealth(),
JanInferenceNitroExtension._intervalHealthCheck
)
}
private async onModelStop(model: Model) {
if (model.engine !== 'nitro') return
await executeOnMain(NODE, 'stopModel')
events.emit(ModelEvent.OnModelStopped, {})
// stop the periocally health check
if (this.getNitroProcesHealthIntervalId) {
clearInterval(this.getNitroProcesHealthIntervalId)
this.getNitroProcesHealthIntervalId = undefined
}
}
/**
* Periodically check for nitro process's health.
*/
private async periodicallyGetNitroHealth(): Promise<void> {
const health = await executeOnMain(NODE, 'getCurrentNitroProcessInfo')
const isRunning = this.nitroProcessInfo?.isRunning ?? false
if (isRunning && health.isRunning === false) {
console.debug('Nitro process is stopped')
events.emit(ModelEvent.OnModelStopped, {})
}
this.nitroProcessInfo = health
}
private async onInferenceStopped() {
this.isCancelled = true
this.controller?.abort()
}
/**
* Makes a single response inference request.
* @param {MessageRequest} data - The data for the inference request.
* @returns {Promise<any>} A promise that resolves with the inference response.
*/
async inference(data: MessageRequest): Promise<ThreadMessage> {
const timestamp = Date.now()
const message: ThreadMessage = {
thread_id: data.threadId,
created: timestamp,
updated: timestamp,
status: MessageStatus.Ready,
id: '',
role: ChatCompletionRole.Assistant,
object: 'thread.message',
content: [],
}
return new Promise(async (resolve, reject) => {
if (!this._currentModel) return Promise.reject('No model loaded')
requestInference(
this.inferenceUrl,
data.messages ?? [],
this._currentModel
).subscribe({
next: (_content: any) => {},
complete: async () => {
resolve(message)
},
error: async (err: any) => {
reject(err)
},
})
})
}
/**
* Handles a new message request by making an inference request and emitting events.
* Function registered in event manager, should be static to avoid binding issues.
* Pass instance as a reference.
* @param {MessageRequest} data - The data for the new message request.
*/
private async onMessageRequest(data: MessageRequest) {
if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
return
}
const timestamp = Date.now()
const message: ThreadMessage = {
id: ulid(),
thread_id: data.threadId,
type: data.type,
assistant_id: data.assistantId,
role: ChatCompletionRole.Assistant,
content: [],
status: MessageStatus.Pending,
created: timestamp,
updated: timestamp,
object: 'thread.message',
}
if (data.type !== MessageRequestType.Summary) {
events.emit(MessageEvent.OnMessageResponse, message)
}
this.isCancelled = false
this.controller = new AbortController()
// @ts-ignore
const model: Model = {
...(this._currentModel || {}),
...(data.model || {}),
}
requestInference(
this.inferenceUrl,
data.messages ?? [],
model,
this.controller
).subscribe({
next: (content: any) => {
const messageContent: ThreadContent = {
type: ContentType.Text,
text: {
value: content.trim(),
annotations: [],
},
}
message.content = [messageContent]
events.emit(MessageEvent.OnMessageUpdate, message)
},
complete: async () => {
message.status = message.content.length
? MessageStatus.Ready
: MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message)
},
error: async (err: any) => {
if (this.isCancelled || message.content.length) {
message.status = MessageStatus.Stopped
events.emit(MessageEvent.OnMessageUpdate, message)
return
}
message.status = MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message)
log(`[APP]::Error: ${err.message}`)
},
})
}
}