* feat: tensorrt-llm-extension
* fix: loading
* feat: add download tensorrt llm runner
  Signed-off-by: James <james@jan.ai>
* feat: update to rollupjs instead of webpack for monitoring extension
  Signed-off-by: James <james@jan.ai>
* feat: move update nvidia info to monitor extension
  Signed-off-by: James <james@jan.ai>
* allow download tensorrt
  Signed-off-by: James <james@jan.ai>
* update
  Signed-off-by: James <james@jan.ai>
* allow download tensor rt based on gpu setting
  Signed-off-by: James <james@jan.ai>
* update downloaded models
  Signed-off-by: James <james@jan.ai>
* feat: add extension compatibility
* dynamic tensor rt engines
  Signed-off-by: James <james@jan.ai>
* update models
  Signed-off-by: James <james@jan.ai>
* chore: remove ts-ignore
* feat: getting installation state from extension
  Signed-off-by: James <james@jan.ai>
* chore: adding type for decompress
  Signed-off-by: James <james@jan.ai>
* feat: update according Louis's comment
  Signed-off-by: James <james@jan.ai>
* feat: add progress for installing extension
  Signed-off-by: James <james@jan.ai>
* chore: remove args from extension installation
* fix: model download does not work properly
* fix: do not allow user to stop tensorrtllm inference
* fix: extension installed style
* fix: download tensorrt does not update state
  Signed-off-by: James <james@jan.ai>
* chore: replace int4 by fl16
* feat: modal for installing extension
  Signed-off-by: James <james@jan.ai>
* fix: start download immediately after press install
  Signed-off-by: James <james@jan.ai>
* fix: error switching between engines
* feat: rename inference provider to ai engine and refactor to core
* fix: missing ulid
* fix: core bundler
* feat: add cancel extension installing
  Signed-off-by: James <james@jan.ai>
* remove mocking for mac
  Signed-off-by: James <james@jan.ai>
* fix: show models only when extension is ready
* add tensorrt badge for model
  Signed-off-by: James <james@jan.ai>
* fix: copy
* fix: add compatible check (#2342)
  * fix: add compatible check
    Signed-off-by: James <james@jan.ai>
  * fix: copy
  * fix: font
  * fix: copy
  * fix: broken monitoring extension
  * chore: bump engine
  * fix: copy
  * fix: model copy
  * fix: copy
  * fix: model json
  ---------
  Signed-off-by: James <james@jan.ai>
  Co-authored-by: James <james@jan.ai>
  Co-authored-by: Louis <louis@jan.ai>
* fix: vulkan support
* fix: installation button padding
* fix: empty script
* fix: remove hard code string

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: NamH <NamNh0122@gmail.com>
import { requestInference } from './helpers/sse'
import { ulid } from 'ulid'
import { AIEngine } from './AIEngine'
import {
  ChatCompletionRole,
  ContentType,
  InferenceEvent,
  MessageEvent,
  MessageRequest,
  MessageRequestType,
  MessageStatus,
  Model,
  ModelInfo,
  ThreadContent,
  ThreadMessage,
} from '../../types'
import { events } from '../../events'

/**
 * Base OAI Inference Provider
 * Applicable to all OAI compatible inference providers
 */
export abstract class OAIEngine extends AIEngine {
  // The inference engine
  abstract inferenceUrl: string
  abstract nodeModule: string

  // Controller to handle stop requests
  controller = new AbortController()
  isCancelled = false

  // The loaded model instance
  loadedModel: Model | undefined

  /**
   * On extension load, subscribe to events.
   */
  onLoad() {
    super.onLoad()
    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
    events.on(InferenceEvent.OnInferenceStopped, () => this.onInferenceStopped())
  }

  /**
   * On extension unload
   */
  onUnload(): void {}

  /**
   * Inference request
   */
  inference(data: MessageRequest) {
    if (data.model?.engine?.toString() !== this.provider) return

    const timestamp = Date.now()
    const message: ThreadMessage = {
      id: ulid(),
      thread_id: data.threadId,
      type: data.type,
      assistant_id: data.assistantId,
      role: ChatCompletionRole.Assistant,
      content: [],
      status: MessageStatus.Pending,
      created: timestamp,
      updated: timestamp,
      object: 'thread.message',
    }

    if (data.type !== MessageRequestType.Summary) {
      events.emit(MessageEvent.OnMessageResponse, message)
    }

    this.isCancelled = false
    this.controller = new AbortController()

    const model: ModelInfo = {
      ...(this.loadedModel ? this.loadedModel : {}),
      ...data.model,
    }

    requestInference(this.inferenceUrl, data.messages ?? [], model, this.controller).subscribe({
      next: (content: any) => {
        const messageContent: ThreadContent = {
          type: ContentType.Text,
          text: {
            value: content.trim(),
            annotations: [],
          },
        }
        message.content = [messageContent]
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      complete: async () => {
        message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      error: async (err: any) => {
        if (this.isCancelled || message.content.length) {
          message.status = MessageStatus.Stopped
          events.emit(MessageEvent.OnMessageUpdate, message)
          return
        }
        message.status = MessageStatus.Error
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
    })
  }

  /**
   * Stops the inference.
   */
  onInferenceStopped() {
    this.isCancelled = true
    this.controller?.abort()
  }
}
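For reference, below is a minimal sketch of how a concrete provider might extend this base class. It assumes AIEngine requires nothing beyond a provider identifier in addition to the members OAIEngine declares; the class name, provider id, endpoint, and module name are illustrative placeholders, not values from this repository.

import { OAIEngine } from './OAIEngine'

// Illustrative subclass only; every concrete value here is a placeholder.
export class ExampleOAIEngine extends OAIEngine {
  // Matched against MessageRequest.model.engine in OAIEngine.inference()
  provider = 'example-oai'

  // OAI-compatible chat completions endpoint streamed by requestInference()
  inferenceUrl = 'http://127.0.0.1:3928/v1/chat/completions'

  // Node module that backs this engine, if any
  nodeModule = 'example-node-module'
}

With such an engine in place, the base class handles the rest: OnMessageSent requests whose model.engine matches the provider string are streamed from inferenceUrl, and emitting InferenceEvent.OnInferenceStopped aborts the in-flight request through the shared AbortController.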