feat: Nitro-Tensorrt-LLM Extension (#2280)

* feat: tensorrt-llm-extension

* fix: loading

* feat: add download tensorrt llm runner

Signed-off-by: James <james@jan.ai>

* feat: update to rollupjs instead of webpack for monitoring extension

Signed-off-by: James <james@jan.ai>

* feat: move update nvidia info to monitor extension

Signed-off-by: James <james@jan.ai>

* allow download tensorrt

Signed-off-by: James <james@jan.ai>

* update

Signed-off-by: James <james@jan.ai>

* allow download tensor rt based on gpu setting

Signed-off-by: James <james@jan.ai>

* update downloaded models

Signed-off-by: James <james@jan.ai>

* feat: add extension compatibility

* dynamic tensor rt engines

Signed-off-by: James <james@jan.ai>

* update models

Signed-off-by: James <james@jan.ai>

* chore: remove ts-ignore

* feat: getting installation state from extension

Signed-off-by: James <james@jan.ai>

* chore: adding type for decompress

Signed-off-by: James <james@jan.ai>

* feat: update according to Louis's comment

Signed-off-by: James <james@jan.ai>

* feat: add progress for installing extension

Signed-off-by: James <james@jan.ai>

* chore: remove args from extension installation

* fix: model download does not work properly

* fix: do not allow user to stop tensorrtllm inference

* fix: extension installed style

* fix: download tensorrt does not update state

Signed-off-by: James <james@jan.ai>

* chore: replace int4 with fl16

* feat: modal for installing extension

Signed-off-by: James <james@jan.ai>

* fix: start download immediately after press install

Signed-off-by: James <james@jan.ai>

* fix: error switching between engines

* feat: rename inference provider to ai engine and refactor to core

* fix: missing ulid

* fix: core bundler

* feat: add cancel extension installing

Signed-off-by: James <james@jan.ai>

* remove mocking for mac

Signed-off-by: James <james@jan.ai>

* fix: show models only when extension is ready

* add tensorrt badge for model

Signed-off-by: James <james@jan.ai>

* fix: copy

* fix: add compatible check (#2342)

* fix: add compatible check

Signed-off-by: James <james@jan.ai>

* fix: copy

* fix: font

* fix: copy

* fix: broken monitoring extension

* chore: bump engine

* fix: copy

* fix: model copy

* fix: copy

* fix: model json

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: Louis <louis@jan.ai>

* fix: vulkan support

* fix: installation button padding

* fix: empty script

* fix: remove hard code string

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: NamH <NamNh0122@gmail.com>
Committed by Louis on 2024-03-14 14:07:22 +07:00 via GitHub
commit d85d02693b (parent 24c6dd05be)
71 changed files with 2497 additions and 626 deletions

.gitignore
View File

@ -22,16 +22,16 @@ package-lock.json
core/lib/**
# Nitro binary files
extensions/inference-nitro-extension/bin/*/nitro
extensions/inference-nitro-extension/bin/*/*.metal
extensions/inference-nitro-extension/bin/*/*.exe
extensions/inference-nitro-extension/bin/*/*.dll
extensions/inference-nitro-extension/bin/*/*.exp
extensions/inference-nitro-extension/bin/*/*.lib
extensions/inference-nitro-extension/bin/saved-*
extensions/inference-nitro-extension/bin/*.tar.gz
extensions/inference-nitro-extension/bin/vulkaninfoSDK.exe
extensions/inference-nitro-extension/bin/vulkaninfo
extensions/*-extension/bin/*/nitro
extensions/*-extension/bin/*/*.metal
extensions/*-extension/bin/*/*.exe
extensions/*-extension/bin/*/*.dll
extensions/*-extension/bin/*/*.exp
extensions/*-extension/bin/*/*.lib
extensions/*-extension/bin/saved-*
extensions/*-extension/bin/*.tar.gz
extensions/*-extension/bin/vulkaninfoSDK.exe
extensions/*-extension/bin/vulkaninfo
# Turborepo

View File

@ -45,11 +45,12 @@
"start": "rollup -c rollup.config.ts -w"
},
"devDependencies": {
"jest": "^29.7.0",
"@types/jest": "^29.5.12",
"@types/node": "^12.0.2",
"eslint-plugin-jest": "^27.9.0",
"eslint": "8.57.0",
"eslint-plugin-jest": "^27.9.0",
"jest": "^29.7.0",
"rimraf": "^3.0.2",
"rollup": "^2.38.5",
"rollup-plugin-commonjs": "^9.1.8",
"rollup-plugin-json": "^3.1.0",
@ -58,7 +59,10 @@
"rollup-plugin-typescript2": "^0.36.0",
"ts-jest": "^29.1.2",
"tslib": "^2.6.2",
"typescript": "^5.3.3",
"rimraf": "^3.0.2"
"typescript": "^5.3.3"
},
"dependencies": {
"rxjs": "^7.8.1",
"ulid": "^2.3.0"
}
}

View File

@ -64,7 +64,7 @@ export default [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
typescript({ useTsconfigDeclarationDir: true, exclude: ['src/*.ts', 'src/extensions/**'] }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control

View File

@ -33,6 +33,8 @@ export enum AppRoute {
stopServer = 'stopServer',
log = 'log',
logServer = 'logServer',
systemInformations = 'systemInformations',
showToast = 'showToast',
}
export enum AppEvent {
@ -56,6 +58,7 @@ export enum DownloadEvent {
onFileDownloadUpdate = 'onFileDownloadUpdate',
onFileDownloadError = 'onFileDownloadError',
onFileDownloadSuccess = 'onFileDownloadSuccess',
onFileUnzipSuccess = 'onFileUnzipSuccess',
}
export enum LocalImportModelEvent {

View File

@ -1,4 +1,4 @@
import { FileStat } from './types'
import { DownloadRequest, FileStat, NetworkConfig } from './types'
/**
* Execute a extension module function in main process
@ -17,18 +17,16 @@ const executeOnMain: (extension: string, method: string, ...args: any[]) => Prom
/**
* Downloads a file from a URL and saves it to the local file system.
* @param {string} url - The URL of the file to download.
* @param {string} fileName - The name to use for the downloaded file.
* @param {object} network - Optional object to specify proxy/whether to ignore SSL certificates.
*
* @param {DownloadRequest} downloadRequest - The request to download the file.
* @param {NetworkConfig} network - Optional object to specify proxy/whether to ignore SSL certificates.
*
* @returns {Promise<any>} A promise that resolves when the file is downloaded.
*/
const downloadFile: (
url: string,
fileName: string,
network?: { proxy?: string; ignoreSSL?: boolean }
) => Promise<any> = (url, fileName, network) => {
return global.core?.api?.downloadFile(url, fileName, network)
}
const downloadFile: (downloadRequest: DownloadRequest, network?: NetworkConfig) => Promise<any> = (
downloadRequest,
network
) => global.core?.api?.downloadFile(downloadRequest, network)
/**
* Aborts the download of a specific file.
@ -108,6 +106,20 @@ const log: (message: string, fileName?: string) => void = (message, fileName) =>
const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: string, to: string) =>
global.core.api?.isSubdirectory(from, to)
/**
* Get system information
* @returns {Promise<any>} - A promise that resolves with the system information.
*/
const systemInformations: () => Promise<any> = () => global.core.api?.systemInformations()
/**
* Show toast message from browser processes.
* @param title - The toast title.
* @param message - The toast message body.
*/
const showToast: (title: string, message: string) => void = (title, message) =>
global.core.api?.showToast(title, message)
/**
* Register extension point function type definition
*/
@ -134,5 +146,7 @@ export {
log,
isSubdirectory,
getUserHomePath,
systemInformations,
showToast,
FileStat,
}
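
For context, a minimal sketch of calling the reworked download API from an extension (hypothetical URL and path; downloadFile, showToast, and DownloadRequest are the exports touched above):

import { DownloadRequest, downloadFile, showToast } from '@janhq/core'

// Hypothetical model download; the URL and path are placeholders.
const request: DownloadRequest = {
  url: 'https://example.com/models/tinyllama.gguf',
  localPath: 'models/tinyllama/tinyllama.gguf', // relative to the Jan data folder
}
downloadFile(request, { ignoreSSL: false }).then(() =>
  showToast('Download', 'Download request submitted')
)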

View File

@ -10,6 +10,22 @@ export enum ExtensionTypeEnum {
export interface ExtensionType {
type(): ExtensionTypeEnum | undefined
}
export interface Compatibility {
platform: string[]
version: string
}
const ALL_INSTALLATION_STATE = [
'NotRequired', // not required.
'Installed', // require and installed. Good to go.
'NotInstalled', // require to be installed.
'Corrupted', // require but corrupted. Need to redownload.
] as const
export type InstallationStateTuple = typeof ALL_INSTALLATION_STATE
export type InstallationState = InstallationStateTuple[number]
/**
* Represents a base extension.
* This class should be extended by any class that represents an extension.
@ -33,4 +49,32 @@ export abstract class BaseExtension implements ExtensionType {
* Any cleanup logic for the extension should be put here.
*/
abstract onUnload(): void
/**
* The compatibility of the extension.
* This is used to check if the extension is compatible with the current environment.
* @returns {Compatibility | undefined} The supported platforms and version, or undefined if compatibility checks do not apply.
*/
compatibility(): Compatibility | undefined {
return undefined
}
/**
* Determine if the prerequisites for the extension are installed.
*
* @returns {Promise<InstallationState>} The installation state of the extension's prerequisites.
*/
async installationState(): Promise<InstallationState> {
return 'NotRequired'
}
/**
* Install the prerequisites for the extension.
*
* @returns {Promise<void>}
*/
// @ts-ignore
async install(...args): Promise<void> {
return
}
}
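
A sketch of how an extension with native prerequisites might use these new hooks (hypothetical extension; members not shown in this hunk are omitted):

import { BaseExtension, Compatibility, InstallationState } from '@janhq/core'

export default class MyRunnerExtension extends BaseExtension {
  onLoad() {}
  onUnload() {}

  // Only usable on Windows and Linux in this hypothetical example.
  compatibility(): Compatibility {
    return { platform: ['win32', 'linux'], version: '1.0.0' }
  }

  // Report whether the native runner has been downloaded yet.
  async installationState(): Promise<InstallationState> {
    const binariesPresent: boolean = false // placeholder check, e.g. existsSync on the runner path
    return binariesPresent ? 'Installed' : 'NotInstalled'
  }
}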

View File

@ -0,0 +1,60 @@
import { getJanDataFolderPath, joinPath } from '../../core'
import { events } from '../../events'
import { BaseExtension } from '../../extension'
import { fs } from '../../fs'
import { Model, ModelEvent } from '../../types'
/**
* Base AIEngine
* Applicable to all AI Engines
*/
export abstract class AIEngine extends BaseExtension {
// The inference engine
abstract provider: string
// The model folder
modelFolder: string = 'models'
abstract models(): Promise<Model[]>
/**
* On extension load, subscribe to events.
*/
onLoad() {
this.prePopulateModels()
}
/**
* Pre-populate models to App Data Folder
*/
prePopulateModels(): Promise<void> {
return this.models().then((models) => {
const prePopulateOperations = models.map((model) =>
getJanDataFolderPath()
.then((janDataFolder) =>
// Attempt to create the model folder
joinPath([janDataFolder, this.modelFolder, model.id]).then((path) =>
fs
.mkdirSync(path)
.catch()
.then(() => path)
)
)
.then((path) => joinPath([path, 'model.json']))
.then((path) => {
// Do not overwrite an existing model.json
return fs.existsSync(path).then((exist: any) => {
if (!exist) return fs.writeFileSync(path, JSON.stringify(model, null, 2))
})
})
.catch((e: Error) => {
console.error('Error', e)
})
)
Promise.all(prePopulateOperations).then(() =>
// Emit event to update models
// So the UI can update the models list
events.emit(ModelEvent.OnModelsUpdate, {})
)
})
}
}

View File

@ -0,0 +1,63 @@
import { executeOnMain, getJanDataFolderPath, joinPath } from '../../core'
import { events } from '../../events'
import { Model, ModelEvent } from '../../types'
import { OAIEngine } from './OAIEngine'
/**
* Base OAI Local Inference Provider
* Adds model load/unload handling (applicable to local inference providers)
*/
export abstract class LocalOAIEngine extends OAIEngine {
// Names of the node module functions that load/unload the model
loadModelFunctionName: string = 'loadModel'
unloadModelFunctionName: string = 'unloadModel'
isRunning: boolean = false
/**
* On extension load, subscribe to events.
*/
onLoad() {
super.onLoad()
// These events are applicable to local inference providers
events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model))
events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model))
}
/**
* Load the model.
*/
async onModelInit(model: Model) {
if (model.engine.toString() !== this.provider) return
const modelFolder = await joinPath([await getJanDataFolderPath(), this.modelFolder, model.id])
const res = await executeOnMain(this.nodeModule, this.loadModelFunctionName, {
modelFolder,
model,
})
if (res?.error) {
events.emit(ModelEvent.OnModelFail, {
...model,
error: res.error,
})
return
} else {
this.loadedModel = model
events.emit(ModelEvent.OnModelReady, model)
this.isRunning = true
}
}
/**
* Stops the model.
*/
onModelStop(model: Model) {
if (model.engine?.toString() !== this.provider) return
this.isRunning = false
executeOnMain(this.nodeModule, this.unloadModelFunctionName).then(() => {
events.emit(ModelEvent.OnModelStopped, {})
})
}
}
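
A concrete local engine then mostly declares identifiers and a model list. A minimal sketch (hypothetical provider, module, and endpoint; the nitro and TensorRT-LLM extensions follow this shape):

import { LocalOAIEngine, Model } from '@janhq/core'

export default class MyEngineExtension extends LocalOAIEngine {
  provider = 'my-engine' // matched against model.engine
  nodeModule = 'my-engine-node' // node module exposing loadModel/unloadModel
  inferenceUrl = 'http://127.0.0.1:3928/inferences/server/chat_completion' // placeholder endpoint

  // Models this engine ships; pre-populated into the data folder by AIEngine.onLoad()
  async models(): Promise<Model[]> {
    return []
  }
}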

View File

@ -0,0 +1,116 @@
import { requestInference } from './helpers/sse'
import { ulid } from 'ulid'
import { AIEngine } from './AIEngine'
import {
ChatCompletionRole,
ContentType,
InferenceEvent,
MessageEvent,
MessageRequest,
MessageRequestType,
MessageStatus,
Model,
ModelInfo,
ThreadContent,
ThreadMessage,
} from '../../types'
import { events } from '../../events'
/**
* Base OAI Inference Provider
* Applicable to all OAI compatible inference providers
*/
export abstract class OAIEngine extends AIEngine {
// The engine's inference endpoint and node module
abstract inferenceUrl: string
abstract nodeModule: string
// Controller to handle stop requests
controller = new AbortController()
isCancelled = false
// The loaded model instance
loadedModel: Model | undefined
/**
* On extension load, subscribe to events.
*/
onLoad() {
super.onLoad()
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
events.on(InferenceEvent.OnInferenceStopped, () => this.onInferenceStopped())
}
/**
* On extension unload
*/
onUnload(): void {}
/**
* Inference request
*/
inference(data: MessageRequest) {
if (data.model?.engine?.toString() !== this.provider) return
const timestamp = Date.now()
const message: ThreadMessage = {
id: ulid(),
thread_id: data.threadId,
type: data.type,
assistant_id: data.assistantId,
role: ChatCompletionRole.Assistant,
content: [],
status: MessageStatus.Pending,
created: timestamp,
updated: timestamp,
object: 'thread.message',
}
if (data.type !== MessageRequestType.Summary) {
events.emit(MessageEvent.OnMessageResponse, message)
}
this.isCancelled = false
this.controller = new AbortController()
const model: ModelInfo = {
...(this.loadedModel ? this.loadedModel : {}),
...data.model,
}
requestInference(this.inferenceUrl, data.messages ?? [], model, this.controller).subscribe({
next: (content: any) => {
const messageContent: ThreadContent = {
type: ContentType.Text,
text: {
value: content.trim(),
annotations: [],
},
}
message.content = [messageContent]
events.emit(MessageEvent.OnMessageUpdate, message)
},
complete: async () => {
message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message)
},
error: async (err: any) => {
if (this.isCancelled || message.content.length) {
message.status = MessageStatus.Stopped
events.emit(MessageEvent.OnMessageUpdate, message)
return
}
message.status = MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message)
},
})
}
/**
* Stops the inference.
*/
onInferenceStopped() {
this.isCancelled = true
this.controller?.abort()
}
}

View File

@ -0,0 +1,67 @@
import { Observable } from 'rxjs'
import { ModelRuntimeParams } from '../../../types'
/**
* Sends a request to the inference server to generate a response based on the recent messages.
* @param inferenceUrl - The URL of the inference endpoint.
* @param recentMessages - An array of recent messages to use as context for the inference.
* @param model - The model id and runtime parameters for the request.
* @param controller - Optional AbortController to cancel the request.
* @returns An Observable that emits the generated response as a string.
*/
export function requestInference(
inferenceUrl: string,
recentMessages: any[],
model: {
id: string
parameters: ModelRuntimeParams
},
controller?: AbortController
): Observable<string> {
return new Observable((subscriber) => {
const requestBody = JSON.stringify({
messages: recentMessages,
model: model.id,
stream: true,
...model.parameters,
})
fetch(inferenceUrl, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Access-Control-Allow-Origin': '*',
'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
},
body: requestBody,
signal: controller?.signal,
})
.then(async (response) => {
if (model.parameters.stream === false) {
const data = await response.json()
subscriber.next(data.choices[0]?.message?.content ?? '')
} else {
const stream = response.body
const decoder = new TextDecoder('utf-8')
const reader = stream?.getReader()
let content = ''
while (reader) {
const { done, value } = await reader.read()
if (done) {
break
}
const text = decoder.decode(value)
const lines = text.trim().split('\n')
for (const line of lines) {
if (line.startsWith('data: ') && !line.includes('data: [DONE]')) {
const data = JSON.parse(line.replace('data: ', ''))
content += data.choices[0]?.delta?.content ?? ''
if (content.startsWith('assistant: ')) {
content = content.replace('assistant: ', '')
}
subscriber.next(content)
}
}
}
}
subscriber.complete()
})
.catch((err) => subscriber.error(err))
})
}
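
For reference, a minimal subscriber over this helper (hypothetical endpoint, message, and model id):

import { requestInference } from './helpers/sse'

const controller = new AbortController()
requestInference(
  'http://127.0.0.1:3928/inferences/server/chat_completion', // placeholder endpoint
  [{ role: 'user', content: 'Hello' }],
  { id: 'my-model', parameters: { stream: true } },
  controller
).subscribe({
  next: (text) => console.log(text), // emits the accumulated response so far
  complete: () => console.log('done'),
  error: (err) => console.error(err),
})
// controller.abort() cancels the underlying fetch.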

View File

@ -0,0 +1,3 @@
export * from './AIEngine'
export * from './OAIEngine'
export * from './LocalOAIEngine'

View File

@ -28,3 +28,8 @@ export { ModelExtension } from './model'
* Hugging Face extension for converting HF models to GGUF.
*/
export { HuggingFaceExtension } from './huggingface'
/**
* Base AI Engines.
*/
export * from './ai-engines'

View File

@ -1,5 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import { ImportingModel, Model, ModelInterface, OptionType } from '../index'
import { GpuSetting, ImportingModel, Model, ModelInterface, OptionType } from '../index'
/**
* Model extension for managing models.
@ -14,6 +14,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
abstract downloadModel(
model: Model,
gpuSettings?: GpuSetting,
network?: { proxy: string; ignoreSSL?: boolean }
): Promise<void>
abstract cancelModelDownload(modelId: string): Promise<void>

View File

@ -1,5 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import { MonitoringInterface } from '../index'
import { GpuSetting, MonitoringInterface } from '../index'
/**
* Monitoring extension for system monitoring.
@ -13,6 +13,7 @@ export abstract class MonitoringExtension extends BaseExtension implements Monit
return ExtensionTypeEnum.SystemMonitoring
}
abstract getGpuSetting(): Promise<GpuSetting>
abstract getResourcesInfo(): Promise<any>
abstract getCurrentLoad(): Promise<any>
}

View File

@ -5,7 +5,7 @@ import { getJanDataFolderPath } from '../../helper'
import { DownloadManager } from '../../helper/download'
import { createWriteStream, renameSync } from 'fs'
import { Processor } from './Processor'
import { DownloadState } from '../../../types'
import { DownloadRequest, DownloadState, NetworkConfig } from '../../../types'
export class Downloader implements Processor {
observer?: Function
@ -20,24 +20,27 @@ export class Downloader implements Processor {
return func(this.observer, ...args)
}
downloadFile(observer: any, url: string, localPath: string, network: any) {
downloadFile(observer: any, downloadRequest: DownloadRequest, network?: NetworkConfig) {
const request = require('request')
const progress = require('request-progress')
const strictSSL = !network?.ignoreSSL
const proxy = network?.proxy?.startsWith('http') ? network.proxy : undefined
const { localPath, url } = downloadRequest
let normalizedPath = localPath
if (typeof localPath === 'string') {
localPath = normalizeFilePath(localPath)
normalizedPath = normalizeFilePath(localPath)
}
const array = localPath.split(sep)
const array = normalizedPath.split(sep)
const fileName = array.pop() ?? ''
const modelId = array.pop() ?? ''
const destination = resolve(getJanDataFolderPath(), localPath)
const destination = resolve(getJanDataFolderPath(), normalizedPath)
const rq = request({ url, strictSSL, proxy })
// Put request to download manager instance
DownloadManager.instance.setRequest(localPath, rq)
DownloadManager.instance.setRequest(normalizedPath, rq)
// Downloading file to a temp file first
const downloadingTempFile = `${destination}.download`
@ -56,16 +59,25 @@ export class Downloader implements Processor {
total: 0,
transferred: 0,
},
children: [],
downloadState: 'downloading',
extensionId: downloadRequest.extensionId,
downloadType: downloadRequest.downloadType,
localPath: normalizedPath,
}
DownloadManager.instance.downloadProgressMap[modelId] = initialDownloadState
if (downloadRequest.downloadType === 'extension') {
observer?.(DownloadEvent.onFileDownloadUpdate, initialDownloadState)
}
progress(rq, {})
.on('progress', (state: any) => {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = {
...currentDownloadState,
...state,
modelId,
fileName,
fileName: fileName,
downloadState: 'downloading',
}
console.debug('progress: ', downloadState)
@ -76,22 +88,22 @@ export class Downloader implements Processor {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = {
...currentDownloadState,
fileName: fileName,
error: error.message,
downloadState: 'error',
}
if (currentDownloadState) {
DownloadManager.instance.downloadProgressMap[modelId] = downloadState
}
observer?.(DownloadEvent.onFileDownloadError, downloadState)
DownloadManager.instance.downloadProgressMap[modelId] = downloadState
})
.on('end', () => {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
if (currentDownloadState && DownloadManager.instance.networkRequests[localPath]) {
if (currentDownloadState && DownloadManager.instance.networkRequests[normalizedPath]) {
// Finished downloading, rename temp file to actual file
renameSync(downloadingTempFile, destination)
const downloadState: DownloadState = {
...currentDownloadState,
fileName: fileName,
downloadState: 'end',
}
observer?.(DownloadEvent.onFileDownloadSuccess, downloadState)

View File

@ -1,7 +1,16 @@
import fs from 'fs'
import {
existsSync,
readdirSync,
readFileSync,
writeFileSync,
mkdirSync,
appendFileSync,
createWriteStream,
rmdirSync,
} from 'fs'
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
import { join } from 'path'
import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../index'
import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types'
import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper'
import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
@ -9,12 +18,12 @@ import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
export const getBuilder = async (configuration: RouteConfiguration) => {
const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
try {
if (!fs.existsSync(directoryPath)) {
if (!existsSync(directoryPath)) {
console.debug('model folder not found')
return []
}
const files: string[] = fs.readdirSync(directoryPath)
const files: string[] = readdirSync(directoryPath)
const allDirectories: string[] = []
for (const file of files) {
@ -46,8 +55,8 @@ export const getBuilder = async (configuration: RouteConfiguration) => {
}
const readModelMetadata = (path: string): string | undefined => {
if (fs.existsSync(path)) {
return fs.readFileSync(path, 'utf-8')
if (existsSync(path)) {
return readFileSync(path, 'utf-8')
} else {
return undefined
}
@ -81,7 +90,7 @@ export const deleteBuilder = async (configuration: RouteConfiguration, id: strin
}
const objectPath = join(directoryPath, id)
fs.rmdirSync(objectPath, { recursive: true })
rmdirSync(objectPath, { recursive: true })
return {
id: id,
object: configuration.delete.object,
@ -96,20 +105,19 @@ export const getMessages = async (threadId: string): Promise<ThreadMessage[]> =>
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const messageFile = 'messages.jsonl'
try {
const files: string[] = fs.readdirSync(threadDirPath)
const files: string[] = readdirSync(threadDirPath)
if (!files.includes(messageFile)) {
console.error(`${threadDirPath} does not contain the message file`)
return []
}
const messageFilePath = join(threadDirPath, messageFile)
if (!fs.existsSync(messageFilePath)) {
if (!existsSync(messageFilePath)) {
console.debug('message file not found')
return []
}
const lines = fs
.readFileSync(messageFilePath, 'utf-8')
const lines = readFileSync(messageFilePath, 'utf-8')
.toString()
.split('\n')
.filter((line: any) => line !== '')
@ -157,11 +165,11 @@ export const createThread = async (thread: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
const threadJsonPath = join(threadDirPath, threadMetadataFileName)
if (!fs.existsSync(threadDirPath)) {
fs.mkdirSync(threadDirPath)
if (!existsSync(threadDirPath)) {
mkdirSync(threadDirPath)
}
await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
return updatedThread
} catch (err) {
return {
@ -191,7 +199,7 @@ export const updateThread = async (threadId: string, thread: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
const threadJsonPath = join(threadDirPath, threadMetadataFileName)
await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
return updatedThread
} catch (err) {
return {
@ -233,10 +241,10 @@ export const createMessage = async (threadId: string, message: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const threadMessagePath = join(threadDirPath, threadMessagesFileName)
if (!fs.existsSync(threadDirPath)) {
fs.mkdirSync(threadDirPath)
if (!existsSync(threadDirPath)) {
mkdirSync(threadDirPath)
}
fs.appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
return threadMessage
} catch (err) {
return {
@ -259,8 +267,8 @@ export const downloadModel = async (
}
const directoryPath = join(getJanDataFolderPath(), 'models', modelId)
if (!fs.existsSync(directoryPath)) {
fs.mkdirSync(directoryPath)
if (!existsSync(directoryPath)) {
mkdirSync(directoryPath)
}
// path to model binary
@ -281,7 +289,7 @@ export const downloadModel = async (
.on('end', function () {
console.debug('end')
})
.pipe(fs.createWriteStream(modelBinaryPath))
.pipe(createWriteStream(modelBinaryPath))
}
return {

View File

@ -4,16 +4,43 @@ export type FileStat = {
}
export type DownloadState = {
modelId: string
modelId: string // TODO: change to download id
fileName: string
time: DownloadTime
speed: number
percent: number
percent: number
size: DownloadSize
children?: DownloadState[]
error?: string
downloadState: 'downloading' | 'error' | 'end'
children?: DownloadState[]
error?: string
extensionId?: string
downloadType?: DownloadType
localPath?: string
}
export type DownloadType = 'model' | 'extension'
export type DownloadRequest = {
/**
* The URL to download the file from.
*/
url: string
/**
* The local path to save the file to.
*/
localPath: string
/**
* The extension ID of the extension that initiated the download.
*
* Can be extension name.
*/
extensionId?: string
downloadType?: DownloadType
}
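
An extension-prerequisite download (the new case this PR adds) tags the request so the Downloader can emit progress for it immediately. A sketch with placeholder values:

const runnerDownload: DownloadRequest = {
  url: 'https://example.com/nitro-tensorrt-llm/runner.tar.gz', // placeholder artifact URL
  localPath: 'engines/nitro-tensorrt-llm/runner.tar.gz',
  extensionId: '@janhq/tensorrt-llm-extension', // hypothetical extension name
  downloadType: 'extension',
}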
type DownloadTime = {

View File

@ -0,0 +1,8 @@
export type FileDownloadRequest = {
downloadId: string
url: string
localPath: string
fileName: string
displayName: string
metadata: Record<string, string | number>
}

View File

@ -1,3 +1,5 @@
export * from './systemResourceInfo'
export * from './promptTemplate'
export * from './appUpdate'
export * from './fileDownloadRequest'
export * from './networkConfig'

View File

@ -0,0 +1,4 @@
export type NetworkConfig = {
proxy?: string
ignoreSSL?: boolean
}

View File

@ -2,3 +2,31 @@ export type SystemResourceInfo = {
numCpuPhysicalCore: number
memAvailable: number
}
export type RunMode = 'cpu' | 'gpu'
export type GpuSetting = {
notify: boolean
run_mode: RunMode
nvidia_driver: {
exist: boolean
version: string
}
cuda: {
exist: boolean
version: string
}
gpus: GpuSettingInfo[]
gpu_highest_vram: string
gpus_in_use: string[]
is_initial: boolean
// TODO: This needs to be set based on user toggle in settings
vulkan: boolean
}
export type GpuSettingInfo = {
id: string
vram: string
name: string
arch?: string
}
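
Concretely, a populated GpuSetting might look like this (illustrative values only):

const example: GpuSetting = {
  notify: true,
  run_mode: 'gpu',
  nvidia_driver: { exist: true, version: '535.98' },
  cuda: { exist: true, version: '12' },
  gpus: [{ id: '0', vram: '24576', name: 'NVIDIA GeForce RTX 4090', arch: 'ada' }],
  gpu_highest_vram: '0',
  gpus_in_use: ['0'],
  is_initial: false,
  vulkan: false,
}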

View File

@ -19,6 +19,7 @@ export enum InferenceEngine {
nitro = 'nitro',
openai = 'openai',
triton_trtllm = 'triton_trtllm',
nitro_tensorrt_llm = 'nitro-tensorrt-llm',
tool_retrieval_enabled = 'tool_retrieval_enabled',
}

View File

@ -1,3 +1,4 @@
import { GpuSetting } from '../miscellaneous'
import { Model } from './modelEntity'
/**
@ -10,7 +11,11 @@ export interface ModelInterface {
* @param network - Optional object to specify proxy/whether to ignore SSL certificates.
* @returns A Promise that resolves when the model has been downloaded.
*/
downloadModel(model: Model, network?: { ignoreSSL?: boolean; proxy?: string }): Promise<void>
downloadModel(
model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void>
/**
* Cancels the download of a specific model.

View File

@ -1 +1,2 @@
export * from './monitoringInterface'
export * from './resourceInfo'

View File

@ -0,0 +1,6 @@
export type ResourceInfo = {
mem: {
totalMemory: number
usedMemory: number
}
}

View File

@ -13,7 +13,7 @@
"declarationDir": "dist/types",
"outDir": "dist/lib",
"importHelpers": true,
"types": ["@types/jest"]
"types": ["@types/jest"],
},
"include": ["src"]
"include": ["src"],
}

View File

@ -13,6 +13,7 @@ import {
events,
DownloadEvent,
log,
DownloadRequest,
} from '@janhq/core'
import { ggufMetadata } from 'hyllama'
@ -148,7 +149,11 @@ export default class JanHuggingFaceExtension extends HuggingFaceExtension {
if (this.interrupted) return
if (!(await fs.existsSync(localPath))) {
downloadFile(url, localPath, network)
const downloadRequest: DownloadRequest = {
url,
localPath,
}
downloadFile(downloadRequest, network)
filePaths.push(filePath)
}
}

View File

@ -1,3 +1,3 @@
@echo off
set /p NITRO_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan

View File

@ -8,7 +8,7 @@
"license": "AGPL-3.0",
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro && download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",

View File

@ -108,9 +108,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
events.on(InferenceEvent.OnInferenceStopped, () =>
this.onInferenceStopped()
)
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo', {})
}
/**

View File

@ -1,237 +0,0 @@
import { writeFileSync, existsSync, readFileSync } from 'fs'
import { exec, spawn } from 'child_process'
import path from 'path'
import { getJanDataFolderPath, log } from '@janhq/core/node'
/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFALT_SETTINGS = {
notify: true,
run_mode: 'cpu',
nvidia_driver: {
exist: false,
version: '',
},
cuda: {
exist: false,
version: '',
},
gpus: [],
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: false
}
/**
* Path to the settings file
**/
export const GPU_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
/**
* Current nitro process
*/
let nitroProcessInfo: NitroProcessInfo | undefined = undefined
/**
* Nitro process info
*/
export interface NitroProcessInfo {
isRunning: boolean
}
/**
* This will retrive GPU informations and persist settings.json
* Will be called when the extension is loaded to turn on GPU acceleration if supported
*/
export async function updateNvidiaInfo() {
if (process.platform !== 'darwin') {
let data
try {
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
data = DEFALT_SETTINGS
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
}
updateNvidiaDriverInfo()
updateGpuInfo()
}
}
/**
* Retrieve current nitro process
*/
export const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => {
nitroProcessInfo = {
isRunning: subprocess != null,
}
return nitroProcessInfo
}
/**
* Validate nvidia and cuda for linux and windows
*/
export async function updateNvidiaDriverInfo(): Promise<void> {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
if (!error) {
const firstLine = stdout.split('\n')[0].trim()
data['nvidia_driver'].exist = true
data['nvidia_driver'].version = firstLine
} else {
data['nvidia_driver'].exist = false
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
Promise.resolve()
}
)
}
/**
* Check if file exists in paths
*/
export function checkFileExistenceInPaths(
file: string,
paths: string[]
): boolean {
return paths.some((p) => existsSync(path.join(p, file)))
}
/**
* Validate cuda for linux and windows
*/
export function updateCudaExistence(
data: Record<string, any> = DEFALT_SETTINGS
): Record<string, any> {
let filesCuda12: string[]
let filesCuda11: string[]
let paths: string[]
let cudaVersion: string = ''
if (process.platform === 'win32') {
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
} else {
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: []
paths.push('/usr/lib/x86_64-linux-gnu/')
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (cudaExists) {
cudaVersion = '11'
}
} else {
cudaVersion = '12'
}
data['cuda'].exist = cudaExists
data['cuda'].version = cudaVersion
console.debug(data['is_initial'], data['gpus_in_use'])
if (cudaExists && data['is_initial'] && data['gpus_in_use'].length > 0) {
data.run_mode = 'gpu'
}
data.is_initial = false
return data
}
/**
* Get GPU information
*/
export async function updateGpuInfo(): Promise<void> {
let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
// Cuda
if (data['vulkan'] === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
(error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
let gpus = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
gpus.push({ id, vram: 0, name })
}
data.gpus = gpus
if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
}
Promise.resolve()
}
)
} else {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
(error, stdout) => {
if (!error) {
log(stdout)
// Get GPU info and gpu has higher memory first
let highestVram = 0
let highestVramId = '0'
let gpus = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name }
})
data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = ''
}
if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
data.gpus_in_use = [data['gpu_highest_vram']]
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
Promise.resolve()
}
)
}
}

View File

@ -1,12 +1,19 @@
import { getJanDataFolderPath } from '@janhq/core/node'
import { readFileSync } from 'fs'
import * as path from 'path'
import { GPU_INFO_FILE } from './accelerator'
export interface NitroExecutableOptions {
executablePath: string
cudaVisibleDevices: string
vkVisibleDevices: string
}
export const GPU_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
/**
* Find which executable file to run based on the current platform.
* @returns The name of the executable file to run.

View File

@ -4,7 +4,6 @@ import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import { log, getSystemResourceInfo } from '@janhq/core/node'
import { getNitroProcessInfo, updateNvidiaInfo } from './accelerator'
import {
Model,
InferenceEngine,
@ -385,11 +384,26 @@ function dispose() {
killSubprocess()
}
/**
* Nitro process info
*/
export interface NitroProcessInfo {
isRunning: boolean
}
/**
* Retrieve current nitro process
*/
const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
return {
isRunning: subprocess != null,
}
}
export default {
runModel,
stopModel,
killSubprocess,
dispose,
updateNvidiaInfo,
getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess),
getCurrentNitroProcessInfo,
}

View File

@ -17,6 +17,8 @@ import {
ImportingModel,
LocalImportModelEvent,
baseName,
GpuSetting,
DownloadRequest,
} from '@janhq/core'
import { extractFileName } from './helpers/path'
@ -29,10 +31,14 @@ export default class JanModelExtension extends ModelExtension {
private static readonly _modelMetadataFileName = 'model.json'
private static readonly _supportedModelFormat = '.gguf'
private static readonly _incompletedModelFileName = '.download'
private static readonly _offlineInferenceEngine = InferenceEngine.nitro
private static readonly _offlineInferenceEngine = [
InferenceEngine.nitro,
InferenceEngine.nitro_tensorrt_llm,
]
private static readonly _tensorRtEngineFormat = '.engine'
private static readonly _configDirName = 'config'
private static readonly _defaultModelFileName = 'default-model.json'
private static readonly _supportedGpuArch = ['turing', 'ampere', 'ada']
/**
* Called when the extension is loaded.
@ -89,12 +95,52 @@ export default class JanModelExtension extends ModelExtension {
*/
async downloadModel(
model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void> {
// create corresponding directory
const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath)
if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
if (!gpuSettings || gpuSettings.gpus.length === 0) {
console.error('No GPU found. Please check your GPU setting.')
return
}
const firstGpu = gpuSettings.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
const gpuArch = firstGpu.arch
if (gpuArch === undefined) {
console.error(
'No GPU architecture found. Please check your GPU setting.'
)
return
}
if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) {
console.error(
`Your GPU: ${firstGpu.name} is not supported. Only 20xx, 30xx, and 40xx series are supported.`
)
return
}
const os = 'windows' // TODO: remove this hard coded value
const newSources = model.sources.map((source) => {
const newSource = { ...source }
newSource.url = newSource.url
.replace(/<os>/g, os)
.replace(/<gpuarch>/g, gpuArch)
return newSource
})
model.sources = newSources
}
console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
if (model.sources.length > 1) {
// path to model binaries
for (const source of model.sources) {
@ -105,8 +151,11 @@ export default class JanModelExtension extends ModelExtension {
if (source.filename) {
path = await joinPath([modelDirPath, source.filename])
}
downloadFile(source.url, path, network)
const downloadRequest: DownloadRequest = {
url: source.url,
localPath: path,
}
downloadFile(downloadRequest, network)
}
// TODO: handle multiple binaries for web later
} else {
@ -115,7 +164,11 @@ export default class JanModelExtension extends ModelExtension {
JanModelExtension._supportedModelFormat
)
const path = await joinPath([modelDirPath, fileName])
downloadFile(model.sources[0]?.url, path, network)
const downloadRequest: DownloadRequest = {
url: model.sources[0]?.url,
localPath: path,
}
downloadFile(downloadRequest, network)
if (window && window.core?.api && window.core.api.baseApiUrl) {
this.startPollingDownloadProgress(model.id)
@ -238,7 +291,7 @@ export default class JanModelExtension extends ModelExtension {
async getDownloadedModels(): Promise<Model[]> {
return await this.getModelsMetadata(
async (modelDir: string, model: Model) => {
if (model.engine !== JanModelExtension._offlineInferenceEngine)
if (!JanModelExtension._offlineInferenceEngine.includes(model.engine))
return true
// model binaries (sources) are absolute path & exist
@ -247,22 +300,32 @@ export default class JanModelExtension extends ModelExtension {
)
if (existFiles.every((exist) => exist)) return true
return await fs
const result = await fs
.readdirSync(await joinPath([JanModelExtension._homeDir, modelDir]))
.then((files: string[]) => {
// Model binary exists in the directory
// Model binary name can match model ID or be a .gguf file and not be an incompleted model file
return (
files.includes(modelDir) ||
files.filter(
(file) =>
files.filter((file) => {
if (
file.endsWith(JanModelExtension._incompletedModelFileName)
) {
return false
}
return (
file
.toLowerCase()
.includes(JanModelExtension._supportedModelFormat) &&
!file.endsWith(JanModelExtension._incompletedModelFileName)
)?.length >= model.sources.length
.includes(JanModelExtension._supportedModelFormat) ||
file
.toLowerCase()
.includes(JanModelExtension._tensorRtEngineFormat)
)
})?.length > 0 // TODO: NamH find better way (can use basename to check the file name with source url)
)
})
return result
}
)
}
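
The <os>/<gpuarch> substitution above lets a single model.json reference per-platform prebuilt TensorRT engines. A sketch with a placeholder source URL:

const url = 'https://example.com/models/llm/<os>/<gpuarch>/engine.tar.gz' // placeholder
const resolved = url.replace(/<os>/g, 'windows').replace(/<gpuarch>/g, 'ada')
// resolved === 'https://example.com/models/llm/windows/ada/engine.tar.gz'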

View File

@ -0,0 +1,2 @@
@echo off
.\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin

View File

@ -3,21 +3,40 @@
"version": "1.0.10",
"description": "This extension provides system health and OS level data",
"main": "dist/index.js",
"module": "dist/module.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build": "tsc --module commonjs && rollup -c rollup.config.ts && npm run download-artifacts",
"download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
"download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
"download-artifacts:win32": "download.bat",
"download-artifacts:linux": "download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@types/node": "^20.11.4",
"@types/node-os-utils": "^1.3.4",
"run-script-os": "^1.1.6",
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4",
"ts-loader": "^9.5.0"
"rollup": "^2.38.5",
"rollup-plugin-define": "^1.0.1",
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"typescript": "^5.3.3",
"download-cli": "^1.1.1"
},
"dependencies": {
"@janhq/core": "file:../../core",
"@rollup/plugin-replace": "^5.0.5",
"node-os-utils": "^1.3.7"
},
"files": [

View File

@ -0,0 +1,68 @@
import resolve from '@rollup/plugin-node-resolve'
import commonjs from '@rollup/plugin-commonjs'
import sourceMaps from 'rollup-plugin-sourcemaps'
import typescript from 'rollup-plugin-typescript2'
import json from '@rollup/plugin-json'
import replace from '@rollup/plugin-replace'
const packageJson = require('./package.json')
export default [
{
input: `src/index.ts`,
output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
external: [],
watch: {
include: 'src/**',
},
plugins: [
replace({
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
}),
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: ['.js', '.ts', '.svelte'],
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
{
input: `src/node/index.ts`,
output: [
{ file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
],
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
external: ['@janhq/core/node'],
watch: {
include: 'src/node/**',
},
plugins: [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: ['.ts', '.js', '.json'],
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
]

View File

@ -1 +1,18 @@
declare const MODULE: string
declare const NODE: string
type CpuGpuInfo = {
cpu: {
usage: number
}
gpu: GpuInfo[]
}
type GpuInfo = {
id: string
name: string
temperature: string
utilization: string
memoryTotal: string
memoryFree: string
memoryUtilization: string
}

View File

@ -1,4 +1,4 @@
import { MonitoringExtension, executeOnMain } from '@janhq/core'
import { GpuSetting, MonitoringExtension, executeOnMain } from '@janhq/core'
/**
* JanMonitoringExtension is an extension that provides system monitoring functionality.
@ -8,19 +8,30 @@ export default class JanMonitoringExtension extends MonitoringExtension {
/**
* Called when the extension is loaded.
*/
async onLoad() {}
async onLoad() {
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo')
}
/**
* Called when the extension is unloaded.
*/
onUnload(): void {}
/**
* Returns the GPU configuration.
* @returns A Promise that resolves to an object containing the GPU configuration.
*/
async getGpuSetting(): Promise<GpuSetting | undefined> {
return executeOnMain(NODE, 'getGpuConfig')
}
/**
* Returns information about the system resources.
* @returns A Promise that resolves to an object containing information about the system resources.
*/
getResourcesInfo(): Promise<any> {
return executeOnMain(MODULE, 'getResourcesInfo')
return executeOnMain(NODE, 'getResourcesInfo')
}
/**
@ -28,6 +39,6 @@ export default class JanMonitoringExtension extends MonitoringExtension {
* @returns A Promise that resolves to an object containing information about the current system load.
*/
getCurrentLoad(): Promise<any> {
return executeOnMain(MODULE, 'getCurrentLoad')
return executeOnMain(NODE, 'getCurrentLoad')
}
}

View File

@ -1,92 +0,0 @@
const nodeOsUtils = require('node-os-utils')
const getJanDataFolderPath = require('@janhq/core/node').getJanDataFolderPath
const path = require('path')
const { readFileSync } = require('fs')
const exec = require('child_process').exec
const NVIDIA_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
const getResourcesInfo = () =>
new Promise((resolve) => {
nodeOsUtils.mem.used().then((ramUsedInfo) => {
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
const response = {
mem: {
totalMemory,
usedMemory,
},
}
resolve(response)
})
})
const getCurrentLoad = () =>
new Promise((resolve, reject) => {
nodeOsUtils.cpu.usage().then((cpuPercentage) => {
let data = {
run_mode: 'cpu',
gpus_in_use: [],
}
if (process.platform !== 'darwin') {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data['gpus_in_use'].join(',')
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
reject(error)
return
}
const gpuInfo = stdout
.trim()
.split('\n')
.map((line) => {
const [
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
return {
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
}
})
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo,
})
}
)
} else {
// Handle the case where gpuIds is empty
resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
}
})
})
module.exports = {
getResourcesInfo,
getCurrentLoad,
}

View File

@ -0,0 +1,317 @@
import { GpuSetting, GpuSettingInfo, ResourceInfo } from '@janhq/core'
import { getJanDataFolderPath, log } from '@janhq/core/node'
import { mem, cpu } from 'node-os-utils'
import { exec } from 'child_process'
import { writeFileSync, existsSync, readFileSync } from 'fs'
import path from 'path'
/**
* Path to the settings file
**/
export const GPU_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFAULT_SETTINGS: GpuSetting = {
notify: true,
run_mode: 'cpu',
nvidia_driver: {
exist: false,
version: '',
},
cuda: {
exist: false,
version: '',
},
gpus: [],
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: false,
}
export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
if (process.platform === 'darwin') return undefined
return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
}
export const getResourcesInfo = async (): Promise<ResourceInfo> => {
const ramUsedInfo = await mem.used()
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
const resourceInfo: ResourceInfo = {
mem: {
totalMemory,
usedMemory,
},
}
return resourceInfo
}
export const getCurrentLoad = () =>
new Promise<CpuGpuInfo>(async (resolve, reject) => {
const cpuPercentage = await cpu.usage()
let data = {
run_mode: 'cpu',
gpus_in_use: [],
}
if (process.platform !== 'darwin') {
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data.gpus_in_use.join(',')
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
reject(error)
return
}
const gpuInfo: GpuInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
const [
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
return {
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
}
})
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo,
})
}
)
} else {
// Handle the case where gpuIds is empty
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
})
/**
* This will retrieve GPU information and persist it to settings.json.
* Called when the extension is loaded, to turn on GPU acceleration if supported.
*/
export const updateNvidiaInfo = async () => {
// ignore if macos
if (process.platform === 'darwin') return
try {
JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
}
await updateNvidiaDriverInfo()
await updateGpuInfo()
}
const updateNvidiaDriverInfo = async () =>
new Promise((resolve, reject) => {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
const data: GpuSetting = JSON.parse(
readFileSync(GPU_INFO_FILE, 'utf-8')
)
if (!error) {
const firstLine = stdout.split('\n')[0].trim()
data.nvidia_driver.exist = true
data.nvidia_driver.version = firstLine
} else {
data.nvidia_driver.exist = false
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
}
)
})
const getGpuArch = (gpuName: string): string => {
if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
if (gpuName.includes('20')) return 'turing'
else if (gpuName.includes('30')) return 'ampere'
else if (gpuName.includes('40')) return 'ada'
else return 'unknown'
}
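
This arch detection is a heuristic over the marketing name reported by nvidia-smi/vulkaninfo, for example:

getGpuArch('NVIDIA GeForce RTX 2080 Ti') // 'turing'
getGpuArch('NVIDIA GeForce RTX 3090')    // 'ampere'
getGpuArch('NVIDIA GeForce RTX 4070')    // 'ada'
getGpuArch('AMD Radeon RX 7900 XTX')     // 'unknown' (not an NVIDIA name)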
const updateGpuInfo = async () =>
new Promise((resolve, reject) => {
let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
// Cuda
if (data.vulkan === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
(error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
const gpus: GpuSettingInfo[] = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
const arch = getGpuArch(name)
gpus.push({ id, vram: '0', name, arch })
}
data.gpus = gpus
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
} else {
reject(error)
}
}
)
} else {
// Cuda
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
(error, stdout) => {
if (!error) {
log(stdout)
// Get GPU info, tracking the GPU with the highest memory
let highestVram = 0
let highestVramId = '0'
const gpus: GpuSettingInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
const arch = getGpuArch(name)
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name, arch }
})
data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = ''
}
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpu_highest_vram]
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
}
)
}
})
/**
* Check if file exists in paths
*/
const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
return paths.some((p) => existsSync(path.join(p, file)))
}
/**
* Validate CUDA existence for Linux and Windows
*/
const updateCudaExistence = (
data: GpuSetting = DEFAULT_SETTINGS
): GpuSetting => {
let filesCuda12: string[]
let filesCuda11: string[]
let paths: string[]
let cudaVersion: string = ''
if (process.platform === 'win32') {
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
} else {
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: []
paths.push('/usr/lib/x86_64-linux-gnu/')
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (cudaExists) {
cudaVersion = '11'
}
} else {
cudaVersion = '12'
}
data.cuda.exist = cudaExists
data.cuda.version = cudaVersion
console.debug(data.is_initial, data.gpus_in_use)
if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
data.run_mode = 'gpu'
}
data.is_initial = false
return data
}
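Taken together, a minimal sketch of how these helpers compose at extension load time (the `./index` import path is an assumption for illustration):

```typescript
// updateNvidiaInfo seeds settings.json with driver and GPU details;
// getGpuConfig and getCurrentLoad then read from it
import { updateNvidiaInfo, getGpuConfig, getCurrentLoad } from './index'

const onExtensionLoad = async () => {
  await updateNvidiaInfo() // detect driver + GPUs, persist to settings.json
  const gpuConfig = await getGpuConfig() // undefined on macOS
  const load = await getCurrentLoad() // { cpu: { usage }, gpu: [...] }
  console.debug(gpuConfig?.run_mode, load.cpu.usage)
}
```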

View File

@@ -1,35 +0,0 @@
const path = require('path')
const webpack = require('webpack')
const packageJson = require('./package.json')
module.exports = {
experiments: { outputModule: true },
entry: './src/index.ts', // Adjust the entry point to match your project's main file
mode: 'production',
module: {
rules: [
{
test: /\.tsx?$/,
use: 'ts-loader',
exclude: /node_modules/,
},
],
},
output: {
filename: 'index.js', // Adjust the output file name as needed
path: path.resolve(__dirname, 'dist'),
library: { type: 'module' }, // Specify ESM output format
},
plugins: [
new webpack.DefinePlugin({
MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
}),
],
resolve: {
extensions: ['.ts', '.js'],
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
}

View File

@@ -0,0 +1,79 @@
# TensorRT-LLM Extension
Created using the Jan extension example
# Create a Jan Extension using TypeScript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the instructions below:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will now be a `.tgz` file in your extension directory.
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as the
extension name, main entry, description, and version.
When you copy this repository, update `package.json` with the name and description of your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that extension functions return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
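Putting it together, a minimal extension skeleton might look like the following sketch (`MyExtension` and its `inference` body are placeholders, not part of this repository):

```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'

export default class MyExtension {
  // Called by Jan when the extension is loaded
  onLoad(): void {
    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.inference(data)
    )
  }

  // Replace with your own inference logic
  async inference(data: MessageRequest): Promise<void> {
    console.debug('Received message request', data)
  }
}
```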
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@@ -0,0 +1,49 @@
[
{
"sources": [
{
"filename": "config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/config.json"
},
{
"filename": "rank0.engine",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/rank0.engine"
},
{
"filename": "tokenizer.model",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
},
{
"filename": "special_tokens_map.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
},
{
"filename": "tokenizer.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
},
{
"filename": "tokenizer_config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
}
],
"id": "llamacorn-1.1b-chat-fp16",
"object": "model",
"name": "LlamaCorn 1.1B Chat FP16",
"version": "1.0",
"description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
"format": "TensorRT-LLM",
"settings": {
"ctx_len": 2048
},
"parameters": {
"stream": true,
"max_tokens": 4096
},
"metadata": {
"author": "LLama",
"tags": ["TensorRT-LLM", "1B", "Finetuned"],
"size": 2151000000
},
"engine": "nitro-tensorrt-llm"
}
]
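The `<gpuarch>` and `<os>` placeholders in the source URLs above are substituted before download. As an illustration only (`resolveModelUrl` is a hypothetical helper; the real substitution happens in the app's download pipeline):

```typescript
// Replace every <gpuarch>/<os> placeholder in a model source URL,
// mirroring the .replace(/<version>/g, ...) pattern used for the runner URL
const resolveModelUrl = (url: string, gpuArch: string, os: string): string =>
  url.replace(/<gpuarch>/g, gpuArch).replace(/<os>/g, os)

// e.g. resolveModelUrl(url, 'ada', 'windows') selects the Ada/Windows engine build
```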

View File

@@ -0,0 +1,75 @@
{
"name": "@janhq/tensorrt-llm-extension",
"version": "0.0.2",
"description": "Enables accelerated inference leveraging Nvidia's TensorRT-LLM for optimal GPU hardware optimizations. Compatible with models in TensorRT-LLM format. Requires Nvidia GPU driver and CUDA Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"config": {
"host": "127.0.0.1",
"port": "3928"
},
"compatibility": {
"platform": [
"win32",
"linux"
],
"app": [
"0.1.0"
]
},
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:linux": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:darwin": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish": "run-script-os"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-replace": "^5.0.5",
"@types/node": "^20.11.4",
"@types/os-utils": "^0.0.4",
"@types/tcp-port-used": "^1.0.4",
"@types/decompress": "4.2.7",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
"rollup": "^2.38.5",
"rollup-plugin-define": "^1.0.1",
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"run-script-os": "^1.1.6",
"typescript": "^5.2.2"
},
"dependencies": {
"@janhq/core": "file:../../core",
"decompress": "^4.2.1",
"fetch-retry": "^5.0.6",
"path-browserify": "^1.0.1",
"rxjs": "^7.8.1",
"tcp-port-used": "^1.0.2",
"ulid": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"tcp-port-used",
"fetch-retry",
"decompress",
"@janhq/core"
]
}

View File

@@ -0,0 +1,73 @@
import resolve from '@rollup/plugin-node-resolve'
import commonjs from '@rollup/plugin-commonjs'
import sourceMaps from 'rollup-plugin-sourcemaps'
import typescript from 'rollup-plugin-typescript2'
import json from '@rollup/plugin-json'
import replace from '@rollup/plugin-replace'
const packageJson = require('./package.json')
export default [
{
input: `src/index.ts`,
output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
watch: {
include: 'src/**',
},
plugins: [
replace({
EXTENSION_NAME: JSON.stringify(packageJson.name),
TENSORRT_VERSION: JSON.stringify('0.1.5'),
DOWNLOAD_RUNNER_URL:
process.platform === 'darwin' || process.platform === 'win32'
? JSON.stringify(
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
)
: JSON.stringify(
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL ||
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/v1/chat/completions`
),
COMPATIBILITY: JSON.stringify(packageJson.compatibility),
}),
json(),
typescript({ useTsconfigDeclarationDir: true }),
commonjs(),
resolve({
extensions: ['.js', '.ts', '.svelte'],
}),
sourceMaps(),
],
},
{
input: `src/node/index.ts`,
output: [
{ file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
],
external: ['@janhq/core/node'],
watch: {
include: 'src/node/**',
},
plugins: [
replace({
LOAD_MODEL_URL: JSON.stringify(
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
),
TERMINATE_ENGINE_URL: JSON.stringify(
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/processmanager/destroy`
),
ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'),
ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? '3928'),
}),
json(),
typescript({ useTsconfigDeclarationDir: true }),
commonjs(),
resolve({
extensions: ['.ts', '.js', '.json'],
}),
sourceMaps(),
],
},
]

View File

@@ -0,0 +1,10 @@
declare const NODE: string
declare const INFERENCE_URL: string
declare const LOAD_MODEL_URL: string
declare const TERMINATE_ENGINE_URL: string
declare const ENGINE_HOST: string
declare const ENGINE_PORT: string
declare const DOWNLOAD_RUNNER_URL: string
declare const TENSORRT_VERSION: string
declare const COMPATIBILITY: object
declare const EXTENSION_NAME: string
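These `declare const` statements have no runtime binding of their own; `@rollup/plugin-replace` (configured in `rollup.config.ts` above) inlines each identifier as a string literal at build time. A sketch of the effect, with example values:

```typescript
// Source as written in the extension:
const url = DOWNLOAD_RUNNER_URL.replace(/<version>/g, TENSORRT_VERSION)

// Roughly what lands in the bundle after replacement (example values;
// <gpuarch> is resolved later, at install time):
// const url =
//   'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v0.1.5/nitro-linux-v0.1.5-amd64-tensorrt-llm-<gpuarch>.tar.gz'
```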

View File

@@ -0,0 +1,147 @@
/**
* @module tensorrt-llm-extension/src/index
*/
import {
Compatibility,
DownloadEvent,
DownloadRequest,
DownloadState,
GpuSetting,
InstallationState,
Model,
baseName,
downloadFile,
events,
executeOnMain,
joinPath,
showToast,
systemInformations,
LocalOAIEngine,
fs,
} from '@janhq/core'
import models from '../models.json'
/**
* TensorRTLLMExtension - Implementation of LocalOAIEngine
* @extends LocalOAIEngine
* Provides pre-populated models for TensorRT-LLM
*/
export default class TensorRTLLMExtension extends LocalOAIEngine {
/**
* Overrides the custom function names for loading and unloading the model,
* which are implemented in the node module
*/
override provider = 'nitro-tensorrt-llm'
override inferenceUrl = INFERENCE_URL
override nodeModule = NODE
private supportedGpuArch = ['turing', 'ampere', 'ada']
compatibility() {
return COMPATIBILITY as unknown as Compatibility
}
/**
* Models implemented by the extension;
* defines the pre-populated models
*/
async models(): Promise<Model[]> {
if ((await this.installationState()) === 'Installed')
return models as unknown as Model[]
return []
}
override async install(): Promise<void> {
const info = await systemInformations()
console.debug(
`TensorRTLLMExtension installing pre-requisites... ${JSON.stringify(info)}`
)
const gpuSetting: GpuSetting | undefined = info.gpuSetting
if (gpuSetting === undefined || gpuSetting.gpus.length === 0) {
console.error('No GPU setting found. Please check your GPU setting.')
return
}
// TODO: we only check for the first graphics card. Need to refactor this later.
const firstGpu = gpuSetting.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
if (firstGpu.arch === undefined) {
console.error('No GPU architecture found. Please check your GPU setting.')
return
}
if (!this.supportedGpuArch.includes(firstGpu.arch)) {
console.error(
`Your GPU: ${firstGpu.name} is not supported. Only 20xx, 30xx, 40xx series are supported.`
)
return
}
const binaryFolderPath = await executeOnMain(
this.nodeModule,
'binaryFolder'
)
if (!(await fs.existsSync(binaryFolderPath))) {
await fs.mkdirSync(binaryFolderPath)
}
const placeholderUrl = DOWNLOAD_RUNNER_URL
const tensorrtVersion = TENSORRT_VERSION
const url = placeholderUrl
.replace(/<version>/g, tensorrtVersion)
.replace(/<gpuarch>/g, firstGpu.arch)
const tarball = await baseName(url)
const tarballFullPath = await joinPath([binaryFolderPath, tarball])
const downloadRequest: DownloadRequest = {
url,
localPath: tarballFullPath,
extensionId: EXTENSION_NAME,
downloadType: 'extension',
}
downloadFile(downloadRequest)
// TODO: wrap this into a Promise
const onFileDownloadSuccess = async (state: DownloadState) => {
// Ignore events from other downloads
if (state.fileName !== tarball) return
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
await executeOnMain(this.nodeModule, 'decompressRunner', tarballFullPath)
events.emit(DownloadEvent.onFileUnzipSuccess, state)
// Pre-populate models as soon as the runner is ready
this.prePopulateModels().then(() => {
showToast(
'Extension installed successfully.',
'New models are added to Model Hub.'
)
})
}
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
}
override async installationState(): Promise<InstallationState> {
// For now, we just check for the nitro-tensorrt-llm executable
const isNitroExecutableAvailable = await executeOnMain(
this.nodeModule,
'isNitroExecutableAvailable'
)
return isNitroExecutableAvailable ? 'Installed' : 'NotInstalled'
}
override onInferenceStopped() {
if (!this.isRunning) return
showToast(
'Unable to Stop Inference',
'The model does not support stopping inference.'
)
return Promise.resolve()
}
}
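A sketch of the "wrap this into a Promise" TODO above, using only the `events` API already imported in this file, so that `install()` could await the download end-to-end:

```typescript
// Resolve once the matching tarball finishes downloading; other files are ignored
const waitForDownload = (tarball: string): Promise<DownloadState> =>
  new Promise((resolve) => {
    const handler = (state: DownloadState) => {
      if (state.fileName !== tarball) return
      events.off(DownloadEvent.onFileDownloadSuccess, handler)
      resolve(state)
    }
    events.on(DownloadEvent.onFileDownloadSuccess, handler)
  })
```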

View File

@@ -0,0 +1,191 @@
import path from 'path'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import { log } from '@janhq/core/node'
import { existsSync } from 'fs'
import decompress from 'decompress'
// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch)
/**
* The request parameters for the model load operation.
*/
interface ModelLoadParams {
engine_path: string
ctx_len: number
}
// The subprocess instance for Engine
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
/**
* Initializes an engine subprocess to load a machine learning model.
* @param params - The model load settings.
*/
async function loadModel(params: any): Promise<{ error: Error | undefined }> {
// modelFolder is the absolute path to the running model folder
// e.g. ~/jan/models/llama-2
let modelFolder = params.modelFolder
const settings: ModelLoadParams = {
engine_path: modelFolder,
ctx_len: params.model.settings.ctx_len ?? 2048,
}
return runEngineAndLoadModel(settings)
}
/**
* Stops the engine subprocess.
*/
function unloadModel(): Promise<any> {
const controller = new AbortController()
setTimeout(() => controller.abort(), 5000)
debugLog(`Request to kill engine`)
subprocess?.kill()
return fetch(TERMINATE_ENGINE_URL, {
method: 'DELETE',
signal: controller.signal,
})
.then(() => {
subprocess = undefined
})
.catch(() => {}) // Do nothing with this attempt
.then(() => tcpPortUsed.waitUntilFree(parseInt(ENGINE_PORT), 300, 5000)) // Wait for port available
.then(() => debugLog(`Engine process is terminated`))
.catch((err) => {
debugLog(
`Could not kill the running process on port ${ENGINE_PORT}. Another process may be running on the same port. ${err}`
)
throw 'PORT_NOT_AVAILABLE'
})
}
/**
* 1. Spawn engine process
* 2. Load model into engine subprocess
* @returns
*/
async function runEngineAndLoadModel(settings: ModelLoadParams) {
return unloadModel()
.then(runEngine)
.then(() => loadModelRequest(settings))
.catch((err) => {
// TODO: Broadcast error so app could display proper error message
debugLog(`${err}`, 'Error')
return { error: err }
})
}
/**
* Loads an LLM model into the engine subprocess by sending an HTTP POST request.
*/
function loadModelRequest(
settings: ModelLoadParams
): Promise<{ error: Error | undefined }> {
debugLog(`Loading model with params ${JSON.stringify(settings)}`)
return fetchRetry(LOAD_MODEL_URL, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(settings),
retries: 3,
retryDelay: 500,
})
.then((res) => {
debugLog(`Load model success with response ${JSON.stringify(res)}`)
return Promise.resolve({ error: undefined })
})
.catch((err) => {
debugLog(`Load model failed with error ${err}`, 'Error')
return Promise.resolve({ error: err })
})
}
/**
* Spawns engine subprocess.
*/
function runEngine(): Promise<any> {
debugLog(`Spawning engine subprocess...`)
return new Promise<void>((resolve, reject) => {
// Current directory by default
let binaryFolder = path.join(__dirname, '..', 'bin')
// Binary path
const binary = path.join(
binaryFolder,
process.platform === 'win32' ? 'nitro.exe' : 'nitro'
)
const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
// Execute the binary
debugLog(`Spawn nitro at path: ${binary}, and args: ${args}`)
subprocess = spawn(binary, args, {
cwd: binaryFolder,
env: {
...process.env,
},
})
// Handle subprocess output
subprocess.stdout.on('data', (data: any) => {
debugLog(`${data}`)
})
subprocess.stderr.on('data', (data: any) => {
debugLog(`${data}`)
})
subprocess.on('close', (code: any) => {
debugLog(`Engine exited with code: ${code}`)
subprocess = undefined
reject(`child process exited with code ${code}`)
})
tcpPortUsed
.waitUntilUsed(parseInt(ENGINE_PORT), 300, 30000)
.then(() => {
debugLog(`Engine is ready`)
resolve()
})
// Reject on timeout so the failure is not an unhandled rejection
.catch(reject)
})
}
function debugLog(message: string, level: string = 'Debug') {
log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
}
const binaryFolder = async (): Promise<string> => {
return path.join(__dirname, '..', 'bin')
}
const decompressRunner = async (zipPath: string) => {
const output = path.join(__dirname, '..', 'bin')
console.debug(`Decompressing ${zipPath} to ${output}...`)
try {
const files = await decompress(zipPath, output)
console.debug('Decompress finished!', files)
} catch (err) {
console.error(`Decompress ${zipPath} failed: ${err}`)
}
}
const isNitroExecutableAvailable = async (): Promise<boolean> => {
const binary = path.join(
__dirname,
'..',
'bin',
process.platform === 'win32' ? 'nitro.exe' : 'nitro'
)
return existsSync(binary)
}
export default {
binaryFolder,
decompressRunner,
loadModel,
unloadModel,
dispose: unloadModel,
isNitroExecutableAvailable,
}
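These exports are not imported by the renderer directly; following the pattern in `src/index.ts` above, they are invoked by name through `executeOnMain`. A usage sketch (the `nodeModule` argument stands in for the bundled `NODE` constant):

```typescript
import { executeOnMain } from '@janhq/core'

// Ask the main process whether the nitro binary has been installed yet
const checkRunner = async (nodeModule: string): Promise<void> => {
  const available: boolean = await executeOnMain(
    nodeModule,
    'isNitroExecutableAvailable'
  )
  if (!available) console.debug('nitro binary not installed yet')
}
```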

View File

@@ -0,0 +1,20 @@
{
"compilerOptions": {
"moduleResolution": "node",
"target": "es5",
"module": "ES2020",
"lib": ["es2015", "es2016", "es2017", "dom"],
"strict": true,
"sourceMap": true,
"declaration": true,
"allowSyntheticDefaultImports": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"declarationDir": "dist/types",
"outDir": "dist",
"importHelpers": true,
"resolveJsonModule": true,
"typeRoots": ["node_modules/@types"]
},
"include": ["src"]
}

View File

@@ -73,8 +73,9 @@ const DropdownListSidebar = ({
const [copyId, setCopyId] = useState('')
// TODO: Update filter condition for the local model
const localModel = downloadedModels.filter(
(model) => model.engine === InferenceEngine.nitro
(model) => model.engine !== InferenceEngine.openai
)
const remoteModel = downloadedModels.filter(
(model) => model.engine === InferenceEngine.openai

View File

@@ -0,0 +1,87 @@
import { useCallback, useEffect } from 'react'
import { abortDownload } from '@janhq/core'
import {
Button,
Modal,
ModalContent,
ModalHeader,
ModalTitle,
Progress,
} from '@janhq/uikit'
import { atom, useAtom, useAtomValue } from 'jotai'
import {
formatDownloadPercentage,
formatExtensionsName,
} from '@/utils/converter'
import {
InstallingExtensionState,
installingExtensionAtom,
} from '@/helpers/atoms/Extension.atom'
export const showInstallingExtensionModalAtom = atom(false)
const InstallingExtensionModal: React.FC = () => {
const [showInstallingExtensionModal, setShowInstallingExtensionModal] =
useAtom(showInstallingExtensionModalAtom)
const installingExtensions = useAtomValue(installingExtensionAtom)
useEffect(() => {
if (installingExtensions.length === 0) {
setShowInstallingExtensionModal(false)
}
}, [installingExtensions, setShowInstallingExtensionModal])
const onAbortInstallingExtensionClick = useCallback(
(item: InstallingExtensionState) => {
if (item.localPath) {
abortDownload(item.localPath)
}
},
[]
)
return (
<Modal
open={showInstallingExtensionModal}
onOpenChange={() => setShowInstallingExtensionModal(false)}
>
<ModalContent>
<ModalHeader>
<ModalTitle>Installing Extension</ModalTitle>
</ModalHeader>
{Object.values(installingExtensions).map((item) => (
<div className="pt-2" key={item.extensionId}>
<Progress
className="mb-2 h-2"
value={
formatDownloadPercentage(item.percentage, {
hidePercentage: true,
}) as number
}
/>
<div className="flex items-center justify-between gap-x-2">
<div className="flex gap-x-2">
<p className="line-clamp-1">
{formatExtensionsName(item.extensionId)}
</p>
<span>{formatDownloadPercentage(item.percentage)}</span>
</div>
<Button
themes="outline"
size="sm"
onClick={() => onAbortInstallingExtensionClick(item)}
>
Cancel
</Button>
</div>
</div>
))}
</ModalContent>
</Modal>
)
}
export default InstallingExtensionModal

View File

@@ -0,0 +1,52 @@
import { Fragment, useCallback } from 'react'
import { Progress } from '@janhq/uikit'
import { useAtomValue, useSetAtom } from 'jotai'
import { showInstallingExtensionModalAtom } from './InstallingExtensionModal'
import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
const InstallingExtension: React.FC = () => {
const installingExtensions = useAtomValue(installingExtensionAtom)
const setShowInstallingExtensionModal = useSetAtom(
showInstallingExtensionModalAtom
)
const shouldShowInstalling = installingExtensions.length > 0
let totalPercentage = 0
let totalExtensions = 0
for (const installation of installingExtensions) {
totalPercentage += installation.percentage
totalExtensions++
}
const progress = (totalPercentage / totalExtensions) * 100
const onClick = useCallback(() => {
setShowInstallingExtensionModal(true)
}, [setShowInstallingExtensionModal])
return (
<Fragment>
{shouldShowInstalling ? (
<div
className="flex cursor-pointer flex-row items-center space-x-2"
onClick={onClick}
>
<p className="text-xs font-semibold text-muted-foreground">
Installing Extension
</p>
<div className="flex flex-row items-center justify-center space-x-2 rounded-md bg-secondary px-2 py-[2px]">
<Progress className="h-2 w-24" value={progress} />
<span className="text-xs font-bold text-muted-foreground">
{progress.toFixed(2)}%
</span>
</div>
</div>
) : null}
</Fragment>
)
}
export default InstallingExtension

View File

@@ -16,6 +16,7 @@ import ProgressBar from '@/containers/ProgressBar'
import { appDownloadProgress } from '@/containers/Providers/Jotai'
import ImportingModelState from './ImportingModelState'
import InstallingExtension from './InstallingExtension'
import SystemMonitor from './SystemMonitor'
import UpdatedFailedModal from './UpdateFailedModal'
@@ -46,6 +47,7 @@ const BottomBar = () => {
<ImportingModelState />
<DownloadingState />
<UpdatedFailedModal />
<InstallingExtension />
</div>
<div className="flex items-center gap-x-3">
<SystemMonitor />

View File

@@ -22,6 +22,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
import InstallingExtensionModal from './BottomBar/InstallingExtension/InstallingExtensionModal'
import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
const BaseLayout = (props: PropsWithChildren) => {
@@ -68,6 +70,7 @@ const BaseLayout = (props: PropsWithChildren) => {
{importModelStage === 'IMPORTING_MODEL' && <ImportingModelModal />}
{importModelStage === 'EDIT_MODEL_INFO' && <EditModelInfoModal />}
{importModelStage === 'CONFIRM_CANCEL' && <CancelModelImportModal />}
<InstallingExtensionModal />
</div>
)
}

View File

@ -7,6 +7,10 @@ import { useSetAtom } from 'jotai'
import { setDownloadStateAtom } from '@/hooks/useDownloadState'
import { formatExtensionsName } from '@/utils/converter'
import { toaster } from '../Toast'
import AppUpdateListener from './AppUpdateListener'
import ClipboardListener from './ClipboardListener'
import EventHandler from './EventHandler'
@@ -14,46 +18,89 @@ import EventHandler from './EventHandler'
import ModelImportListener from './ModelImportListener'
import QuickAskListener from './QuickAskListener'
import {
InstallingExtensionState,
removeInstallingExtensionAtom,
setInstallingExtensionAtom,
} from '@/helpers/atoms/Extension.atom'
const EventListenerWrapper = ({ children }: PropsWithChildren) => {
const setDownloadState = useSetAtom(setDownloadStateAtom)
const setInstallingExtension = useSetAtom(setInstallingExtensionAtom)
const removeInstallingExtension = useSetAtom(removeInstallingExtensionAtom)
const onFileDownloadUpdate = useCallback(
async (state: DownloadState) => {
console.debug('onFileDownloadUpdate', state)
setDownloadState(state)
if (state.downloadType === 'extension') {
const installingExtensionState: InstallingExtensionState = {
extensionId: state.extensionId!,
percentage: state.percent,
localPath: state.localPath,
}
setInstallingExtension(state.extensionId!, installingExtensionState)
} else {
setDownloadState(state)
}
},
[setDownloadState]
[setDownloadState, setInstallingExtension]
)
const onFileDownloadError = useCallback(
(state: DownloadState) => {
console.debug('onFileDownloadError', state)
setDownloadState(state)
if (state.downloadType === 'extension') {
removeInstallingExtension(state.extensionId!)
} else {
setDownloadState(state)
}
},
[setDownloadState]
[setDownloadState, removeInstallingExtension]
)
const onFileDownloadSuccess = useCallback(
(state: DownloadState) => {
console.debug('onFileDownloadSuccess', state)
setDownloadState(state)
if (state.downloadType !== 'extension') {
setDownloadState(state)
}
},
[setDownloadState]
)
const onFileUnzipSuccess = useCallback(
(state: DownloadState) => {
console.debug('onFileUnzipSuccess', state)
toaster({
title: 'Success',
description: `Install ${formatExtensionsName(state.extensionId!)} successfully.`,
type: 'success',
})
removeInstallingExtension(state.extensionId!)
},
[removeInstallingExtension]
)
useEffect(() => {
console.debug('EventListenerWrapper: registering event listeners...')
events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
events.on(DownloadEvent.onFileDownloadError, onFileDownloadError)
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
return () => {
console.debug('EventListenerWrapper: unregistering event listeners...')
events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
events.off(DownloadEvent.onFileDownloadError, onFileDownloadError)
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
events.off(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
}
}, [onFileDownloadUpdate, onFileDownloadError, onFileDownloadSuccess])
}, [
onFileDownloadUpdate,
onFileDownloadError,
onFileDownloadSuccess,
onFileUnzipSuccess,
])
return (
<AppUpdateListener>

View File

@@ -23,7 +23,9 @@ export class ExtensionManager {
* @param type - The type of the extension to retrieve.
* @returns The extension, if found.
*/
get<T extends BaseExtension>(type: ExtensionTypeEnum): T | undefined {
get<T extends BaseExtension>(
type: ExtensionTypeEnum | string
): T | undefined {
return this.extensions.get(type) as T | undefined
}

View File

@@ -0,0 +1,40 @@
import { atom } from 'jotai'
type ExtensionId = string
export type InstallingExtensionState = {
extensionId: ExtensionId
percentage: number
localPath?: string
}
export const installingExtensionAtom = atom<InstallingExtensionState[]>([])
export const setInstallingExtensionAtom = atom(
null,
(get, set, extensionId: string, state: InstallingExtensionState) => {
const current = get(installingExtensionAtom)
const isExists = current.some((e) => e.extensionId === extensionId)
if (isExists) {
const newCurrent = current.map((e) => {
if (e.extensionId === extensionId) {
return state
}
return e
})
set(installingExtensionAtom, newCurrent)
} else {
set(installingExtensionAtom, [...current, state])
}
}
)
export const removeInstallingExtensionAtom = atom(
null,
(get, set, extensionId: string) => {
const current = get(installingExtensionAtom)
const newCurrent = current.filter((e) => e.extensionId !== extensionId)
set(installingExtensionAtom, newCurrent)
}
)
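A usage sketch mirroring how `EventListenerWrapper` above drives these atoms: the write-only atoms are invoked through `useSetAtom`, and the list is read with `useAtomValue` (`useInstallProgress` is a hypothetical hook for illustration):

```typescript
import { useAtomValue, useSetAtom } from 'jotai'

const useInstallProgress = (extensionId: ExtensionId) => {
  const installing = useAtomValue(installingExtensionAtom)
  const setInstalling = useSetAtom(setInstallingExtensionAtom)
  const removeInstalling = useSetAtom(removeInstallingExtensionAtom)

  return {
    // Current progress entry for this extension, if any
    state: installing.find((e) => e.extensionId === extensionId),
    onProgress: (percentage: number, localPath?: string) =>
      setInstalling(extensionId, { extensionId, percentage, localPath }),
    onDone: () => removeInstalling(extensionId),
  }
}
```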

View File

@@ -40,6 +40,16 @@ export function useActiveModel() {
console.debug(`Model ${modelId} is already initialized. Ignore..`)
return
}
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
// Switch between engines
if (model && activeModel && activeModel.engine !== model.engine) {
stopModel()
// TODO: Refactor inference provider would address this
await new Promise((res) => setTimeout(res, 1000))
}
// TODO: in case we have multiple assistants, the configuration will come from the assistant
setLoadModelError(undefined)
@@ -47,8 +57,6 @@
setStateModel({ state: 'start', loading: true, model: modelId })
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
if (!model) {
toaster({
title: `Model ${modelId} not found!`,

View File

@@ -8,12 +8,15 @@ import {
joinPath,
ModelArtifact,
DownloadState,
GpuSetting,
} from '@janhq/core'
import { useAtomValue, useSetAtom } from 'jotai'
import { setDownloadStateAtom } from './useDownloadState'
import useGpuSetting from './useGpuSetting'
import { extensionManager } from '@/extension/ExtensionManager'
import {
ignoreSslAtom,
@@ -29,6 +32,8 @@ export default function useDownloadModel() {
const setDownloadState = useSetAtom(setDownloadStateAtom)
const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
const { getGpuSettings } = useGpuSetting()
const downloadModel = useCallback(
async (model: Model) => {
const childProgresses: DownloadState[] = model.sources.map(
@@ -68,10 +73,22 @@
})
addDownloadingModel(model)
await localDownloadModel(model, ignoreSSL, proxyEnabled ? proxy : '')
const gpuSettings = await getGpuSettings()
await localDownloadModel(
model,
ignoreSSL,
proxyEnabled ? proxy : '',
gpuSettings
)
},
[ignoreSSL, proxy, proxyEnabled, addDownloadingModel, setDownloadState]
[
ignoreSSL,
proxy,
proxyEnabled,
getGpuSettings,
addDownloadingModel,
setDownloadState,
]
)
const abortModelDownload = useCallback(async (model: Model) => {
@@ -90,8 +107,9 @@
const localDownloadModel = async (
model: Model,
ignoreSSL: boolean,
proxy: string
proxy: string,
gpuSettings?: GpuSetting
) =>
extensionManager
.get<ModelExtension>(ExtensionTypeEnum.Model)
?.downloadModel(model, { ignoreSSL, proxy })
?.downloadModel(model, gpuSettings, { ignoreSSL, proxy })

View File

@@ -18,123 +18,129 @@ export const modelDownloadStateAtom = atom<Record<string, DownloadState>>({})
export const setDownloadStateAtom = atom(
null,
(get, set, state: DownloadState) => {
const currentState = { ...get(modelDownloadStateAtom) }
try {
const currentState = { ...get(modelDownloadStateAtom) }
if (state.downloadState === 'end') {
const modelDownloadState = currentState[state.modelId]
if (state.downloadState === 'end') {
const modelDownloadState = currentState[state.modelId]
const updatedChildren: DownloadState[] =
modelDownloadState.children!.filter(
(m) => m.fileName !== state.fileName
const updatedChildren: DownloadState[] = (
modelDownloadState.children ?? []
).filter((m) => m.fileName !== state.fileName)
updatedChildren.push(state)
modelDownloadState.children = updatedChildren
currentState[state.modelId] = modelDownloadState
const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
(m) => m.downloadState === 'end'
)
updatedChildren.push(state)
modelDownloadState.children = updatedChildren
currentState[state.modelId] = modelDownloadState
const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
(m) => m.downloadState === 'end'
)
if (isAllChildrenDownloadEnd) {
// download successfully
delete currentState[state.modelId]
set(removeDownloadingModelAtom, state.modelId)
if (isAllChildrenDownloadEnd) {
// download successfully
const model = get(configuredModelsAtom).find(
(e) => e.id === state.modelId
)
if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
toaster({
title: 'Download Completed',
description: `Download ${state.modelId} completed`,
type: 'success',
})
}
} else if (state.downloadState === 'error') {
// download error
delete currentState[state.modelId]
set(removeDownloadingModelAtom, state.modelId)
const model = get(configuredModelsAtom).find(
(e) => e.id === state.modelId
)
if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
toaster({
title: 'Download Completed',
description: `Download ${state.modelId} completed`,
type: 'success',
})
}
} else if (state.downloadState === 'error') {
// download error
delete currentState[state.modelId]
set(removeDownloadingModelAtom, state.modelId)
if (state.error === 'aborted') {
toaster({
title: 'Cancel Download',
description: `Model ${state.modelId} download cancelled`,
type: 'warning',
})
} else {
let error = state.error
if (
typeof error?.includes === 'function' &&
state.error?.includes('certificate')
) {
error +=
'. To fix enable "Ignore SSL Certificates" in Advanced settings.'
if (state.error === 'aborted') {
toaster({
title: 'Cancel Download',
description: `Model ${state.modelId} download cancelled`,
type: 'warning',
})
} else {
let error = state.error
if (
typeof error?.includes === 'function' &&
state.error?.includes('certificate')
) {
error +=
'. To fix enable "Ignore SSL Certificates" in Advanced settings.'
}
toaster({
title: 'Download Failed',
description: `Model ${state.modelId} download failed: ${error}`,
type: 'error',
})
}
} else {
// download in progress
if (state.size.total === 0) {
// this is initial state, just set the state
currentState[state.modelId] = state
set(modelDownloadStateAtom, currentState)
return
}
toaster({
title: 'Download Failed',
description: `Model ${state.modelId} download failed: ${error}`,
type: 'error',
})
}
} else {
// download in progress
if (state.size.total === 0) {
// this is initial state, just set the state
currentState[state.modelId] = state
set(modelDownloadStateAtom, currentState)
return
}
const modelDownloadState = currentState[state.modelId]
if (!modelDownloadState) {
console.debug('setDownloadStateAtom: modelDownloadState not found')
return
}
const modelDownloadState = currentState[state.modelId]
if (!modelDownloadState) {
console.debug('setDownloadStateAtom: modelDownloadState not found')
return
}
// delete the children if the filename is matched and replace the new state
const updatedChildren: DownloadState[] =
modelDownloadState.children!.filter(
(m) => m.fileName !== state.fileName
// delete the children if the filename is matched and replace the new state
const updatedChildren: DownloadState[] = (
modelDownloadState.children ?? []
).filter((m) => m.fileName !== state.fileName)
updatedChildren.push(state)
// re-calculate the overall progress if we have all the children download data
const isAnyChildDownloadNotReady = updatedChildren.some(
(m) =>
m.size.total === 0 &&
!modelDownloadState.children?.some(
(e) => e.fileName === m.fileName && e.downloadState === 'end'
) &&
modelDownloadState.children?.some((e) => e.fileName === m.fileName)
)
updatedChildren.push(state)
modelDownloadState.children = updatedChildren
if (isAnyChildDownloadNotReady) {
// just update the children
currentState[state.modelId] = modelDownloadState
set(modelDownloadStateAtom, currentState)
return
}
// re-calculate the overall progress if we have all the children download data
const isAnyChildDownloadNotReady = updatedChildren.some(
(m) => m.size.total === 0
)
const parentTotalSize = modelDownloadState.size.total
if (parentTotalSize === 0) {
// calculate the total size of the parent by sum all children total size
const totalSize = updatedChildren.reduce(
(acc, m) => acc + m.size.total,
0
)
modelDownloadState.children = updatedChildren
modelDownloadState.size.total = totalSize
}
if (isAnyChildDownloadNotReady) {
// just update the children
currentState[state.modelId] = modelDownloadState
set(modelDownloadStateAtom, currentState)
return
}
const parentTotalSize = modelDownloadState.size.total
if (parentTotalSize === 0) {
// calculate the total size of the parent by sum all children total size
const totalSize = updatedChildren.reduce(
(acc, m) => acc + m.size.total,
// calculate the total transferred size by sum all children transferred size
const transferredSize = updatedChildren.reduce(
(acc, m) => acc + m.size.transferred,
0
)
modelDownloadState.size.total = totalSize
modelDownloadState.size.transferred = transferredSize
modelDownloadState.percent =
parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
currentState[state.modelId] = modelDownloadState
}
// calculate the total transferred size by sum all children transferred size
const transferredSize = updatedChildren.reduce(
(acc, m) => acc + m.size.transferred,
0
)
modelDownloadState.size.transferred = transferredSize
modelDownloadState.percent =
parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
currentState[state.modelId] = modelDownloadState
set(modelDownloadStateAtom, currentState)
} catch (e) {
console.debug('setDownloadStateAtom: state', state)
console.debug('setDownloadStateAtom: error', e)
}
set(modelDownloadStateAtom, currentState)
}
)

View File

@@ -0,0 +1,21 @@
import { useCallback } from 'react'
import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
import { extensionManager } from '@/extension'
export default function useGpuSetting() {
const getGpuSettings = useCallback(async () => {
const gpuSetting = await extensionManager
?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
?.getGpuSetting()
if (!gpuSetting) {
console.debug('No GPU setting found')
return undefined
}
return gpuSetting
}, [])
return { getGpuSettings }
}
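A usage sketch matching the call site in `useDownloadModel` above (`useLogGpuArch` is a hypothetical hook for illustration):

```typescript
const useLogGpuArch = () => {
  const { getGpuSettings } = useGpuSetting()
  return useCallback(async () => {
    const gpuSettings = await getGpuSettings() // undefined if unavailable
    console.debug('First GPU arch:', gpuSettings?.gpus[0]?.arch)
  }, [getGpuSettings])
}
```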

View File

@@ -38,6 +38,7 @@ const nextConfig = {
isMac: process.platform === 'darwin',
isWindows: process.platform === 'win32',
isLinux: process.platform === 'linux',
PLATFORM: JSON.stringify(process.platform),
}),
]
return config

View File

@@ -3,6 +3,8 @@ import { useState } from 'react'
import { Model } from '@janhq/core'
import { Badge } from '@janhq/uikit'
import { twMerge } from 'tailwind-merge'
import ExploreModelItemHeader from '@/screens/ExploreModels/ExploreModelItemHeader'
type Props = {
@@ -75,7 +77,16 @@ const ExploreModelItem: React.FC<Props> = ({ model }) => {
<span className="font-semibold text-muted-foreground">
Format
</span>
<p className="mt-2 font-medium uppercase">{model.format}</p>
<p
className={twMerge(
'mt-2 font-medium',
!model.format?.includes(' ') &&
!model.format?.includes('-') &&
'uppercase'
)}
>
{model.format}
</p>
</div>
</div>
</div>

View File

@@ -152,6 +152,7 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
<div className="flex items-center justify-between p-4">
<div className="flex items-center gap-2">
<span className="font-bold">{model.name}</span>
<EngineBadge engine={model.engine} />
</div>
<div className="inline-flex items-center space-x-2">
<span className="mr-4 font-semibold text-muted-foreground">
@@ -172,4 +173,21 @@
)
}
type EngineBadgeProps = {
engine: string
}
const EngineBadge: React.FC<EngineBadgeProps> = ({ engine }) => {
switch (engine) {
case 'nitro-tensorrt-llm':
return (
<div className="flex items-center justify-center rounded-md bg-[#EFF6FF] px-2 py-[2px] font-semibold text-primary">
TensorRT-LLM
</div>
)
default:
return null
}
}
export default ExploreModelItemHeader

View File

@@ -0,0 +1,225 @@
import { useCallback, useEffect, useState } from 'react'
import {
Compatibility,
GpuSetting,
InstallationState,
abortDownload,
systemInformations,
} from '@janhq/core'
import {
Button,
Progress,
Tooltip,
TooltipArrow,
TooltipContent,
TooltipPortal,
TooltipTrigger,
} from '@janhq/uikit'
import { InfoCircledIcon } from '@radix-ui/react-icons'
import { useAtomValue } from 'jotai'
import { extensionManager } from '@/extension'
import Extension from '@/extension/Extension'
import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
type Props = {
item: Extension
}
const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
const [compatibility, setCompatibility] = useState<Compatibility | undefined>(
undefined
)
const [installState, setInstallState] =
useState<InstallationState>('NotRequired')
const installingExtensions = useAtomValue(installingExtensionAtom)
const [isGpuSupported, setIsGpuSupported] = useState<boolean>(false)
const isInstalling = installingExtensions.some(
(e) => e.extensionId === item.name
)
const progress = isInstalling
? installingExtensions.find((e) => e.extensionId === item.name)
?.percentage ?? -1
: -1
useEffect(() => {
const getSystemInfos = async () => {
const info = await systemInformations()
if (!info) {
setIsGpuSupported(false)
return
}
const gpuSettings: GpuSetting | undefined = info.gpuSetting
if (!gpuSettings || gpuSettings.gpus.length === 0) {
setIsGpuSupported(false)
return
}
const arch = gpuSettings.gpus[0].arch
if (!arch) {
setIsGpuSupported(false)
return
}
const supportedGpuArch = ['turing', 'ampere', 'ada']
setIsGpuSupported(supportedGpuArch.includes(arch))
}
getSystemInfos()
}, [])
useEffect(() => {
const getExtensionInstallationState = async () => {
const extension = extensionManager.get(item.name ?? '')
if (!extension) return
if (typeof extension?.installationState === 'function') {
const installState = await extension.installationState()
setInstallState(installState)
}
}
getExtensionInstallationState()
}, [item.name, isInstalling])
useEffect(() => {
const extension = extensionManager.get(item.name ?? '')
if (!extension) return
setCompatibility(extension.compatibility())
}, [setCompatibility, item.name])
const onInstallClick = useCallback(async () => {
const extension = extensionManager.get(item.name ?? '')
if (!extension) return
await extension.install()
}, [item.name])
const onCancelInstallingClick = () => {
const extension = installingExtensions.find(
(e) => e.extensionId === item.name
)
if (extension?.localPath) {
abortDownload(extension.localPath)
}
}
return (
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none">
<div className="flex-1 flex-shrink-0 space-y-1.5">
<div className="flex items-center gap-x-2">
<h6 className="text-sm font-semibold capitalize">
TensorRT-LLM Extension
</h6>
<p className="whitespace-pre-wrap text-sm font-semibold leading-relaxed">
v{item.version}
</p>
</div>
<p className="whitespace-pre-wrap leading-relaxed">
{item.description}
</p>
</div>
{(!compatibility || compatibility['platform']?.includes(PLATFORM)) &&
isGpuSupported ? (
<div className="flex min-w-[150px] flex-row justify-end">
<InstallStateIndicator
installProgress={progress}
installState={installState}
onInstallClick={onInstallClick}
onCancelClick={onCancelInstallingClick}
/>
</div>
) : (
<div className="rounded-md bg-secondary px-3 py-1.5 text-sm font-semibold text-gray-400">
<div className="flex flex-row items-center justify-center gap-1">
Incompatible{' '}
<Tooltip>
<TooltipTrigger className="w-full">
<InfoCircledIcon />
</TooltipTrigger>
<TooltipPortal>
<TooltipContent side="top">
{compatibility ? (
<span>
Only available on{' '}
{compatibility?.platform
?.map((e: string) =>
e === 'win32'
? 'Windows'
: e === 'linux'
? 'Linux'
: 'MacOS'
)
.join(', ')}
</span>
) : (
<span>
Your GPUs are not compatible with this extension
</span>
)}
<TooltipArrow />
</TooltipContent>
</TooltipPortal>
</Tooltip>
</div>
</div>
)}
</div>
)
}
type InstallStateProps = {
installProgress: number
installState: InstallationState
onInstallClick: () => void
onCancelClick: () => void
}
const InstallStateIndicator: React.FC<InstallStateProps> = ({
installProgress,
installState,
onInstallClick,
onCancelClick,
}) => {
// TODO: NamH support dark mode for this
if (installProgress !== -1) {
const progress = installProgress * 100
return (
<div className="flex h-10 flex-row items-center justify-center space-x-2 rounded-md bg-[#EFF8FF] px-4 text-primary">
<button onClick={onCancelClick} className="font-semibold text-primary">
Cancel
</button>
<div className="flex w-[113px] flex-row items-center justify-center space-x-2 rounded-md bg-[#D1E9FF] px-2 py-[2px]">
<Progress className="h-1 w-[69px]" value={progress} />
<span className="text-xs font-bold text-primary">
{progress.toFixed(0)}%
</span>
</div>
</div>
)
}
// TODO: NamH check for dark mode here
switch (installState) {
case 'Installed':
return (
<div className="rounded-md bg-secondary px-3 py-1.5 text-sm font-semibold text-gray-400">
Installed
</div>
)
case 'NotInstalled':
return (
<Button themes="secondaryBlue" size="sm" onClick={onInstallClick}>
Install
</Button>
)
default:
return <div></div>
}
}
export default TensorRtExtensionItem

View File

@@ -4,13 +4,18 @@ import React, { useState, useEffect, useRef } from 'react'
import { Button, ScrollArea } from '@janhq/uikit'
import Loader from '@/containers/Loader'
import { formatExtensionsName } from '@/utils/converter'
import TensorRtExtensionItem from './TensorRtExtensionItem'
import { extensionManager } from '@/extension'
import Extension from '@/extension/Extension'
const ExtensionCatalog = () => {
const [activeExtensions, setActiveExtensions] = useState<Extension[]>([])
const [showLoading, setShowLoading] = useState(false)
const fileInputRef = useRef<HTMLInputElement | null>(null)
/**
* Fetches the active extensions and their preferences from the `extensions` and `preferences` modules.
@@ -63,65 +68,76 @@
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0]
if (file) {
setShowLoading(true)
install(event)
}
}
return (
<ScrollArea className="h-full w-full px-4">
<div className="block w-full">
{activeExtensions.map((item, i) => {
return (
<div
key={i}
className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none"
>
<div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex gap-x-2">
<h6 className="text-sm font-semibold capitalize">
{formatExtensionsName(item.name ?? item.description ?? '')}
</h6>
<p className="whitespace-pre-wrap font-semibold leading-relaxed ">
v{item.version}
<>
<ScrollArea className="h-full w-full px-4">
<div className="block w-full">
{activeExtensions.map((item, i) => {
// TODO: this is bad code, rewrite it
if (item.name === '@janhq/tensorrt-llm-extension') {
return <TensorRtExtensionItem key={i} item={item} />
}
return (
<div
key={i}
className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none"
>
<div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex items-center gap-x-2">
<h6 className="text-sm font-semibold capitalize">
{formatExtensionsName(
item.name ?? item.description ?? ''
)}
</h6>
<p className="whitespace-pre-wrap text-sm font-semibold leading-relaxed ">
v{item.version}
</p>
</div>
<p className="whitespace-pre-wrap leading-relaxed ">
{item.description}
</p>
</div>
<p className="whitespace-pre-wrap leading-relaxed ">
{item.description}
</p>
</div>
)
})}
{/* Manual Installation */}
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none">
<div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex gap-x-2">
<h6 className="text-sm font-semibold capitalize">
Manual Installation
</h6>
</div>
<p className="whitespace-pre-wrap leading-relaxed ">
Select an extension file to install (.tgz)
</p>
</div>
)
})}
{/* Manual Installation */}
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none">
<div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex gap-x-2">
<h6 className="text-sm font-semibold capitalize">
Manual Installation
</h6>
<div>
<input
type="file"
style={{ display: 'none' }}
ref={fileInputRef}
onChange={handleFileChange}
/>
<Button
themes="secondaryBlue"
size="sm"
onClick={() => fileInputRef.current?.click()}
>
Select
</Button>
</div>
<p className="whitespace-pre-wrap leading-relaxed ">
Select an extension file to install (.tgz)
</p>
</div>
<div>
<input
type="file"
style={{ display: 'none' }}
ref={fileInputRef}
onChange={handleFileChange}
/>
<Button
themes="secondaryBlue"
size="sm"
onClick={() => fileInputRef.current?.click()}
>
Select
</Button>
</div>
</div>
</div>
</ScrollArea>
</ScrollArea>
{showLoading && <Loader description="Installing..." />}
</>
)
}

View File

@@ -0,0 +1,24 @@
import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
import { toaster } from '@/containers/Toast'
import { extensionManager } from '@/extension'
export const appService = {
systemInformations: async () => {
const gpuSetting = await extensionManager
?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
?.getGpuSetting()
return {
gpuSetting,
// TODO: Other system information
}
},
showToast: (title: string, description: string) => {
toaster({
title,
description: description,
})
},
}

View File

@@ -1,5 +1,7 @@ import { appService } from './appService'
import { appService } from './appService'
import { EventEmitter } from './eventsService'
import { restAPI } from './restService'
export const setupCoreServices = () => {
if (typeof window === 'undefined') {
console.debug('undefined', window)
@@ -10,7 +12,10 @@ export const setupCoreServices = () => {
if (!window.core) {
window.core = {
events: new EventEmitter(),
api: window.electronAPI ?? restAPI,
api: {
...(window.electronAPI ? window.electronAPI : restAPI),
...appService,
},
}
}
}

View File

@@ -11,6 +11,7 @@ declare global {
declare const isMac: boolean
declare const isWindows: boolean
declare const isLinux: boolean
declare const PLATFORM: string
interface Core {
api: APIFunctions
events: EventEmitter