feat: Nitro-Tensorrt-LLM Extension (#2280)
* feat: tensorrt-llm-extension * fix: loading * feat: add download tensorrt llm runner Signed-off-by: James <james@jan.ai> * feat: update to rollupjs instead of webpack for monitoring extension Signed-off-by: James <james@jan.ai> * feat: move update nvidia info to monitor extension Signed-off-by: James <james@jan.ai> * allow download tensorrt Signed-off-by: James <james@jan.ai> * update Signed-off-by: James <james@jan.ai> * allow download tensor rt based on gpu setting Signed-off-by: James <james@jan.ai> * update downloaded models Signed-off-by: James <james@jan.ai> * feat: add extension compatibility * dynamic tensor rt engines Signed-off-by: James <james@jan.ai> * update models Signed-off-by: James <james@jan.ai> * chore: remove ts-ignore * feat: getting installation state from extension Signed-off-by: James <james@jan.ai> * chore: adding type for decompress Signed-off-by: James <james@jan.ai> * feat: update according Louis's comment Signed-off-by: James <james@jan.ai> * feat: add progress for installing extension Signed-off-by: James <james@jan.ai> * chore: remove args from extension installation * fix: model download does not work properly * fix: do not allow user to stop tensorrtllm inference * fix: extension installed style * fix: download tensorrt does not update state Signed-off-by: James <james@jan.ai> * chore: replace int4 by fl16 * feat: modal for installing extension Signed-off-by: James <james@jan.ai> * fix: start download immediately after press install Signed-off-by: James <james@jan.ai> * fix: error switching between engines * feat: rename inference provider to ai engine and refactor to core * fix: missing ulid * fix: core bundler * feat: add cancel extension installing Signed-off-by: James <james@jan.ai> * remove mocking for mac Signed-off-by: James <james@jan.ai> * fix: show models only when extension is ready * add tensorrt badge for model Signed-off-by: James <james@jan.ai> * fix: copy * fix: add compatible check (#2342) * fix: add compatible 
check Signed-off-by: James <james@jan.ai> * fix: copy * fix: font * fix: copy * fix: broken monitoring extension * chore: bump engine * fix: copy * fix: model copy * fix: copy * fix: model json --------- Signed-off-by: James <james@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: Louis <louis@jan.ai> * fix: vulkan support * fix: installation button padding * fix: empty script * fix: remove hard code string --------- Signed-off-by: James <james@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: NamH <NamNh0122@gmail.com>
This commit is contained in:
parent
24c6dd05be
commit
d85d02693b
20
.gitignore
vendored
20
.gitignore
vendored
@ -22,16 +22,16 @@ package-lock.json
|
||||
core/lib/**
|
||||
|
||||
# Nitro binary files
|
||||
extensions/inference-nitro-extension/bin/*/nitro
|
||||
extensions/inference-nitro-extension/bin/*/*.metal
|
||||
extensions/inference-nitro-extension/bin/*/*.exe
|
||||
extensions/inference-nitro-extension/bin/*/*.dll
|
||||
extensions/inference-nitro-extension/bin/*/*.exp
|
||||
extensions/inference-nitro-extension/bin/*/*.lib
|
||||
extensions/inference-nitro-extension/bin/saved-*
|
||||
extensions/inference-nitro-extension/bin/*.tar.gz
|
||||
extensions/inference-nitro-extension/bin/vulkaninfoSDK.exe
|
||||
extensions/inference-nitro-extension/bin/vulkaninfo
|
||||
extensions/*-extension/bin/*/nitro
|
||||
extensions/*-extension/bin/*/*.metal
|
||||
extensions/*-extension/bin/*/*.exe
|
||||
extensions/*-extension/bin/*/*.dll
|
||||
extensions/*-extension/bin/*/*.exp
|
||||
extensions/*-extension/bin/*/*.lib
|
||||
extensions/*-extension/bin/saved-*
|
||||
extensions/*-extension/bin/*.tar.gz
|
||||
extensions/*-extension/bin/vulkaninfoSDK.exe
|
||||
extensions/*-extension/bin/vulkaninfo
|
||||
|
||||
|
||||
# Turborepo
|
||||
|
||||
@ -45,11 +45,12 @@
|
||||
"start": "rollup -c rollup.config.ts -w"
|
||||
},
|
||||
"devDependencies": {
|
||||
"jest": "^29.7.0",
|
||||
"@types/jest": "^29.5.12",
|
||||
"@types/node": "^12.0.2",
|
||||
"eslint-plugin-jest": "^27.9.0",
|
||||
"eslint": "8.57.0",
|
||||
"eslint-plugin-jest": "^27.9.0",
|
||||
"jest": "^29.7.0",
|
||||
"rimraf": "^3.0.2",
|
||||
"rollup": "^2.38.5",
|
||||
"rollup-plugin-commonjs": "^9.1.8",
|
||||
"rollup-plugin-json": "^3.1.0",
|
||||
@ -58,7 +59,10 @@
|
||||
"rollup-plugin-typescript2": "^0.36.0",
|
||||
"ts-jest": "^29.1.2",
|
||||
"tslib": "^2.6.2",
|
||||
"typescript": "^5.3.3",
|
||||
"rimraf": "^3.0.2"
|
||||
"typescript": "^5.3.3"
|
||||
},
|
||||
"dependencies": {
|
||||
"rxjs": "^7.8.1",
|
||||
"ulid": "^2.3.0"
|
||||
}
|
||||
}
|
||||
|
||||
@ -64,7 +64,7 @@ export default [
|
||||
// Allow json resolution
|
||||
json(),
|
||||
// Compile TypeScript files
|
||||
typescript({ useTsconfigDeclarationDir: true }),
|
||||
typescript({ useTsconfigDeclarationDir: true, exclude: ['src/*.ts', 'src/extensions/**'] }),
|
||||
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
|
||||
commonjs(),
|
||||
// Allow node_modules resolution, so you can use 'external' to control
|
||||
|
||||
@ -33,6 +33,8 @@ export enum AppRoute {
|
||||
stopServer = 'stopServer',
|
||||
log = 'log',
|
||||
logServer = 'logServer',
|
||||
systemInformations = 'systemInformations',
|
||||
showToast = 'showToast',
|
||||
}
|
||||
|
||||
export enum AppEvent {
|
||||
@ -56,6 +58,7 @@ export enum DownloadEvent {
|
||||
onFileDownloadUpdate = 'onFileDownloadUpdate',
|
||||
onFileDownloadError = 'onFileDownloadError',
|
||||
onFileDownloadSuccess = 'onFileDownloadSuccess',
|
||||
onFileUnzipSuccess = 'onFileUnzipSuccess',
|
||||
}
|
||||
|
||||
export enum LocalImportModelEvent {
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { FileStat } from './types'
|
||||
import { DownloadRequest, FileStat, NetworkConfig } from './types'
|
||||
|
||||
/**
|
||||
* Execute a extension module function in main process
|
||||
@ -17,18 +17,16 @@ const executeOnMain: (extension: string, method: string, ...args: any[]) => Prom
|
||||
|
||||
/**
|
||||
* Downloads a file from a URL and saves it to the local file system.
|
||||
* @param {string} url - The URL of the file to download.
|
||||
* @param {string} fileName - The name to use for the downloaded file.
|
||||
* @param {object} network - Optional object to specify proxy/whether to ignore SSL certificates.
|
||||
*
|
||||
* @param {DownloadRequest} downloadRequest - The request to download the file.
|
||||
* @param {NetworkConfig} network - Optional object to specify proxy/whether to ignore SSL certificates.
|
||||
*
|
||||
* @returns {Promise<any>} A promise that resolves when the file is downloaded.
|
||||
*/
|
||||
const downloadFile: (
|
||||
url: string,
|
||||
fileName: string,
|
||||
network?: { proxy?: string; ignoreSSL?: boolean }
|
||||
) => Promise<any> = (url, fileName, network) => {
|
||||
return global.core?.api?.downloadFile(url, fileName, network)
|
||||
}
|
||||
const downloadFile: (downloadRequest: DownloadRequest, network?: NetworkConfig) => Promise<any> = (
|
||||
downloadRequest,
|
||||
network
|
||||
) => global.core?.api?.downloadFile(downloadRequest, network)
|
||||
|
||||
/**
|
||||
* Aborts the download of a specific file.
|
||||
@ -108,6 +106,20 @@ const log: (message: string, fileName?: string) => void = (message, fileName) =>
|
||||
const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: string, to: string) =>
|
||||
global.core.api?.isSubdirectory(from, to)
|
||||
|
||||
/**
|
||||
* Get system information
|
||||
* @returns {Promise<any>} - A promise that resolves with the system information.
|
||||
*/
|
||||
const systemInformations: () => Promise<any> = () => global.core.api?.systemInformations()
|
||||
|
||||
/**
|
||||
* Show toast message from browser processes.
|
||||
* @param title
|
||||
* @param message
|
||||
* @returns
|
||||
*/
|
||||
const showToast: (title: string, message: string) => void = (title, message) =>
|
||||
global.core.api?.showToast(title, message)
|
||||
/**
|
||||
* Register extension point function type definition
|
||||
*/
|
||||
@ -134,5 +146,7 @@ export {
|
||||
log,
|
||||
isSubdirectory,
|
||||
getUserHomePath,
|
||||
systemInformations,
|
||||
showToast,
|
||||
FileStat,
|
||||
}
|
||||
|
||||
@ -10,6 +10,22 @@ export enum ExtensionTypeEnum {
|
||||
export interface ExtensionType {
|
||||
type(): ExtensionTypeEnum | undefined
|
||||
}
|
||||
|
||||
export interface Compatibility {
|
||||
platform: string[]
|
||||
version: string
|
||||
}
|
||||
|
||||
const ALL_INSTALLATION_STATE = [
|
||||
'NotRequired', // not required.
|
||||
'Installed', // require and installed. Good to go.
|
||||
'NotInstalled', // require to be installed.
|
||||
'Corrupted', // require but corrupted. Need to redownload.
|
||||
] as const
|
||||
|
||||
export type InstallationStateTuple = typeof ALL_INSTALLATION_STATE
|
||||
export type InstallationState = InstallationStateTuple[number]
|
||||
|
||||
/**
|
||||
* Represents a base extension.
|
||||
* This class should be extended by any class that represents an extension.
|
||||
@ -33,4 +49,32 @@ export abstract class BaseExtension implements ExtensionType {
|
||||
* Any cleanup logic for the extension should be put here.
|
||||
*/
|
||||
abstract onUnload(): void
|
||||
|
||||
/**
|
||||
* The compatibility of the extension.
|
||||
* This is used to check if the extension is compatible with the current environment.
|
||||
* @property {Array} platform
|
||||
*/
|
||||
compatibility(): Compatibility | undefined {
|
||||
return undefined
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if the prerequisites for the extension are installed.
|
||||
*
|
||||
* @returns {boolean} true if the prerequisites are installed, false otherwise.
|
||||
*/
|
||||
async installationState(): Promise<InstallationState> {
|
||||
return 'NotRequired'
|
||||
}
|
||||
|
||||
/**
|
||||
* Install the prerequisites for the extension.
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
// @ts-ignore
|
||||
async install(...args): Promise<void> {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
60
core/src/extensions/ai-engines/AIEngine.ts
Normal file
60
core/src/extensions/ai-engines/AIEngine.ts
Normal file
@ -0,0 +1,60 @@
|
||||
import { getJanDataFolderPath, joinPath } from '../../core'
|
||||
import { events } from '../../events'
|
||||
import { BaseExtension } from '../../extension'
|
||||
import { fs } from '../../fs'
|
||||
import { Model, ModelEvent } from '../../types'
|
||||
|
||||
/**
|
||||
* Base AIEngine
|
||||
* Applicable to all AI Engines
|
||||
*/
|
||||
export abstract class AIEngine extends BaseExtension {
|
||||
// The inference engine
|
||||
abstract provider: string
|
||||
// The model folder
|
||||
modelFolder: string = 'models'
|
||||
|
||||
abstract models(): Promise<Model[]>
|
||||
|
||||
/**
|
||||
* On extension load, subscribe to events.
|
||||
*/
|
||||
onLoad() {
|
||||
this.prePopulateModels()
|
||||
}
|
||||
|
||||
/**
|
||||
* Pre-populate models to App Data Folder
|
||||
*/
|
||||
prePopulateModels(): Promise<void> {
|
||||
return this.models().then((models) => {
|
||||
const prePoluateOperations = models.map((model) =>
|
||||
getJanDataFolderPath()
|
||||
.then((janDataFolder) =>
|
||||
// Attempt to create the model folder
|
||||
joinPath([janDataFolder, this.modelFolder, model.id]).then((path) =>
|
||||
fs
|
||||
.mkdirSync(path)
|
||||
.catch()
|
||||
.then(() => path)
|
||||
)
|
||||
)
|
||||
.then((path) => joinPath([path, 'model.json']))
|
||||
.then((path) => {
|
||||
// Do not overwite existing model.json
|
||||
return fs.existsSync(path).then((exist: any) => {
|
||||
if (!exist) return fs.writeFileSync(path, JSON.stringify(model, null, 2))
|
||||
})
|
||||
})
|
||||
.catch((e: Error) => {
|
||||
console.error('Error', e)
|
||||
})
|
||||
)
|
||||
Promise.all(prePoluateOperations).then(() =>
|
||||
// Emit event to update models
|
||||
// So the UI can update the models list
|
||||
events.emit(ModelEvent.OnModelsUpdate, {})
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
63
core/src/extensions/ai-engines/LocalOAIEngine.ts
Normal file
63
core/src/extensions/ai-engines/LocalOAIEngine.ts
Normal file
@ -0,0 +1,63 @@
|
||||
import { executeOnMain, getJanDataFolderPath, joinPath } from '../../core'
|
||||
import { events } from '../../events'
|
||||
import { Model, ModelEvent } from '../../types'
|
||||
import { OAIEngine } from './OAIEngine'
|
||||
|
||||
/**
|
||||
* Base OAI Local Inference Provider
|
||||
* Added the implementation of loading and unloading model (applicable to local inference providers)
|
||||
*/
|
||||
export abstract class LocalOAIEngine extends OAIEngine {
|
||||
// The inference engine
|
||||
loadModelFunctionName: string = 'loadModel'
|
||||
unloadModelFunctionName: string = 'unloadModel'
|
||||
isRunning: boolean = false
|
||||
|
||||
/**
|
||||
* On extension load, subscribe to events.
|
||||
*/
|
||||
onLoad() {
|
||||
super.onLoad()
|
||||
// These events are applicable to local inference providers
|
||||
events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model))
|
||||
events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model))
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the model.
|
||||
*/
|
||||
async onModelInit(model: Model) {
|
||||
if (model.engine.toString() !== this.provider) return
|
||||
|
||||
const modelFolder = await joinPath([await getJanDataFolderPath(), this.modelFolder, model.id])
|
||||
|
||||
const res = await executeOnMain(this.nodeModule, this.loadModelFunctionName, {
|
||||
modelFolder,
|
||||
model,
|
||||
})
|
||||
|
||||
if (res?.error) {
|
||||
events.emit(ModelEvent.OnModelFail, {
|
||||
...model,
|
||||
error: res.error,
|
||||
})
|
||||
return
|
||||
} else {
|
||||
this.loadedModel = model
|
||||
events.emit(ModelEvent.OnModelReady, model)
|
||||
this.isRunning = true
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Stops the model.
|
||||
*/
|
||||
onModelStop(model: Model) {
|
||||
if (model.engine?.toString() !== this.provider) return
|
||||
|
||||
this.isRunning = false
|
||||
|
||||
executeOnMain(this.nodeModule, this.unloadModelFunctionName).then(() => {
|
||||
events.emit(ModelEvent.OnModelStopped, {})
|
||||
})
|
||||
}
|
||||
}
|
||||
116
core/src/extensions/ai-engines/OAIEngine.ts
Normal file
116
core/src/extensions/ai-engines/OAIEngine.ts
Normal file
@ -0,0 +1,116 @@
|
||||
import { requestInference } from './helpers/sse'
|
||||
import { ulid } from 'ulid'
|
||||
import { AIEngine } from './AIEngine'
|
||||
import {
|
||||
ChatCompletionRole,
|
||||
ContentType,
|
||||
InferenceEvent,
|
||||
MessageEvent,
|
||||
MessageRequest,
|
||||
MessageRequestType,
|
||||
MessageStatus,
|
||||
Model,
|
||||
ModelInfo,
|
||||
ThreadContent,
|
||||
ThreadMessage,
|
||||
} from '../../types'
|
||||
import { events } from '../../events'
|
||||
|
||||
/**
|
||||
* Base OAI Inference Provider
|
||||
* Applicable to all OAI compatible inference providers
|
||||
*/
|
||||
export abstract class OAIEngine extends AIEngine {
|
||||
// The inference engine
|
||||
abstract inferenceUrl: string
|
||||
abstract nodeModule: string
|
||||
|
||||
// Controller to handle stop requests
|
||||
controller = new AbortController()
|
||||
isCancelled = false
|
||||
|
||||
// The loaded model instance
|
||||
loadedModel: Model | undefined
|
||||
|
||||
/**
|
||||
* On extension load, subscribe to events.
|
||||
*/
|
||||
onLoad() {
|
||||
super.onLoad()
|
||||
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
|
||||
events.on(InferenceEvent.OnInferenceStopped, () => this.onInferenceStopped())
|
||||
}
|
||||
|
||||
/**
|
||||
* On extension unload
|
||||
*/
|
||||
onUnload(): void {}
|
||||
|
||||
/*
|
||||
* Inference request
|
||||
*/
|
||||
inference(data: MessageRequest) {
|
||||
if (data.model?.engine?.toString() !== this.provider) return
|
||||
|
||||
const timestamp = Date.now()
|
||||
const message: ThreadMessage = {
|
||||
id: ulid(),
|
||||
thread_id: data.threadId,
|
||||
type: data.type,
|
||||
assistant_id: data.assistantId,
|
||||
role: ChatCompletionRole.Assistant,
|
||||
content: [],
|
||||
status: MessageStatus.Pending,
|
||||
created: timestamp,
|
||||
updated: timestamp,
|
||||
object: 'thread.message',
|
||||
}
|
||||
|
||||
if (data.type !== MessageRequestType.Summary) {
|
||||
events.emit(MessageEvent.OnMessageResponse, message)
|
||||
}
|
||||
|
||||
this.isCancelled = false
|
||||
this.controller = new AbortController()
|
||||
|
||||
const model: ModelInfo = {
|
||||
...(this.loadedModel ? this.loadedModel : {}),
|
||||
...data.model,
|
||||
}
|
||||
|
||||
requestInference(this.inferenceUrl, data.messages ?? [], model, this.controller).subscribe({
|
||||
next: (content: any) => {
|
||||
const messageContent: ThreadContent = {
|
||||
type: ContentType.Text,
|
||||
text: {
|
||||
value: content.trim(),
|
||||
annotations: [],
|
||||
},
|
||||
}
|
||||
message.content = [messageContent]
|
||||
events.emit(MessageEvent.OnMessageUpdate, message)
|
||||
},
|
||||
complete: async () => {
|
||||
message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
|
||||
events.emit(MessageEvent.OnMessageUpdate, message)
|
||||
},
|
||||
error: async (err: any) => {
|
||||
if (this.isCancelled || message.content.length) {
|
||||
message.status = MessageStatus.Stopped
|
||||
events.emit(MessageEvent.OnMessageUpdate, message)
|
||||
return
|
||||
}
|
||||
message.status = MessageStatus.Error
|
||||
events.emit(MessageEvent.OnMessageUpdate, message)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Stops the inference.
|
||||
*/
|
||||
onInferenceStopped() {
|
||||
this.isCancelled = true
|
||||
this.controller?.abort()
|
||||
}
|
||||
}
|
||||
67
core/src/extensions/ai-engines/helpers/sse.ts
Normal file
67
core/src/extensions/ai-engines/helpers/sse.ts
Normal file
@ -0,0 +1,67 @@
|
||||
import { Observable } from 'rxjs'
|
||||
import { ModelRuntimeParams } from '../../../types'
|
||||
/**
|
||||
* Sends a request to the inference server to generate a response based on the recent messages.
|
||||
* @param recentMessages - An array of recent messages to use as context for the inference.
|
||||
* @returns An Observable that emits the generated response as a string.
|
||||
*/
|
||||
export function requestInference(
|
||||
inferenceUrl: string,
|
||||
recentMessages: any[],
|
||||
model: {
|
||||
id: string
|
||||
parameters: ModelRuntimeParams
|
||||
},
|
||||
controller?: AbortController
|
||||
): Observable<string> {
|
||||
return new Observable((subscriber) => {
|
||||
const requestBody = JSON.stringify({
|
||||
messages: recentMessages,
|
||||
model: model.id,
|
||||
stream: true,
|
||||
...model.parameters,
|
||||
})
|
||||
fetch(inferenceUrl, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Access-Control-Allow-Origin': '*',
|
||||
'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
|
||||
},
|
||||
body: requestBody,
|
||||
signal: controller?.signal,
|
||||
})
|
||||
.then(async (response) => {
|
||||
if (model.parameters.stream === false) {
|
||||
const data = await response.json()
|
||||
subscriber.next(data.choices[0]?.message?.content ?? '')
|
||||
} else {
|
||||
const stream = response.body
|
||||
const decoder = new TextDecoder('utf-8')
|
||||
const reader = stream?.getReader()
|
||||
let content = ''
|
||||
|
||||
while (true && reader) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) {
|
||||
break
|
||||
}
|
||||
const text = decoder.decode(value)
|
||||
const lines = text.trim().split('\n')
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('data: ') && !line.includes('data: [DONE]')) {
|
||||
const data = JSON.parse(line.replace('data: ', ''))
|
||||
content += data.choices[0]?.delta?.content ?? ''
|
||||
if (content.startsWith('assistant: ')) {
|
||||
content = content.replace('assistant: ', '')
|
||||
}
|
||||
subscriber.next(content)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
subscriber.complete()
|
||||
})
|
||||
.catch((err) => subscriber.error(err))
|
||||
})
|
||||
}
|
||||
3
core/src/extensions/ai-engines/index.ts
Normal file
3
core/src/extensions/ai-engines/index.ts
Normal file
@ -0,0 +1,3 @@
|
||||
export * from './AIEngine'
|
||||
export * from './OAIEngine'
|
||||
export * from './LocalOAIEngine'
|
||||
@ -28,3 +28,8 @@ export { ModelExtension } from './model'
|
||||
* Hugging Face extension for converting HF models to GGUF.
|
||||
*/
|
||||
export { HuggingFaceExtension } from './huggingface'
|
||||
|
||||
/**
|
||||
* Base AI Engines.
|
||||
*/
|
||||
export * from './ai-engines'
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { BaseExtension, ExtensionTypeEnum } from '../extension'
|
||||
import { ImportingModel, Model, ModelInterface, OptionType } from '../index'
|
||||
import { GpuSetting, ImportingModel, Model, ModelInterface, OptionType } from '../index'
|
||||
|
||||
/**
|
||||
* Model extension for managing models.
|
||||
@ -14,6 +14,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
|
||||
|
||||
abstract downloadModel(
|
||||
model: Model,
|
||||
gpuSettings?: GpuSetting,
|
||||
network?: { proxy: string; ignoreSSL?: boolean }
|
||||
): Promise<void>
|
||||
abstract cancelModelDownload(modelId: string): Promise<void>
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { BaseExtension, ExtensionTypeEnum } from '../extension'
|
||||
import { MonitoringInterface } from '../index'
|
||||
import { GpuSetting, MonitoringInterface } from '../index'
|
||||
|
||||
/**
|
||||
* Monitoring extension for system monitoring.
|
||||
@ -13,6 +13,7 @@ export abstract class MonitoringExtension extends BaseExtension implements Monit
|
||||
return ExtensionTypeEnum.SystemMonitoring
|
||||
}
|
||||
|
||||
abstract getGpuSetting(): Promise<GpuSetting>
|
||||
abstract getResourcesInfo(): Promise<any>
|
||||
abstract getCurrentLoad(): Promise<any>
|
||||
}
|
||||
|
||||
@ -5,7 +5,7 @@ import { getJanDataFolderPath } from '../../helper'
|
||||
import { DownloadManager } from '../../helper/download'
|
||||
import { createWriteStream, renameSync } from 'fs'
|
||||
import { Processor } from './Processor'
|
||||
import { DownloadState } from '../../../types'
|
||||
import { DownloadRequest, DownloadState, NetworkConfig } from '../../../types'
|
||||
|
||||
export class Downloader implements Processor {
|
||||
observer?: Function
|
||||
@ -20,24 +20,27 @@ export class Downloader implements Processor {
|
||||
return func(this.observer, ...args)
|
||||
}
|
||||
|
||||
downloadFile(observer: any, url: string, localPath: string, network: any) {
|
||||
downloadFile(observer: any, downloadRequest: DownloadRequest, network?: NetworkConfig) {
|
||||
const request = require('request')
|
||||
const progress = require('request-progress')
|
||||
|
||||
const strictSSL = !network?.ignoreSSL
|
||||
const proxy = network?.proxy?.startsWith('http') ? network.proxy : undefined
|
||||
|
||||
const { localPath, url } = downloadRequest
|
||||
let normalizedPath = localPath
|
||||
if (typeof localPath === 'string') {
|
||||
localPath = normalizeFilePath(localPath)
|
||||
normalizedPath = normalizeFilePath(localPath)
|
||||
}
|
||||
const array = localPath.split(sep)
|
||||
const array = normalizedPath.split(sep)
|
||||
const fileName = array.pop() ?? ''
|
||||
const modelId = array.pop() ?? ''
|
||||
|
||||
const destination = resolve(getJanDataFolderPath(), localPath)
|
||||
const destination = resolve(getJanDataFolderPath(), normalizedPath)
|
||||
const rq = request({ url, strictSSL, proxy })
|
||||
|
||||
// Put request to download manager instance
|
||||
DownloadManager.instance.setRequest(localPath, rq)
|
||||
DownloadManager.instance.setRequest(normalizedPath, rq)
|
||||
|
||||
// Downloading file to a temp file first
|
||||
const downloadingTempFile = `${destination}.download`
|
||||
@ -56,16 +59,25 @@ export class Downloader implements Processor {
|
||||
total: 0,
|
||||
transferred: 0,
|
||||
},
|
||||
children: [],
|
||||
downloadState: 'downloading',
|
||||
extensionId: downloadRequest.extensionId,
|
||||
downloadType: downloadRequest.downloadType,
|
||||
localPath: normalizedPath,
|
||||
}
|
||||
DownloadManager.instance.downloadProgressMap[modelId] = initialDownloadState
|
||||
|
||||
if (downloadRequest.downloadType === 'extension') {
|
||||
observer?.(DownloadEvent.onFileDownloadUpdate, initialDownloadState)
|
||||
}
|
||||
|
||||
progress(rq, {})
|
||||
.on('progress', (state: any) => {
|
||||
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
|
||||
const downloadState: DownloadState = {
|
||||
...currentDownloadState,
|
||||
...state,
|
||||
modelId,
|
||||
fileName,
|
||||
fileName: fileName,
|
||||
downloadState: 'downloading',
|
||||
}
|
||||
console.debug('progress: ', downloadState)
|
||||
@ -76,22 +88,22 @@ export class Downloader implements Processor {
|
||||
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
|
||||
const downloadState: DownloadState = {
|
||||
...currentDownloadState,
|
||||
fileName: fileName,
|
||||
error: error.message,
|
||||
downloadState: 'error',
|
||||
}
|
||||
if (currentDownloadState) {
|
||||
DownloadManager.instance.downloadProgressMap[modelId] = downloadState
|
||||
}
|
||||
|
||||
observer?.(DownloadEvent.onFileDownloadError, downloadState)
|
||||
DownloadManager.instance.downloadProgressMap[modelId] = downloadState
|
||||
})
|
||||
.on('end', () => {
|
||||
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
|
||||
if (currentDownloadState && DownloadManager.instance.networkRequests[localPath]) {
|
||||
if (currentDownloadState && DownloadManager.instance.networkRequests[normalizedPath]) {
|
||||
// Finished downloading, rename temp file to actual file
|
||||
renameSync(downloadingTempFile, destination)
|
||||
const downloadState: DownloadState = {
|
||||
...currentDownloadState,
|
||||
fileName: fileName,
|
||||
downloadState: 'end',
|
||||
}
|
||||
observer?.(DownloadEvent.onFileDownloadSuccess, downloadState)
|
||||
|
||||
@ -1,7 +1,16 @@
|
||||
import fs from 'fs'
|
||||
import {
|
||||
existsSync,
|
||||
readdirSync,
|
||||
readFileSync,
|
||||
writeFileSync,
|
||||
mkdirSync,
|
||||
appendFileSync,
|
||||
createWriteStream,
|
||||
rmdirSync,
|
||||
} from 'fs'
|
||||
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
|
||||
import { join } from 'path'
|
||||
import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../index'
|
||||
import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types'
|
||||
import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper'
|
||||
import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
|
||||
|
||||
@ -9,12 +18,12 @@ import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
|
||||
export const getBuilder = async (configuration: RouteConfiguration) => {
|
||||
const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
|
||||
try {
|
||||
if (!fs.existsSync(directoryPath)) {
|
||||
if (!existsSync(directoryPath)) {
|
||||
console.debug('model folder not found')
|
||||
return []
|
||||
}
|
||||
|
||||
const files: string[] = fs.readdirSync(directoryPath)
|
||||
const files: string[] = readdirSync(directoryPath)
|
||||
|
||||
const allDirectories: string[] = []
|
||||
for (const file of files) {
|
||||
@ -46,8 +55,8 @@ export const getBuilder = async (configuration: RouteConfiguration) => {
|
||||
}
|
||||
|
||||
const readModelMetadata = (path: string): string | undefined => {
|
||||
if (fs.existsSync(path)) {
|
||||
return fs.readFileSync(path, 'utf-8')
|
||||
if (existsSync(path)) {
|
||||
return readFileSync(path, 'utf-8')
|
||||
} else {
|
||||
return undefined
|
||||
}
|
||||
@ -81,7 +90,7 @@ export const deleteBuilder = async (configuration: RouteConfiguration, id: strin
|
||||
}
|
||||
|
||||
const objectPath = join(directoryPath, id)
|
||||
fs.rmdirSync(objectPath, { recursive: true })
|
||||
rmdirSync(objectPath, { recursive: true })
|
||||
return {
|
||||
id: id,
|
||||
object: configuration.delete.object,
|
||||
@ -96,20 +105,19 @@ export const getMessages = async (threadId: string): Promise<ThreadMessage[]> =>
|
||||
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
|
||||
const messageFile = 'messages.jsonl'
|
||||
try {
|
||||
const files: string[] = fs.readdirSync(threadDirPath)
|
||||
const files: string[] = readdirSync(threadDirPath)
|
||||
if (!files.includes(messageFile)) {
|
||||
console.error(`${threadDirPath} not contains message file`)
|
||||
return []
|
||||
}
|
||||
|
||||
const messageFilePath = join(threadDirPath, messageFile)
|
||||
if (!fs.existsSync(messageFilePath)) {
|
||||
if (!existsSync(messageFilePath)) {
|
||||
console.debug('message file not found')
|
||||
return []
|
||||
}
|
||||
|
||||
const lines = fs
|
||||
.readFileSync(messageFilePath, 'utf-8')
|
||||
const lines = readFileSync(messageFilePath, 'utf-8')
|
||||
.toString()
|
||||
.split('\n')
|
||||
.filter((line: any) => line !== '')
|
||||
@ -157,11 +165,11 @@ export const createThread = async (thread: any) => {
|
||||
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
|
||||
const threadJsonPath = join(threadDirPath, threadMetadataFileName)
|
||||
|
||||
if (!fs.existsSync(threadDirPath)) {
|
||||
fs.mkdirSync(threadDirPath)
|
||||
if (!existsSync(threadDirPath)) {
|
||||
mkdirSync(threadDirPath)
|
||||
}
|
||||
|
||||
await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
|
||||
await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
|
||||
return updatedThread
|
||||
} catch (err) {
|
||||
return {
|
||||
@ -191,7 +199,7 @@ export const updateThread = async (threadId: string, thread: any) => {
|
||||
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
|
||||
const threadJsonPath = join(threadDirPath, threadMetadataFileName)
|
||||
|
||||
await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
|
||||
await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
|
||||
return updatedThread
|
||||
} catch (err) {
|
||||
return {
|
||||
@ -233,10 +241,10 @@ export const createMessage = async (threadId: string, message: any) => {
|
||||
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
|
||||
const threadMessagePath = join(threadDirPath, threadMessagesFileName)
|
||||
|
||||
if (!fs.existsSync(threadDirPath)) {
|
||||
fs.mkdirSync(threadDirPath)
|
||||
if (!existsSync(threadDirPath)) {
|
||||
mkdirSync(threadDirPath)
|
||||
}
|
||||
fs.appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
|
||||
appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
|
||||
return threadMessage
|
||||
} catch (err) {
|
||||
return {
|
||||
@ -259,8 +267,8 @@ export const downloadModel = async (
|
||||
}
|
||||
|
||||
const directoryPath = join(getJanDataFolderPath(), 'models', modelId)
|
||||
if (!fs.existsSync(directoryPath)) {
|
||||
fs.mkdirSync(directoryPath)
|
||||
if (!existsSync(directoryPath)) {
|
||||
mkdirSync(directoryPath)
|
||||
}
|
||||
|
||||
// path to model binary
|
||||
@ -281,7 +289,7 @@ export const downloadModel = async (
|
||||
.on('end', function () {
|
||||
console.debug('end')
|
||||
})
|
||||
.pipe(fs.createWriteStream(modelBinaryPath))
|
||||
.pipe(createWriteStream(modelBinaryPath))
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
@ -4,16 +4,43 @@ export type FileStat = {
|
||||
}
|
||||
|
||||
export type DownloadState = {
|
||||
modelId: string
|
||||
modelId: string // TODO: change to download id
|
||||
fileName: string
|
||||
time: DownloadTime
|
||||
speed: number
|
||||
percent: number
|
||||
|
||||
percent: number
|
||||
size: DownloadSize
|
||||
children?: DownloadState[]
|
||||
error?: string
|
||||
downloadState: 'downloading' | 'error' | 'end'
|
||||
children?: DownloadState[]
|
||||
|
||||
error?: string
|
||||
extensionId?: string
|
||||
downloadType?: DownloadType
|
||||
localPath?: string
|
||||
}
|
||||
|
||||
export type DownloadType = 'model' | 'extension'
|
||||
|
||||
export type DownloadRequest = {
|
||||
/**
|
||||
* The URL to download the file from.
|
||||
*/
|
||||
url: string
|
||||
|
||||
/**
|
||||
* The local path to save the file to.
|
||||
*/
|
||||
localPath: string
|
||||
|
||||
/**
|
||||
* The extension ID of the extension that initiated the download.
|
||||
*
|
||||
* Can be extension name.
|
||||
*/
|
||||
extensionId?: string
|
||||
|
||||
downloadType?: DownloadType
|
||||
}
|
||||
|
||||
type DownloadTime = {
|
||||
|
||||
8
core/src/types/miscellaneous/fileDownloadRequest.ts
Normal file
8
core/src/types/miscellaneous/fileDownloadRequest.ts
Normal file
@ -0,0 +1,8 @@
|
||||
export type FileDownloadRequest = {
|
||||
downloadId: string
|
||||
url: string
|
||||
localPath: string
|
||||
fileName: string
|
||||
displayName: string
|
||||
metadata: Record<string, string | number>
|
||||
}
|
||||
@ -1,3 +1,5 @@
|
||||
export * from './systemResourceInfo'
|
||||
export * from './promptTemplate'
|
||||
export * from './appUpdate'
|
||||
export * from './fileDownloadRequest'
|
||||
export * from './networkConfig'
|
||||
4
core/src/types/miscellaneous/networkConfig.ts
Normal file
4
core/src/types/miscellaneous/networkConfig.ts
Normal file
@ -0,0 +1,4 @@
|
||||
export type NetworkConfig = {
|
||||
proxy?: string
|
||||
ignoreSSL?: boolean
|
||||
}
|
||||
@ -2,3 +2,31 @@ export type SystemResourceInfo = {
|
||||
numCpuPhysicalCore: number
|
||||
memAvailable: number
|
||||
}
|
||||
|
||||
export type RunMode = 'cpu' | 'gpu'
|
||||
|
||||
export type GpuSetting = {
|
||||
notify: boolean
|
||||
run_mode: RunMode
|
||||
nvidia_driver: {
|
||||
exist: boolean
|
||||
version: string
|
||||
}
|
||||
cuda: {
|
||||
exist: boolean
|
||||
version: string
|
||||
}
|
||||
gpus: GpuSettingInfo[]
|
||||
gpu_highest_vram: string
|
||||
gpus_in_use: string[]
|
||||
is_initial: boolean
|
||||
// TODO: This needs to be set based on user toggle in settings
|
||||
vulkan: boolean
|
||||
}
|
||||
|
||||
export type GpuSettingInfo = {
|
||||
id: string
|
||||
vram: string
|
||||
name: string
|
||||
arch?: string
|
||||
}
|
||||
|
||||
@ -19,6 +19,7 @@ export enum InferenceEngine {
|
||||
nitro = 'nitro',
|
||||
openai = 'openai',
|
||||
triton_trtllm = 'triton_trtllm',
|
||||
nitro_tensorrt_llm = 'nitro-tensorrt-llm',
|
||||
|
||||
tool_retrieval_enabled = 'tool_retrieval_enabled',
|
||||
}
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import { GpuSetting } from '../miscellaneous'
|
||||
import { Model } from './modelEntity'
|
||||
|
||||
/**
|
||||
@ -10,7 +11,11 @@ export interface ModelInterface {
|
||||
* @param network - Optional object to specify proxy/whether to ignore SSL certificates.
|
||||
* @returns A Promise that resolves when the model has been downloaded.
|
||||
*/
|
||||
downloadModel(model: Model, network?: { ignoreSSL?: boolean; proxy?: string }): Promise<void>
|
||||
downloadModel(
|
||||
model: Model,
|
||||
gpuSettings?: GpuSetting,
|
||||
network?: { ignoreSSL?: boolean; proxy?: string }
|
||||
): Promise<void>
|
||||
|
||||
/**
|
||||
* Cancels the download of a specific model.
|
||||
|
||||
@ -1 +1,2 @@
|
||||
export * from './monitoringInterface'
|
||||
export * from './resourceInfo'
|
||||
|
||||
6
core/src/types/monitoring/resourceInfo.ts
Normal file
6
core/src/types/monitoring/resourceInfo.ts
Normal file
@ -0,0 +1,6 @@
|
||||
export type ResourceInfo = {
|
||||
mem: {
|
||||
totalMemory: number
|
||||
usedMemory: number
|
||||
}
|
||||
}
|
||||
@ -13,7 +13,7 @@
|
||||
"declarationDir": "dist/types",
|
||||
"outDir": "dist/lib",
|
||||
"importHelpers": true,
|
||||
"types": ["@types/jest"]
|
||||
"types": ["@types/jest"],
|
||||
},
|
||||
"include": ["src"]
|
||||
"include": ["src"],
|
||||
}
|
||||
|
||||
@ -13,6 +13,7 @@ import {
|
||||
events,
|
||||
DownloadEvent,
|
||||
log,
|
||||
DownloadRequest,
|
||||
} from '@janhq/core'
|
||||
import { ggufMetadata } from 'hyllama'
|
||||
|
||||
@ -148,7 +149,11 @@ export default class JanHuggingFaceExtension extends HuggingFaceExtension {
|
||||
|
||||
if (this.interrupted) return
|
||||
if (!(await fs.existsSync(localPath))) {
|
||||
downloadFile(url, localPath, network)
|
||||
const downloadRequest: DownloadRequest = {
|
||||
url,
|
||||
localPath,
|
||||
}
|
||||
downloadFile(downloadRequest, network)
|
||||
filePaths.push(filePath)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
@echo off
|
||||
set /p NITRO_VERSION=<./bin/version.txt
|
||||
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin
|
||||
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
"license": "AGPL-3.0",
|
||||
"scripts": {
|
||||
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
|
||||
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro && download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
|
||||
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
|
||||
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
|
||||
"downloadnitro:win32": "download.bat",
|
||||
"downloadnitro": "run-script-os",
|
||||
|
||||
@ -108,9 +108,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
|
||||
events.on(InferenceEvent.OnInferenceStopped, () =>
|
||||
this.onInferenceStopped()
|
||||
)
|
||||
|
||||
// Attempt to fetch nvidia info
|
||||
await executeOnMain(NODE, 'updateNvidiaInfo', {})
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -1,237 +0,0 @@
|
||||
import { writeFileSync, existsSync, readFileSync } from 'fs'
|
||||
import { exec, spawn } from 'child_process'
|
||||
import path from 'path'
|
||||
import { getJanDataFolderPath, log } from '@janhq/core/node'
|
||||
|
||||
/**
|
||||
* Default GPU settings
|
||||
* TODO: This needs to be refactored to support multiple accelerators
|
||||
**/
|
||||
const DEFALT_SETTINGS = {
|
||||
notify: true,
|
||||
run_mode: 'cpu',
|
||||
nvidia_driver: {
|
||||
exist: false,
|
||||
version: '',
|
||||
},
|
||||
cuda: {
|
||||
exist: false,
|
||||
version: '',
|
||||
},
|
||||
gpus: [],
|
||||
gpu_highest_vram: '',
|
||||
gpus_in_use: [],
|
||||
is_initial: true,
|
||||
// TODO: This needs to be set based on user toggle in settings
|
||||
vulkan: false
|
||||
}
|
||||
|
||||
/**
|
||||
* Path to the settings file
|
||||
**/
|
||||
export const GPU_INFO_FILE = path.join(
|
||||
getJanDataFolderPath(),
|
||||
'settings',
|
||||
'settings.json'
|
||||
)
|
||||
|
||||
/**
|
||||
* Current nitro process
|
||||
*/
|
||||
let nitroProcessInfo: NitroProcessInfo | undefined = undefined
|
||||
|
||||
/**
|
||||
* Nitro process info
|
||||
*/
|
||||
export interface NitroProcessInfo {
|
||||
isRunning: boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* This will retrive GPU informations and persist settings.json
|
||||
* Will be called when the extension is loaded to turn on GPU acceleration if supported
|
||||
*/
|
||||
export async function updateNvidiaInfo() {
|
||||
if (process.platform !== 'darwin') {
|
||||
let data
|
||||
try {
|
||||
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
|
||||
} catch (error) {
|
||||
data = DEFALT_SETTINGS
|
||||
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
|
||||
}
|
||||
updateNvidiaDriverInfo()
|
||||
updateGpuInfo()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve current nitro process
|
||||
*/
|
||||
export const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => {
|
||||
nitroProcessInfo = {
|
||||
isRunning: subprocess != null,
|
||||
}
|
||||
return nitroProcessInfo
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate nvidia and cuda for linux and windows
|
||||
*/
|
||||
export async function updateNvidiaDriverInfo(): Promise<void> {
|
||||
exec(
|
||||
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
|
||||
(error, stdout) => {
|
||||
let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
|
||||
|
||||
if (!error) {
|
||||
const firstLine = stdout.split('\n')[0].trim()
|
||||
data['nvidia_driver'].exist = true
|
||||
data['nvidia_driver'].version = firstLine
|
||||
} else {
|
||||
data['nvidia_driver'].exist = false
|
||||
}
|
||||
|
||||
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
|
||||
Promise.resolve()
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if file exists in paths
|
||||
*/
|
||||
export function checkFileExistenceInPaths(
|
||||
file: string,
|
||||
paths: string[]
|
||||
): boolean {
|
||||
return paths.some((p) => existsSync(path.join(p, file)))
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate cuda for linux and windows
|
||||
*/
|
||||
export function updateCudaExistence(
|
||||
data: Record<string, any> = DEFALT_SETTINGS
|
||||
): Record<string, any> {
|
||||
let filesCuda12: string[]
|
||||
let filesCuda11: string[]
|
||||
let paths: string[]
|
||||
let cudaVersion: string = ''
|
||||
|
||||
if (process.platform === 'win32') {
|
||||
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
|
||||
filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
|
||||
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
|
||||
} else {
|
||||
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
|
||||
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
|
||||
paths = process.env.LD_LIBRARY_PATH
|
||||
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
|
||||
: []
|
||||
paths.push('/usr/lib/x86_64-linux-gnu/')
|
||||
}
|
||||
|
||||
let cudaExists = filesCuda12.every(
|
||||
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
|
||||
)
|
||||
|
||||
if (!cudaExists) {
|
||||
cudaExists = filesCuda11.every(
|
||||
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
|
||||
)
|
||||
if (cudaExists) {
|
||||
cudaVersion = '11'
|
||||
}
|
||||
} else {
|
||||
cudaVersion = '12'
|
||||
}
|
||||
|
||||
data['cuda'].exist = cudaExists
|
||||
data['cuda'].version = cudaVersion
|
||||
console.debug(data['is_initial'], data['gpus_in_use'])
|
||||
if (cudaExists && data['is_initial'] && data['gpus_in_use'].length > 0) {
|
||||
data.run_mode = 'gpu'
|
||||
}
|
||||
data.is_initial = false
|
||||
return data
|
||||
}
|
||||
|
||||
/**
|
||||
* Get GPU information
|
||||
*/
|
||||
export async function updateGpuInfo(): Promise<void> {
|
||||
let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
|
||||
|
||||
// Cuda
|
||||
if (data['vulkan'] === true) {
|
||||
// Vulkan
|
||||
exec(
|
||||
process.platform === 'win32'
|
||||
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
|
||||
: `${__dirname}/../bin/vulkaninfo --summary`,
|
||||
(error, stdout) => {
|
||||
if (!error) {
|
||||
const output = stdout.toString()
|
||||
log(output)
|
||||
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
|
||||
|
||||
let gpus = []
|
||||
let match
|
||||
while ((match = gpuRegex.exec(output)) !== null) {
|
||||
const id = match[1]
|
||||
const name = match[2]
|
||||
gpus.push({ id, vram: 0, name })
|
||||
}
|
||||
data.gpus = gpus
|
||||
|
||||
if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
|
||||
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
|
||||
}
|
||||
|
||||
data = updateCudaExistence(data)
|
||||
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
|
||||
}
|
||||
Promise.resolve()
|
||||
}
|
||||
)
|
||||
} else {
|
||||
exec(
|
||||
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
|
||||
(error, stdout) => {
|
||||
if (!error) {
|
||||
log(stdout)
|
||||
// Get GPU info and gpu has higher memory first
|
||||
let highestVram = 0
|
||||
let highestVramId = '0'
|
||||
let gpus = stdout
|
||||
.trim()
|
||||
.split('\n')
|
||||
.map((line) => {
|
||||
let [id, vram, name] = line.split(', ')
|
||||
vram = vram.replace(/\r/g, '')
|
||||
if (parseFloat(vram) > highestVram) {
|
||||
highestVram = parseFloat(vram)
|
||||
highestVramId = id
|
||||
}
|
||||
return { id, vram, name }
|
||||
})
|
||||
|
||||
data.gpus = gpus
|
||||
data.gpu_highest_vram = highestVramId
|
||||
} else {
|
||||
data.gpus = []
|
||||
data.gpu_highest_vram = ''
|
||||
}
|
||||
|
||||
if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
|
||||
data.gpus_in_use = [data['gpu_highest_vram']]
|
||||
}
|
||||
|
||||
data = updateCudaExistence(data)
|
||||
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
|
||||
Promise.resolve()
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
@ -1,12 +1,19 @@
|
||||
import { getJanDataFolderPath } from '@janhq/core/node'
|
||||
import { readFileSync } from 'fs'
|
||||
import * as path from 'path'
|
||||
import { GPU_INFO_FILE } from './accelerator'
|
||||
|
||||
export interface NitroExecutableOptions {
|
||||
executablePath: string
|
||||
cudaVisibleDevices: string
|
||||
vkVisibleDevices: string
|
||||
}
|
||||
|
||||
export const GPU_INFO_FILE = path.join(
|
||||
getJanDataFolderPath(),
|
||||
'settings',
|
||||
'settings.json'
|
||||
)
|
||||
|
||||
/**
|
||||
* Find which executable file to run based on the current platform.
|
||||
* @returns The name of the executable file to run.
|
||||
|
||||
@ -4,7 +4,6 @@ import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
|
||||
import tcpPortUsed from 'tcp-port-used'
|
||||
import fetchRT from 'fetch-retry'
|
||||
import { log, getSystemResourceInfo } from '@janhq/core/node'
|
||||
import { getNitroProcessInfo, updateNvidiaInfo } from './accelerator'
|
||||
import {
|
||||
Model,
|
||||
InferenceEngine,
|
||||
@ -385,11 +384,26 @@ function dispose() {
|
||||
killSubprocess()
|
||||
}
|
||||
|
||||
/**
|
||||
* Nitro process info
|
||||
*/
|
||||
export interface NitroProcessInfo {
|
||||
isRunning: boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve current nitro process
|
||||
*/
|
||||
const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
|
||||
return {
|
||||
isRunning: subprocess != null,
|
||||
}
|
||||
}
|
||||
|
||||
export default {
|
||||
runModel,
|
||||
stopModel,
|
||||
killSubprocess,
|
||||
dispose,
|
||||
updateNvidiaInfo,
|
||||
getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess),
|
||||
getCurrentNitroProcessInfo,
|
||||
}
|
||||
|
||||
@ -17,6 +17,8 @@ import {
|
||||
ImportingModel,
|
||||
LocalImportModelEvent,
|
||||
baseName,
|
||||
GpuSetting,
|
||||
DownloadRequest,
|
||||
} from '@janhq/core'
|
||||
|
||||
import { extractFileName } from './helpers/path'
|
||||
@ -29,10 +31,14 @@ export default class JanModelExtension extends ModelExtension {
|
||||
private static readonly _modelMetadataFileName = 'model.json'
|
||||
private static readonly _supportedModelFormat = '.gguf'
|
||||
private static readonly _incompletedModelFileName = '.download'
|
||||
private static readonly _offlineInferenceEngine = InferenceEngine.nitro
|
||||
|
||||
private static readonly _offlineInferenceEngine = [
|
||||
InferenceEngine.nitro,
|
||||
InferenceEngine.nitro_tensorrt_llm,
|
||||
]
|
||||
private static readonly _tensorRtEngineFormat = '.engine'
|
||||
private static readonly _configDirName = 'config'
|
||||
private static readonly _defaultModelFileName = 'default-model.json'
|
||||
private static readonly _supportedGpuArch = ['turing', 'ampere', 'ada']
|
||||
|
||||
/**
|
||||
* Called when the extension is loaded.
|
||||
@ -89,12 +95,52 @@ export default class JanModelExtension extends ModelExtension {
|
||||
*/
|
||||
async downloadModel(
|
||||
model: Model,
|
||||
gpuSettings?: GpuSetting,
|
||||
network?: { ignoreSSL?: boolean; proxy?: string }
|
||||
): Promise<void> {
|
||||
// create corresponding directory
|
||||
const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
|
||||
if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath)
|
||||
|
||||
if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
|
||||
if (!gpuSettings || gpuSettings.gpus.length === 0) {
|
||||
console.error('No GPU found. Please check your GPU setting.')
|
||||
return
|
||||
}
|
||||
const firstGpu = gpuSettings.gpus[0]
|
||||
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
|
||||
console.error('No Nvidia GPU found. Please check your GPU setting.')
|
||||
return
|
||||
}
|
||||
const gpuArch = firstGpu.arch
|
||||
if (gpuArch === undefined) {
|
||||
console.error(
|
||||
'No GPU architecture found. Please check your GPU setting.'
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) {
|
||||
console.error(
|
||||
`Your GPU: ${firstGpu} is not supported. Only 20xx, 30xx, 40xx series are supported.`
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
const os = 'windows' // TODO: remove this hard coded value
|
||||
|
||||
const newSources = model.sources.map((source) => {
|
||||
const newSource = { ...source }
|
||||
newSource.url = newSource.url
|
||||
.replace(/<os>/g, os)
|
||||
.replace(/<gpuarch>/g, gpuArch)
|
||||
return newSource
|
||||
})
|
||||
model.sources = newSources
|
||||
}
|
||||
|
||||
console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
|
||||
|
||||
if (model.sources.length > 1) {
|
||||
// path to model binaries
|
||||
for (const source of model.sources) {
|
||||
@ -105,8 +151,11 @@ export default class JanModelExtension extends ModelExtension {
|
||||
if (source.filename) {
|
||||
path = await joinPath([modelDirPath, source.filename])
|
||||
}
|
||||
|
||||
downloadFile(source.url, path, network)
|
||||
const downloadRequest: DownloadRequest = {
|
||||
url: source.url,
|
||||
localPath: path,
|
||||
}
|
||||
downloadFile(downloadRequest, network)
|
||||
}
|
||||
// TODO: handle multiple binaries for web later
|
||||
} else {
|
||||
@ -115,7 +164,11 @@ export default class JanModelExtension extends ModelExtension {
|
||||
JanModelExtension._supportedModelFormat
|
||||
)
|
||||
const path = await joinPath([modelDirPath, fileName])
|
||||
downloadFile(model.sources[0]?.url, path, network)
|
||||
const downloadRequest: DownloadRequest = {
|
||||
url: model.sources[0]?.url,
|
||||
localPath: path,
|
||||
}
|
||||
downloadFile(downloadRequest, network)
|
||||
|
||||
if (window && window.core?.api && window.core.api.baseApiUrl) {
|
||||
this.startPollingDownloadProgress(model.id)
|
||||
@ -238,7 +291,7 @@ export default class JanModelExtension extends ModelExtension {
|
||||
async getDownloadedModels(): Promise<Model[]> {
|
||||
return await this.getModelsMetadata(
|
||||
async (modelDir: string, model: Model) => {
|
||||
if (model.engine !== JanModelExtension._offlineInferenceEngine)
|
||||
if (!JanModelExtension._offlineInferenceEngine.includes(model.engine))
|
||||
return true
|
||||
|
||||
// model binaries (sources) are absolute path & exist
|
||||
@ -247,22 +300,32 @@ export default class JanModelExtension extends ModelExtension {
|
||||
)
|
||||
if (existFiles.every((exist) => exist)) return true
|
||||
|
||||
return await fs
|
||||
const result = await fs
|
||||
.readdirSync(await joinPath([JanModelExtension._homeDir, modelDir]))
|
||||
.then((files: string[]) => {
|
||||
// Model binary exists in the directory
|
||||
// Model binary name can match model ID or be a .gguf file and not be an incompleted model file
|
||||
return (
|
||||
files.includes(modelDir) ||
|
||||
files.filter(
|
||||
(file) =>
|
||||
files.filter((file) => {
|
||||
if (
|
||||
file.endsWith(JanModelExtension._incompletedModelFileName)
|
||||
) {
|
||||
return false
|
||||
}
|
||||
return (
|
||||
file
|
||||
.toLowerCase()
|
||||
.includes(JanModelExtension._supportedModelFormat) &&
|
||||
!file.endsWith(JanModelExtension._incompletedModelFileName)
|
||||
)?.length >= model.sources.length
|
||||
.includes(JanModelExtension._supportedModelFormat) ||
|
||||
file
|
||||
.toLowerCase()
|
||||
.includes(JanModelExtension._tensorRtEngineFormat)
|
||||
)
|
||||
})?.length > 0 // TODO: NamH find better way (can use basename to check the file name with source url)
|
||||
)
|
||||
})
|
||||
|
||||
return result
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
0
extensions/monitoring-extension/bin/.gitkeep
Normal file
0
extensions/monitoring-extension/bin/.gitkeep
Normal file
2
extensions/monitoring-extension/download.bat
Normal file
2
extensions/monitoring-extension/download.bat
Normal file
@ -0,0 +1,2 @@
|
||||
@echo off
|
||||
.\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin
|
||||
@ -3,21 +3,40 @@
|
||||
"version": "1.0.10",
|
||||
"description": "This extension provides system health and OS level data",
|
||||
"main": "dist/index.js",
|
||||
"module": "dist/module.js",
|
||||
"node": "dist/node/index.cjs.js",
|
||||
"author": "Jan <service@jan.ai>",
|
||||
"license": "AGPL-3.0",
|
||||
"scripts": {
|
||||
"build": "tsc -b . && webpack --config webpack.config.js",
|
||||
"build": "tsc --module commonjs && rollup -c rollup.config.ts && npm run download-artifacts",
|
||||
"download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
|
||||
"download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
|
||||
"download-artifacts:win32": "download.bat",
|
||||
"download-artifacts:linux": "download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
|
||||
"build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../pre-install"
|
||||
},
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
"./main": "./dist/node/index.cjs.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@rollup/plugin-commonjs": "^25.0.7",
|
||||
"@rollup/plugin-json": "^6.1.0",
|
||||
"@rollup/plugin-node-resolve": "^15.2.3",
|
||||
"@types/node": "^20.11.4",
|
||||
"@types/node-os-utils": "^1.3.4",
|
||||
"run-script-os": "^1.1.6",
|
||||
"cpx": "^1.5.0",
|
||||
"rimraf": "^3.0.2",
|
||||
"webpack": "^5.88.2",
|
||||
"webpack-cli": "^5.1.4",
|
||||
"ts-loader": "^9.5.0"
|
||||
"rollup": "^2.38.5",
|
||||
"rollup-plugin-define": "^1.0.1",
|
||||
"rollup-plugin-sourcemaps": "^0.6.3",
|
||||
"rollup-plugin-typescript2": "^0.36.0",
|
||||
"typescript": "^5.3.3",
|
||||
"download-cli": "^1.1.1"
|
||||
},
|
||||
"dependencies": {
|
||||
"@janhq/core": "file:../../core",
|
||||
"@rollup/plugin-replace": "^5.0.5",
|
||||
"node-os-utils": "^1.3.7"
|
||||
},
|
||||
"files": [
|
||||
|
||||
68
extensions/monitoring-extension/rollup.config.ts
Normal file
68
extensions/monitoring-extension/rollup.config.ts
Normal file
@ -0,0 +1,68 @@
|
||||
import resolve from '@rollup/plugin-node-resolve'
|
||||
import commonjs from '@rollup/plugin-commonjs'
|
||||
import sourceMaps from 'rollup-plugin-sourcemaps'
|
||||
import typescript from 'rollup-plugin-typescript2'
|
||||
import json from '@rollup/plugin-json'
|
||||
import replace from '@rollup/plugin-replace'
|
||||
const packageJson = require('./package.json')
|
||||
|
||||
export default [
|
||||
{
|
||||
input: `src/index.ts`,
|
||||
output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
|
||||
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
|
||||
external: [],
|
||||
watch: {
|
||||
include: 'src/**',
|
||||
},
|
||||
plugins: [
|
||||
replace({
|
||||
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
||||
}),
|
||||
// Allow json resolution
|
||||
json(),
|
||||
// Compile TypeScript files
|
||||
typescript({ useTsconfigDeclarationDir: true }),
|
||||
// Compile TypeScript files
|
||||
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
|
||||
commonjs(),
|
||||
// Allow node_modules resolution, so you can use 'external' to control
|
||||
// which external modules to include in the bundle
|
||||
// https://github.com/rollup/rollup-plugin-node-resolve#usage
|
||||
resolve({
|
||||
extensions: ['.js', '.ts', '.svelte'],
|
||||
}),
|
||||
|
||||
// Resolve source maps to the original source
|
||||
sourceMaps(),
|
||||
],
|
||||
},
|
||||
{
|
||||
input: `src/node/index.ts`,
|
||||
output: [
|
||||
{ file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
|
||||
],
|
||||
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
|
||||
external: ['@janhq/core/node'],
|
||||
watch: {
|
||||
include: 'src/node/**',
|
||||
},
|
||||
plugins: [
|
||||
// Allow json resolution
|
||||
json(),
|
||||
// Compile TypeScript files
|
||||
typescript({ useTsconfigDeclarationDir: true }),
|
||||
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
|
||||
commonjs(),
|
||||
// Allow node_modules resolution, so you can use 'external' to control
|
||||
// which external modules to include in the bundle
|
||||
// https://github.com/rollup/rollup-plugin-node-resolve#usage
|
||||
resolve({
|
||||
extensions: ['.ts', '.js', '.json'],
|
||||
}),
|
||||
|
||||
// Resolve source maps to the original source
|
||||
sourceMaps(),
|
||||
],
|
||||
},
|
||||
]
|
||||
@ -1 +1,18 @@
|
||||
declare const MODULE: string
|
||||
declare const NODE: string
|
||||
|
||||
type CpuGpuInfo = {
|
||||
cpu: {
|
||||
usage: number
|
||||
}
|
||||
gpu: GpuInfo[]
|
||||
}
|
||||
|
||||
type GpuInfo = {
|
||||
id: string
|
||||
name: string
|
||||
temperature: string
|
||||
utilization: string
|
||||
memoryTotal: string
|
||||
memoryFree: string
|
||||
memoryUtilization: string
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { MonitoringExtension, executeOnMain } from '@janhq/core'
|
||||
import { GpuSetting, MonitoringExtension, executeOnMain } from '@janhq/core'
|
||||
|
||||
/**
|
||||
* JanMonitoringExtension is a extension that provides system monitoring functionality.
|
||||
@ -8,19 +8,30 @@ export default class JanMonitoringExtension extends MonitoringExtension {
|
||||
/**
|
||||
* Called when the extension is loaded.
|
||||
*/
|
||||
async onLoad() {}
|
||||
async onLoad() {
|
||||
// Attempt to fetch nvidia info
|
||||
await executeOnMain(NODE, 'updateNvidiaInfo')
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when the extension is unloaded.
|
||||
*/
|
||||
onUnload(): void {}
|
||||
|
||||
/**
|
||||
* Returns the GPU configuration.
|
||||
* @returns A Promise that resolves to an object containing the GPU configuration.
|
||||
*/
|
||||
async getGpuSetting(): Promise<GpuSetting | undefined> {
|
||||
return executeOnMain(NODE, 'getGpuConfig')
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns information about the system resources.
|
||||
* @returns A Promise that resolves to an object containing information about the system resources.
|
||||
*/
|
||||
getResourcesInfo(): Promise<any> {
|
||||
return executeOnMain(MODULE, 'getResourcesInfo')
|
||||
return executeOnMain(NODE, 'getResourcesInfo')
|
||||
}
|
||||
|
||||
/**
|
||||
@ -28,6 +39,6 @@ export default class JanMonitoringExtension extends MonitoringExtension {
|
||||
* @returns A Promise that resolves to an object containing information about the current system load.
|
||||
*/
|
||||
getCurrentLoad(): Promise<any> {
|
||||
return executeOnMain(MODULE, 'getCurrentLoad')
|
||||
return executeOnMain(NODE, 'getCurrentLoad')
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,92 +0,0 @@
|
||||
const nodeOsUtils = require('node-os-utils')
|
||||
const getJanDataFolderPath = require('@janhq/core/node').getJanDataFolderPath
|
||||
const path = require('path')
|
||||
const { readFileSync } = require('fs')
|
||||
const exec = require('child_process').exec
|
||||
|
||||
const NVIDIA_INFO_FILE = path.join(
|
||||
getJanDataFolderPath(),
|
||||
'settings',
|
||||
'settings.json'
|
||||
)
|
||||
|
||||
const getResourcesInfo = () =>
|
||||
new Promise((resolve) => {
|
||||
nodeOsUtils.mem.used().then((ramUsedInfo) => {
|
||||
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
|
||||
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
|
||||
const response = {
|
||||
mem: {
|
||||
totalMemory,
|
||||
usedMemory,
|
||||
},
|
||||
}
|
||||
resolve(response)
|
||||
})
|
||||
})
|
||||
|
||||
const getCurrentLoad = () =>
|
||||
new Promise((resolve, reject) => {
|
||||
nodeOsUtils.cpu.usage().then((cpuPercentage) => {
|
||||
let data = {
|
||||
run_mode: 'cpu',
|
||||
gpus_in_use: [],
|
||||
}
|
||||
if (process.platform !== 'darwin') {
|
||||
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))
|
||||
}
|
||||
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
|
||||
const gpuIds = data['gpus_in_use'].join(',')
|
||||
if (gpuIds !== '' && data['vulkan'] !== true) {
|
||||
exec(
|
||||
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
|
||||
(error, stdout, _) => {
|
||||
if (error) {
|
||||
console.error(`exec error: ${error}`)
|
||||
reject(error)
|
||||
return
|
||||
}
|
||||
const gpuInfo = stdout
|
||||
.trim()
|
||||
.split('\n')
|
||||
.map((line) => {
|
||||
const [
|
||||
id,
|
||||
name,
|
||||
temperature,
|
||||
utilization,
|
||||
memoryTotal,
|
||||
memoryFree,
|
||||
memoryUtilization,
|
||||
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
|
||||
return {
|
||||
id,
|
||||
name,
|
||||
temperature,
|
||||
utilization,
|
||||
memoryTotal,
|
||||
memoryFree,
|
||||
memoryUtilization,
|
||||
}
|
||||
})
|
||||
resolve({
|
||||
cpu: { usage: cpuPercentage },
|
||||
gpu: gpuInfo,
|
||||
})
|
||||
}
|
||||
)
|
||||
} else {
|
||||
// Handle the case where gpuIds is empty
|
||||
resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
|
||||
}
|
||||
} else {
|
||||
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
|
||||
resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
module.exports = {
|
||||
getResourcesInfo,
|
||||
getCurrentLoad,
|
||||
}
|
||||
317
extensions/monitoring-extension/src/node/index.ts
Normal file
317
extensions/monitoring-extension/src/node/index.ts
Normal file
@ -0,0 +1,317 @@
|
||||
import { GpuSetting, GpuSettingInfo, ResourceInfo } from '@janhq/core'
|
||||
import { getJanDataFolderPath, log } from '@janhq/core/node'
|
||||
import { mem, cpu } from 'node-os-utils'
|
||||
import { exec } from 'child_process'
|
||||
import { writeFileSync, existsSync, readFileSync } from 'fs'
|
||||
import path from 'path'
|
||||
|
||||
/**
|
||||
* Path to the settings file
|
||||
**/
|
||||
export const GPU_INFO_FILE = path.join(
|
||||
getJanDataFolderPath(),
|
||||
'settings',
|
||||
'settings.json'
|
||||
)
|
||||
|
||||
/**
|
||||
* Default GPU settings
|
||||
* TODO: This needs to be refactored to support multiple accelerators
|
||||
**/
|
||||
const DEFAULT_SETTINGS: GpuSetting = {
|
||||
notify: true,
|
||||
run_mode: 'cpu',
|
||||
nvidia_driver: {
|
||||
exist: false,
|
||||
version: '',
|
||||
},
|
||||
cuda: {
|
||||
exist: false,
|
||||
version: '',
|
||||
},
|
||||
gpus: [],
|
||||
gpu_highest_vram: '',
|
||||
gpus_in_use: [],
|
||||
is_initial: true,
|
||||
// TODO: This needs to be set based on user toggle in settings
|
||||
vulkan: false,
|
||||
}
|
||||
|
||||
export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
|
||||
if (process.platform === 'darwin') return undefined
|
||||
return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
|
||||
}
|
||||
|
||||
export const getResourcesInfo = async (): Promise<ResourceInfo> => {
|
||||
const ramUsedInfo = await mem.used()
|
||||
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
|
||||
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
|
||||
|
||||
const resourceInfo: ResourceInfo = {
|
||||
mem: {
|
||||
totalMemory,
|
||||
usedMemory,
|
||||
},
|
||||
}
|
||||
|
||||
return resourceInfo
|
||||
}
|
||||
|
||||
export const getCurrentLoad = () =>
|
||||
new Promise<CpuGpuInfo>(async (resolve, reject) => {
|
||||
const cpuPercentage = await cpu.usage()
|
||||
let data = {
|
||||
run_mode: 'cpu',
|
||||
gpus_in_use: [],
|
||||
}
|
||||
|
||||
if (process.platform !== 'darwin') {
|
||||
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
|
||||
}
|
||||
|
||||
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
|
||||
const gpuIds = data.gpus_in_use.join(',')
|
||||
if (gpuIds !== '' && data['vulkan'] !== true) {
|
||||
exec(
|
||||
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
|
||||
(error, stdout, _) => {
|
||||
if (error) {
|
||||
console.error(`exec error: ${error}`)
|
||||
throw new Error(error.message)
|
||||
}
|
||||
const gpuInfo: GpuInfo[] = stdout
|
||||
.trim()
|
||||
.split('\n')
|
||||
.map((line) => {
|
||||
const [
|
||||
id,
|
||||
name,
|
||||
temperature,
|
||||
utilization,
|
||||
memoryTotal,
|
||||
memoryFree,
|
||||
memoryUtilization,
|
||||
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
|
||||
return {
|
||||
id,
|
||||
name,
|
||||
temperature,
|
||||
utilization,
|
||||
memoryTotal,
|
||||
memoryFree,
|
||||
memoryUtilization,
|
||||
}
|
||||
})
|
||||
|
||||
resolve({
|
||||
cpu: { usage: cpuPercentage },
|
||||
gpu: gpuInfo,
|
||||
})
|
||||
}
|
||||
)
|
||||
} else {
|
||||
// Handle the case where gpuIds is empty
|
||||
resolve({
|
||||
cpu: { usage: cpuPercentage },
|
||||
gpu: [],
|
||||
})
|
||||
}
|
||||
} else {
|
||||
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
|
||||
resolve({
|
||||
cpu: { usage: cpuPercentage },
|
||||
gpu: [],
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
/**
|
||||
* This will retrive GPU informations and persist settings.json
|
||||
* Will be called when the extension is loaded to turn on GPU acceleration if supported
|
||||
*/
|
||||
export const updateNvidiaInfo = async () => {
|
||||
// ignore if macos
|
||||
if (process.platform === 'darwin') return
|
||||
|
||||
try {
|
||||
JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
|
||||
} catch (error) {
|
||||
writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
|
||||
}
|
||||
|
||||
await updateNvidiaDriverInfo()
|
||||
await updateGpuInfo()
|
||||
}
|
||||
|
||||
const updateNvidiaDriverInfo = async () =>
|
||||
new Promise((resolve, reject) => {
|
||||
exec(
|
||||
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
|
||||
(error, stdout) => {
|
||||
const data: GpuSetting = JSON.parse(
|
||||
readFileSync(GPU_INFO_FILE, 'utf-8')
|
||||
)
|
||||
|
||||
if (!error) {
|
||||
const firstLine = stdout.split('\n')[0].trim()
|
||||
data.nvidia_driver.exist = true
|
||||
data.nvidia_driver.version = firstLine
|
||||
} else {
|
||||
data.nvidia_driver.exist = false
|
||||
}
|
||||
|
||||
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
|
||||
resolve({})
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
const getGpuArch = (gpuName: string): string => {
|
||||
if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
|
||||
|
||||
if (gpuName.includes('20')) return 'turing'
|
||||
else if (gpuName.includes('30')) return 'ampere'
|
||||
else if (gpuName.includes('40')) return 'ada'
|
||||
else return 'unknown'
|
||||
}
|
||||
|
||||
const updateGpuInfo = async () =>
|
||||
new Promise((resolve, reject) => {
|
||||
let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
|
||||
|
||||
// Cuda
|
||||
if (data.vulkan === true) {
|
||||
// Vulkan
|
||||
exec(
|
||||
process.platform === 'win32'
|
||||
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
|
||||
: `${__dirname}/../bin/vulkaninfo --summary`,
|
||||
(error, stdout) => {
|
||||
if (!error) {
|
||||
const output = stdout.toString()
|
||||
|
||||
log(output)
|
||||
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
|
||||
|
||||
const gpus: GpuSettingInfo[] = []
|
||||
let match
|
||||
while ((match = gpuRegex.exec(output)) !== null) {
|
||||
const id = match[1]
|
||||
const name = match[2]
|
||||
const arch = getGpuArch(name)
|
||||
gpus.push({ id, vram: '0', name, arch })
|
||||
}
|
||||
data.gpus = gpus
|
||||
|
||||
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
|
||||
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
|
||||
}
|
||||
|
||||
data = updateCudaExistence(data)
|
||||
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
|
||||
resolve({})
|
||||
} else {
|
||||
reject(error)
|
||||
}
|
||||
}
|
||||
)
|
||||
} else {
|
||||
exec(
|
||||
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
|
||||
(error, stdout) => {
|
||||
if (!error) {
|
||||
log(stdout)
|
||||
// Get GPU info and gpu has higher memory first
|
||||
let highestVram = 0
|
||||
let highestVramId = '0'
|
||||
const gpus: GpuSettingInfo[] = stdout
|
||||
.trim()
|
||||
.split('\n')
|
||||
.map((line) => {
|
||||
let [id, vram, name] = line.split(', ')
|
||||
const arch = getGpuArch(name)
|
||||
vram = vram.replace(/\r/g, '')
|
||||
if (parseFloat(vram) > highestVram) {
|
||||
highestVram = parseFloat(vram)
|
||||
highestVramId = id
|
||||
}
|
||||
return { id, vram, name, arch }
|
||||
})
|
||||
|
||||
data.gpus = gpus
|
||||
data.gpu_highest_vram = highestVramId
|
||||
} else {
|
||||
data.gpus = []
|
||||
data.gpu_highest_vram = ''
|
||||
}
|
||||
|
||||
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
|
||||
data.gpus_in_use = [data.gpu_highest_vram]
|
||||
}
|
||||
|
||||
data = updateCudaExistence(data)
|
||||
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
|
||||
resolve({})
|
||||
}
|
||||
)
|
||||
}
|
||||
})
|
||||
|
||||
/**
|
||||
* Check if file exists in paths
|
||||
*/
|
||||
const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
|
||||
return paths.some((p) => existsSync(path.join(p, file)))
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate cuda for linux and windows
|
||||
*/
|
||||
const updateCudaExistence = (
|
||||
data: GpuSetting = DEFAULT_SETTINGS
|
||||
): GpuSetting => {
|
||||
let filesCuda12: string[]
|
||||
let filesCuda11: string[]
|
||||
let paths: string[]
|
||||
let cudaVersion: string = ''
|
||||
|
||||
if (process.platform === 'win32') {
|
||||
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
|
||||
filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
|
||||
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
|
||||
} else {
|
||||
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
|
||||
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
|
||||
paths = process.env.LD_LIBRARY_PATH
|
||||
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
|
||||
: []
|
||||
paths.push('/usr/lib/x86_64-linux-gnu/')
|
||||
}
|
||||
|
||||
let cudaExists = filesCuda12.every(
|
||||
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
|
||||
)
|
||||
|
||||
if (!cudaExists) {
|
||||
cudaExists = filesCuda11.every(
|
||||
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
|
||||
)
|
||||
if (cudaExists) {
|
||||
cudaVersion = '11'
|
||||
}
|
||||
} else {
|
||||
cudaVersion = '12'
|
||||
}
|
||||
|
||||
data.cuda.exist = cudaExists
|
||||
data.cuda.version = cudaVersion
|
||||
|
||||
console.debug(data.is_initial, data.gpus_in_use)
|
||||
|
||||
if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
|
||||
data.run_mode = 'gpu'
|
||||
}
|
||||
|
||||
data.is_initial = false
|
||||
return data
|
||||
}
|
||||
@ -1,35 +0,0 @@
|
||||
const path = require('path')
|
||||
const webpack = require('webpack')
|
||||
const packageJson = require('./package.json')
|
||||
|
||||
module.exports = {
|
||||
experiments: { outputModule: true },
|
||||
entry: './src/index.ts', // Adjust the entry point to match your project's main file
|
||||
mode: 'production',
|
||||
module: {
|
||||
rules: [
|
||||
{
|
||||
test: /\.tsx?$/,
|
||||
use: 'ts-loader',
|
||||
exclude: /node_modules/,
|
||||
},
|
||||
],
|
||||
},
|
||||
output: {
|
||||
filename: 'index.js', // Adjust the output file name as needed
|
||||
path: path.resolve(__dirname, 'dist'),
|
||||
library: { type: 'module' }, // Specify ESM output format
|
||||
},
|
||||
plugins: [
|
||||
new webpack.DefinePlugin({
|
||||
MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
|
||||
}),
|
||||
],
|
||||
resolve: {
|
||||
extensions: ['.ts', '.js'],
|
||||
},
|
||||
optimization: {
|
||||
minimize: false,
|
||||
},
|
||||
// Add loaders and other configuration as needed for your project
|
||||
}
|
||||
79
extensions/tensorrt-llm-extension/README.md
Normal file
79
extensions/tensorrt-llm-extension/README.md
Normal file
@ -0,0 +1,79 @@
|
||||
# Tensorrt-LLM Extension
|
||||
|
||||
Created using Jan extension example
|
||||
|
||||
# Create a Jan Extension using Typescript
|
||||
|
||||
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
|
||||
|
||||
## Create Your Own Extension
|
||||
|
||||
To create your own extension, you can use this repository as a template! Just follow the below instructions:
|
||||
|
||||
1. Click the Use this template button at the top of the repository
|
||||
2. Select Create a new repository
|
||||
3. Select an owner and name for your new repository
|
||||
4. Click Create repository
|
||||
5. Clone your new repository
|
||||
|
||||
## Initial Setup
|
||||
|
||||
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
|
||||
|
||||
> [!NOTE]
|
||||
>
|
||||
> You'll need to have a reasonably modern version of
|
||||
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
|
||||
> [`nodenv`](https://github.com/nodenv/nodenv) or
|
||||
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
|
||||
> root of your repository to install the version specified in
|
||||
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
|
||||
|
||||
1. :hammer_and_wrench: Install the dependencies
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
1. :building_construction: Package the TypeScript for distribution
|
||||
|
||||
```bash
|
||||
npm run bundle
|
||||
```
|
||||
|
||||
1. :white_check_mark: Check your artifact
|
||||
|
||||
There will be a tgz file in your extension directory now
|
||||
|
||||
## Update the Extension Metadata
|
||||
|
||||
The [`package.json`](package.json) file defines metadata about your extension, such as
|
||||
extension name, main entry, description and version.
|
||||
|
||||
When you copy this repository, update `package.json` with the name, description for your extension.
|
||||
|
||||
## Update the Extension Code
|
||||
|
||||
The [`src/`](./src/) directory is the heart of your extension! This contains the
|
||||
source code that will be run when your extension functions are invoked. You can replace the
|
||||
contents of this directory with your own code.
|
||||
|
||||
There are a few things to keep in mind when writing your extension code:
|
||||
|
||||
- Most Jan Extension functions are processed asynchronously.
|
||||
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
|
||||
|
||||
```typescript
|
||||
import { events, MessageEvent, MessageRequest } from '@janhq/core'
|
||||
|
||||
function onStart(): Promise<any> {
|
||||
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
|
||||
this.inference(data)
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
For more information about the Jan Extension Core module, see the
|
||||
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
|
||||
|
||||
So, what are you waiting for? Go ahead and start customizing your extension!
|
||||
49
extensions/tensorrt-llm-extension/models.json
Normal file
49
extensions/tensorrt-llm-extension/models.json
Normal file
@ -0,0 +1,49 @@
|
||||
[
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "config.json",
|
||||
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/config.json"
|
||||
},
|
||||
{
|
||||
"filename": "rank0.engine",
|
||||
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/rank0.engine"
|
||||
},
|
||||
{
|
||||
"filename": "tokenizer.model",
|
||||
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
|
||||
},
|
||||
{
|
||||
"filename": "special_tokens_map.json",
|
||||
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
|
||||
},
|
||||
{
|
||||
"filename": "tokenizer.json",
|
||||
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
|
||||
},
|
||||
{
|
||||
"filename": "tokenizer_config.json",
|
||||
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
|
||||
}
|
||||
],
|
||||
"id": "llamacorn-1.1b-chat-fp16",
|
||||
"object": "model",
|
||||
"name": "LlamaCorn 1.1B Chat FP16",
|
||||
"version": "1.0",
|
||||
"description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
|
||||
"format": "TensorRT-LLM",
|
||||
"settings": {
|
||||
"ctx_len": 2048
|
||||
},
|
||||
"parameters": {
|
||||
"stream": true,
|
||||
"max_tokens": 4096
|
||||
},
|
||||
"metadata": {
|
||||
"author": "LLama",
|
||||
"tags": ["TensorRT-LLM", "1B", "Finetuned"],
|
||||
"size": 2151000000
|
||||
},
|
||||
"engine": "nitro-tensorrt-llm"
|
||||
}
|
||||
]
|
||||
75
extensions/tensorrt-llm-extension/package.json
Normal file
75
extensions/tensorrt-llm-extension/package.json
Normal file
@ -0,0 +1,75 @@
|
||||
{
|
||||
"name": "@janhq/tensorrt-llm-extension",
|
||||
"version": "0.0.2",
|
||||
"description": "Enables accelerated inference leveraging Nvidia's TensorRT-LLM for optimal GPU hardware optimizations. Compatible with models in TensorRT-LLM format. Requires Nvidia GPU driver and CUDA Toolkit installation.",
|
||||
"main": "dist/index.js",
|
||||
"node": "dist/node/index.cjs.js",
|
||||
"author": "Jan <service@jan.ai>",
|
||||
"license": "AGPL-3.0",
|
||||
"config": {
|
||||
"host": "127.0.0.1",
|
||||
"port": "3928"
|
||||
},
|
||||
"compatibility": {
|
||||
"platform": [
|
||||
"win32",
|
||||
"linux"
|
||||
],
|
||||
"app": [
|
||||
"0.1.0"
|
||||
]
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
|
||||
"build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish:linux": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish:darwin": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish": "run-script-os"
|
||||
},
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
"./main": "./dist/node/index.cjs.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@rollup/plugin-commonjs": "^25.0.7",
|
||||
"@rollup/plugin-json": "^6.1.0",
|
||||
"@rollup/plugin-node-resolve": "^15.2.3",
|
||||
"@rollup/plugin-replace": "^5.0.5",
|
||||
"@types/node": "^20.11.4",
|
||||
"@types/os-utils": "^0.0.4",
|
||||
"@types/tcp-port-used": "^1.0.4",
|
||||
"@types/decompress": "4.2.7",
|
||||
"cpx": "^1.5.0",
|
||||
"download-cli": "^1.1.1",
|
||||
"rimraf": "^3.0.2",
|
||||
"rollup": "^2.38.5",
|
||||
"rollup-plugin-define": "^1.0.1",
|
||||
"rollup-plugin-sourcemaps": "^0.6.3",
|
||||
"rollup-plugin-typescript2": "^0.36.0",
|
||||
"run-script-os": "^1.1.6",
|
||||
"typescript": "^5.2.2"
|
||||
},
|
||||
"dependencies": {
|
||||
"@janhq/core": "file:../../core",
|
||||
"decompress": "^4.2.1",
|
||||
"fetch-retry": "^5.0.6",
|
||||
"path-browserify": "^1.0.1",
|
||||
"rxjs": "^7.8.1",
|
||||
"tcp-port-used": "^1.0.2",
|
||||
"ulid": "^2.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"files": [
|
||||
"dist/*",
|
||||
"package.json",
|
||||
"README.md"
|
||||
],
|
||||
"bundleDependencies": [
|
||||
"tcp-port-used",
|
||||
"fetch-retry",
|
||||
"decompress",
|
||||
"@janhq/core"
|
||||
]
|
||||
}
|
||||
73
extensions/tensorrt-llm-extension/rollup.config.ts
Normal file
73
extensions/tensorrt-llm-extension/rollup.config.ts
Normal file
@ -0,0 +1,73 @@
|
||||
import resolve from '@rollup/plugin-node-resolve'
|
||||
import commonjs from '@rollup/plugin-commonjs'
|
||||
import sourceMaps from 'rollup-plugin-sourcemaps'
|
||||
import typescript from 'rollup-plugin-typescript2'
|
||||
import json from '@rollup/plugin-json'
|
||||
import replace from '@rollup/plugin-replace'
|
||||
const packageJson = require('./package.json')
|
||||
|
||||
export default [
|
||||
{
|
||||
input: `src/index.ts`,
|
||||
output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
|
||||
watch: {
|
||||
include: 'src/**',
|
||||
},
|
||||
plugins: [
|
||||
replace({
|
||||
EXTENSION_NAME: JSON.stringify(packageJson.name),
|
||||
TENSORRT_VERSION: JSON.stringify('0.1.5'),
|
||||
DOWNLOAD_RUNNER_URL:
|
||||
process.platform === 'darwin' || process.platform === 'win32'
|
||||
? JSON.stringify(
|
||||
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
|
||||
)
|
||||
: JSON.stringify(
|
||||
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
|
||||
),
|
||||
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
||||
INFERENCE_URL: JSON.stringify(
|
||||
process.env.INFERENCE_URL ||
|
||||
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/v1/chat/completions`
|
||||
),
|
||||
COMPATIBILITY: JSON.stringify(packageJson.compatibility),
|
||||
}),
|
||||
json(),
|
||||
typescript({ useTsconfigDeclarationDir: true }),
|
||||
commonjs(),
|
||||
resolve({
|
||||
extensions: ['.js', '.ts', '.svelte'],
|
||||
}),
|
||||
sourceMaps(),
|
||||
],
|
||||
},
|
||||
{
|
||||
input: `src/node/index.ts`,
|
||||
output: [
|
||||
{ file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
|
||||
],
|
||||
external: ['@janhq/core/node'],
|
||||
watch: {
|
||||
include: 'src/node/**',
|
||||
},
|
||||
plugins: [
|
||||
replace({
|
||||
LOAD_MODEL_URL: JSON.stringify(
|
||||
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
|
||||
),
|
||||
TERMINATE_ENGINE_URL: JSON.stringify(
|
||||
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/processmanager/destroy`
|
||||
),
|
||||
ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'),
|
||||
ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? '3928'),
|
||||
}),
|
||||
json(),
|
||||
typescript({ useTsconfigDeclarationDir: true }),
|
||||
commonjs(),
|
||||
resolve({
|
||||
extensions: ['.ts', '.js', '.json'],
|
||||
}),
|
||||
sourceMaps(),
|
||||
],
|
||||
},
|
||||
]
|
||||
10
extensions/tensorrt-llm-extension/src/@types/global.d.ts
vendored
Normal file
10
extensions/tensorrt-llm-extension/src/@types/global.d.ts
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
declare const NODE: string
|
||||
declare const INFERENCE_URL: string
|
||||
declare const LOAD_MODEL_URL: string
|
||||
declare const TERMINATE_ENGINE_URL: string
|
||||
declare const ENGINE_HOST: string
|
||||
declare const ENGINE_PORT: string
|
||||
declare const DOWNLOAD_RUNNER_URL: string
|
||||
declare const TENSORRT_VERSION: string
|
||||
declare const COMPATIBILITY: object
|
||||
declare const EXTENSION_NAME: string
|
||||
147
extensions/tensorrt-llm-extension/src/index.ts
Normal file
147
extensions/tensorrt-llm-extension/src/index.ts
Normal file
@ -0,0 +1,147 @@
|
||||
/**
|
||||
* @module tensorrt-llm-extension/src/index
|
||||
*/
|
||||
|
||||
import {
|
||||
Compatibility,
|
||||
DownloadEvent,
|
||||
DownloadRequest,
|
||||
DownloadState,
|
||||
GpuSetting,
|
||||
InstallationState,
|
||||
Model,
|
||||
baseName,
|
||||
downloadFile,
|
||||
events,
|
||||
executeOnMain,
|
||||
joinPath,
|
||||
showToast,
|
||||
systemInformations,
|
||||
LocalOAIEngine,
|
||||
fs,
|
||||
} from '@janhq/core'
|
||||
import models from '../models.json'
|
||||
|
||||
/**
|
||||
* TensorRTLLMExtension - Implementation of LocalOAIEngine
|
||||
* @extends BaseOAILocalInferenceProvider
|
||||
* Provide pre-populated models for TensorRTLLM
|
||||
*/
|
||||
export default class TensorRTLLMExtension extends LocalOAIEngine {
|
||||
/**
|
||||
* Override custom function name for loading and unloading model
|
||||
* Which are implemented from node module
|
||||
*/
|
||||
override provider = 'nitro-tensorrt-llm'
|
||||
override inferenceUrl = INFERENCE_URL
|
||||
override nodeModule = NODE
|
||||
|
||||
private supportedGpuArch = ['turing', 'ampere', 'ada']
|
||||
|
||||
compatibility() {
|
||||
return COMPATIBILITY as unknown as Compatibility
|
||||
}
|
||||
/**
|
||||
* models implemented by the extension
|
||||
* define pre-populated models
|
||||
*/
|
||||
async models(): Promise<Model[]> {
|
||||
if ((await this.installationState()) === 'Installed')
|
||||
return models as unknown as Model[]
|
||||
return []
|
||||
}
|
||||
|
||||
override async install(): Promise<void> {
|
||||
const info = await systemInformations()
|
||||
console.debug(
|
||||
`TensorRTLLMExtension installing pre-requisites... ${JSON.stringify(info)}`
|
||||
)
|
||||
const gpuSetting: GpuSetting | undefined = info.gpuSetting
|
||||
if (gpuSetting === undefined || gpuSetting.gpus.length === 0) {
|
||||
console.error('No GPU setting found. Please check your GPU setting.')
|
||||
return
|
||||
}
|
||||
|
||||
// TODO: we only check for the first graphics card. Need to refactor this later.
|
||||
const firstGpu = gpuSetting.gpus[0]
|
||||
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
|
||||
console.error('No Nvidia GPU found. Please check your GPU setting.')
|
||||
return
|
||||
}
|
||||
|
||||
if (firstGpu.arch === undefined) {
|
||||
console.error('No GPU architecture found. Please check your GPU setting.')
|
||||
return
|
||||
}
|
||||
|
||||
if (!this.supportedGpuArch.includes(firstGpu.arch)) {
|
||||
console.error(
|
||||
`Your GPU: ${firstGpu} is not supported. Only 20xx, 30xx, 40xx series are supported.`
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
const binaryFolderPath = await executeOnMain(
|
||||
this.nodeModule,
|
||||
'binaryFolder'
|
||||
)
|
||||
if (!(await fs.existsSync(binaryFolderPath))) {
|
||||
await fs.mkdirSync(binaryFolderPath)
|
||||
}
|
||||
|
||||
const placeholderUrl = DOWNLOAD_RUNNER_URL
|
||||
const tensorrtVersion = TENSORRT_VERSION
|
||||
|
||||
const url = placeholderUrl
|
||||
.replace(/<version>/g, tensorrtVersion)
|
||||
.replace(/<gpuarch>/g, firstGpu.arch)
|
||||
|
||||
const tarball = await baseName(url)
|
||||
|
||||
const tarballFullPath = await joinPath([binaryFolderPath, tarball])
|
||||
const downloadRequest: DownloadRequest = {
|
||||
url,
|
||||
localPath: tarballFullPath,
|
||||
extensionId: EXTENSION_NAME,
|
||||
downloadType: 'extension',
|
||||
}
|
||||
downloadFile(downloadRequest)
|
||||
|
||||
// TODO: wrap this into a Promise
|
||||
const onFileDownloadSuccess = async (state: DownloadState) => {
|
||||
// if other download, ignore
|
||||
if (state.fileName !== tarball) return
|
||||
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
|
||||
await executeOnMain(this.nodeModule, 'decompressRunner', tarballFullPath)
|
||||
events.emit(DownloadEvent.onFileUnzipSuccess, state)
|
||||
|
||||
// Prepopulate models as soon as it's ready
|
||||
this.prePopulateModels().then(() => {
|
||||
showToast(
|
||||
'Extension installed successfully.',
|
||||
'New models are added to Model Hub.'
|
||||
)
|
||||
})
|
||||
}
|
||||
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
|
||||
}
|
||||
|
||||
override async installationState(): Promise<InstallationState> {
|
||||
// For now, we just check the executable of nitro x tensor rt
|
||||
const isNitroExecutableAvailable = await executeOnMain(
|
||||
this.nodeModule,
|
||||
'isNitroExecutableAvailable'
|
||||
)
|
||||
|
||||
return isNitroExecutableAvailable ? 'Installed' : 'NotInstalled'
|
||||
}
|
||||
|
||||
override onInferenceStopped() {
|
||||
if (!this.isRunning) return
|
||||
showToast(
|
||||
'Unable to Stop Inference',
|
||||
'The model does not support stopping inference.'
|
||||
)
|
||||
return Promise.resolve()
|
||||
}
|
||||
}
|
||||
191
extensions/tensorrt-llm-extension/src/node/index.ts
Normal file
191
extensions/tensorrt-llm-extension/src/node/index.ts
Normal file
@ -0,0 +1,191 @@
|
||||
import path from 'path'
|
||||
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
|
||||
import tcpPortUsed from 'tcp-port-used'
|
||||
import fetchRT from 'fetch-retry'
|
||||
import { log } from '@janhq/core/node'
|
||||
import { existsSync } from 'fs'
|
||||
import decompress from 'decompress'
|
||||
|
||||
// Polyfill fetch with retry
|
||||
const fetchRetry = fetchRT(fetch)
|
||||
|
||||
/**
|
||||
* The response object for model init operation.
|
||||
*/
|
||||
interface ModelLoadParams {
|
||||
engine_path: string
|
||||
ctx_len: number
|
||||
}
|
||||
|
||||
// The subprocess instance for Engine
|
||||
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
|
||||
|
||||
/**
|
||||
* Initializes a engine subprocess to load a machine learning model.
|
||||
* @param params - The model load settings.
|
||||
*/
|
||||
async function loadModel(params: any): Promise<{ error: Error | undefined }> {
|
||||
// modelFolder is the absolute path to the running model folder
|
||||
// e.g. ~/jan/models/llama-2
|
||||
let modelFolder = params.modelFolder
|
||||
|
||||
const settings: ModelLoadParams = {
|
||||
engine_path: modelFolder,
|
||||
ctx_len: params.model.settings.ctx_len ?? 2048,
|
||||
}
|
||||
return runEngineAndLoadModel(settings)
|
||||
}
|
||||
|
||||
/**
|
||||
* Stops a Engine subprocess.
|
||||
*/
|
||||
function unloadModel(): Promise<any> {
|
||||
const controller = new AbortController()
|
||||
setTimeout(() => controller.abort(), 5000)
|
||||
debugLog(`Request to kill engine`)
|
||||
|
||||
subprocess?.kill()
|
||||
return fetch(TERMINATE_ENGINE_URL, {
|
||||
method: 'DELETE',
|
||||
signal: controller.signal,
|
||||
})
|
||||
.then(() => {
|
||||
subprocess = undefined
|
||||
})
|
||||
.catch(() => {}) // Do nothing with this attempt
|
||||
.then(() => tcpPortUsed.waitUntilFree(parseInt(ENGINE_PORT), 300, 5000)) // Wait for port available
|
||||
.then(() => debugLog(`Engine process is terminated`))
|
||||
.catch((err) => {
|
||||
debugLog(
|
||||
`Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
|
||||
)
|
||||
throw 'PORT_NOT_AVAILABLE'
|
||||
})
|
||||
}
|
||||
/**
|
||||
* 1. Spawn engine process
|
||||
* 2. Load model into engine subprocess
|
||||
* @returns
|
||||
*/
|
||||
async function runEngineAndLoadModel(settings: ModelLoadParams) {
|
||||
return unloadModel()
|
||||
.then(runEngine)
|
||||
.then(() => loadModelRequest(settings))
|
||||
.catch((err) => {
|
||||
// TODO: Broadcast error so app could display proper error message
|
||||
debugLog(`${err}`, 'Error')
|
||||
return { error: err }
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a LLM model into the Engine subprocess by sending a HTTP POST request.
|
||||
*/
|
||||
function loadModelRequest(
|
||||
settings: ModelLoadParams
|
||||
): Promise<{ error: Error | undefined }> {
|
||||
debugLog(`Loading model with params ${JSON.stringify(settings)}`)
|
||||
return fetchRetry(LOAD_MODEL_URL, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(settings),
|
||||
retries: 3,
|
||||
retryDelay: 500,
|
||||
})
|
||||
.then((res) => {
|
||||
debugLog(`Load model success with response ${JSON.stringify(res)}`)
|
||||
return Promise.resolve({ error: undefined })
|
||||
})
|
||||
.catch((err) => {
|
||||
debugLog(`Load model failed with error ${err}`, 'Error')
|
||||
return Promise.resolve({ error: err })
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Spawns engine subprocess.
|
||||
*/
|
||||
function runEngine(): Promise<any> {
|
||||
debugLog(`Spawning engine subprocess...`)
|
||||
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
// Current directory by default
|
||||
let binaryFolder = path.join(__dirname, '..', 'bin')
|
||||
// Binary path
|
||||
const binary = path.join(
|
||||
binaryFolder,
|
||||
process.platform === 'win32' ? 'nitro.exe' : 'nitro'
|
||||
)
|
||||
|
||||
const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
|
||||
// Execute the binary
|
||||
debugLog(`Spawn nitro at path: ${binary}, and args: ${args}`)
|
||||
subprocess = spawn(binary, args, {
|
||||
cwd: binaryFolder,
|
||||
env: {
|
||||
...process.env,
|
||||
},
|
||||
})
|
||||
|
||||
// Handle subprocess output
|
||||
subprocess.stdout.on('data', (data: any) => {
|
||||
debugLog(`${data}`)
|
||||
})
|
||||
|
||||
subprocess.stderr.on('data', (data: any) => {
|
||||
debugLog(`${data}`)
|
||||
})
|
||||
|
||||
subprocess.on('close', (code: any) => {
|
||||
debugLog(`Engine exited with code: ${code}`)
|
||||
subprocess = undefined
|
||||
reject(`child process exited with code ${code}`)
|
||||
})
|
||||
|
||||
tcpPortUsed.waitUntilUsed(parseInt(ENGINE_PORT), 300, 30000).then(() => {
|
||||
debugLog(`Engine is ready`)
|
||||
resolve()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
function debugLog(message: string, level: string = 'Debug') {
|
||||
log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
|
||||
}
|
||||
|
||||
const binaryFolder = async (): Promise<string> => {
|
||||
return path.join(__dirname, '..', 'bin')
|
||||
}
|
||||
|
||||
const decompressRunner = async (zipPath: string) => {
|
||||
const output = path.join(__dirname, '..', 'bin')
|
||||
console.debug(`Decompressing ${zipPath} to ${output}...`)
|
||||
try {
|
||||
const files = await decompress(zipPath, output)
|
||||
console.debug('Decompress finished!', files)
|
||||
} catch (err) {
|
||||
console.error(`Decompress ${zipPath} failed: ${err}`)
|
||||
}
|
||||
}
|
||||
|
||||
const isNitroExecutableAvailable = async (): Promise<boolean> => {
|
||||
const binary = path.join(
|
||||
__dirname,
|
||||
'..',
|
||||
'bin',
|
||||
process.platform === 'win32' ? 'nitro.exe' : 'nitro'
|
||||
)
|
||||
|
||||
return existsSync(binary)
|
||||
}
|
||||
|
||||
// Public surface of this module: binary management helpers plus the
// model lifecycle entry points used by the extension runtime.
export default {
  binaryFolder,
  decompressRunner,
  loadModel,
  unloadModel,
  // dispose is an alias: tearing the module down just unloads the model
  dispose: unloadModel,
  isNitroExecutableAvailable,
}
|
||||
20
extensions/tensorrt-llm-extension/tsconfig.json
Normal file
20
extensions/tensorrt-llm-extension/tsconfig.json
Normal file
@ -0,0 +1,20 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"moduleResolution": "node",
|
||||
"target": "es5",
|
||||
"module": "ES2020",
|
||||
"lib": ["es2015", "es2016", "es2017", "dom"],
|
||||
"strict": true,
|
||||
"sourceMap": true,
|
||||
"declaration": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"experimentalDecorators": true,
|
||||
"emitDecoratorMetadata": true,
|
||||
"declarationDir": "dist/types",
|
||||
"outDir": "dist",
|
||||
"importHelpers": true,
|
||||
"resolveJsonModule": true,
|
||||
"typeRoots": ["node_modules/@types"]
|
||||
},
|
||||
"include": ["src"]
|
||||
}
|
||||
@ -73,8 +73,9 @@ const DropdownListSidebar = ({
|
||||
|
||||
const [copyId, setCopyId] = useState('')
|
||||
|
||||
// TODO: Update filter condition for the local model
|
||||
const localModel = downloadedModels.filter(
|
||||
(model) => model.engine === InferenceEngine.nitro
|
||||
(model) => model.engine !== InferenceEngine.openai
|
||||
)
|
||||
const remoteModel = downloadedModels.filter(
|
||||
(model) => model.engine === InferenceEngine.openai
|
||||
|
||||
@ -0,0 +1,87 @@
|
||||
import { useCallback, useEffect } from 'react'
|
||||
|
||||
import { abortDownload } from '@janhq/core'
|
||||
import {
|
||||
Button,
|
||||
Modal,
|
||||
ModalContent,
|
||||
ModalHeader,
|
||||
ModalTitle,
|
||||
Progress,
|
||||
} from '@janhq/uikit'
|
||||
import { atom, useAtom, useAtomValue } from 'jotai'
|
||||
|
||||
import {
|
||||
formatDownloadPercentage,
|
||||
formatExtensionsName,
|
||||
} from '@/utils/converter'
|
||||
|
||||
import {
|
||||
InstallingExtensionState,
|
||||
installingExtensionAtom,
|
||||
} from '@/helpers/atoms/Extension.atom'
|
||||
|
||||
export const showInstallingExtensionModalAtom = atom(false)
|
||||
|
||||
/**
 * Modal listing every extension installation in flight, each with a progress
 * bar and a Cancel button. Dismisses itself automatically once no
 * installation remains.
 */
const InstallingExtensionModal: React.FC = () => {
  const [showInstallingExtensionModal, setShowInstallingExtensionModal] =
    useAtom(showInstallingExtensionModalAtom)
  const installingExtensions = useAtomValue(installingExtensionAtom)

  // Auto-close when the last installation finishes or is cancelled.
  useEffect(() => {
    if (installingExtensions.length === 0) {
      setShowInstallingExtensionModal(false)
    }
  }, [installingExtensions, setShowInstallingExtensionModal])

  const onAbortInstallingExtensionClick = useCallback(
    (item: InstallingExtensionState) => {
      // Abort is keyed by the partially-downloaded file path; without a
      // localPath there is nothing to cancel yet.
      if (item.localPath) {
        abortDownload(item.localPath)
      }
    },
    []
  )

  return (
    <Modal
      open={showInstallingExtensionModal}
      onOpenChange={() => setShowInstallingExtensionModal(false)}
    >
      <ModalContent>
        <ModalHeader>
          <ModalTitle>Installing Extension</ModalTitle>
        </ModalHeader>
        {/* installingExtensions is already an array — map it directly
            instead of going through Object.values. */}
        {installingExtensions.map((item) => (
          <div className="pt-2" key={item.extensionId}>
            <Progress
              className="mb-2 h-2"
              value={
                formatDownloadPercentage(item.percentage, {
                  hidePercentage: true,
                }) as number
              }
            />
            <div className="flex items-center justify-between gap-x-2">
              <div className="flex gap-x-2">
                <p className="line-clamp-1">
                  {formatExtensionsName(item.extensionId)}
                </p>
                <span>{formatDownloadPercentage(item.percentage)}</span>
              </div>
              <Button
                themes="outline"
                size="sm"
                onClick={() => onAbortInstallingExtensionClick(item)}
              >
                Cancel
              </Button>
            </div>
          </div>
        ))}
      </ModalContent>
    </Modal>
  )
}

export default InstallingExtensionModal
|
||||
@ -0,0 +1,52 @@
|
||||
import { Fragment, useCallback } from 'react'
|
||||
|
||||
import { Progress } from '@janhq/uikit'
|
||||
import { useAtomValue, useSetAtom } from 'jotai'
|
||||
|
||||
import { showInstallingExtensionModalAtom } from './InstallingExtensionModal'
|
||||
|
||||
import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
|
||||
|
||||
const InstallingExtension: React.FC = () => {
|
||||
const installingExtensions = useAtomValue(installingExtensionAtom)
|
||||
const setShowInstallingExtensionModal = useSetAtom(
|
||||
showInstallingExtensionModalAtom
|
||||
)
|
||||
const shouldShowInstalling = installingExtensions.length > 0
|
||||
|
||||
let totalPercentage = 0
|
||||
let totalExtensions = 0
|
||||
for (const installation of installingExtensions) {
|
||||
totalPercentage += installation.percentage
|
||||
totalExtensions++
|
||||
}
|
||||
const progress = (totalPercentage / totalExtensions) * 100
|
||||
|
||||
const onClick = useCallback(() => {
|
||||
setShowInstallingExtensionModal(true)
|
||||
}, [setShowInstallingExtensionModal])
|
||||
|
||||
return (
|
||||
<Fragment>
|
||||
{shouldShowInstalling ? (
|
||||
<div
|
||||
className="flex cursor-pointer flex-row items-center space-x-2"
|
||||
onClick={onClick}
|
||||
>
|
||||
<p className="text-xs font-semibold text-muted-foreground">
|
||||
Installing Extension
|
||||
</p>
|
||||
|
||||
<div className="flex flex-row items-center justify-center space-x-2 rounded-md bg-secondary px-2 py-[2px]">
|
||||
<Progress className="h-2 w-24" value={progress} />
|
||||
<span className="text-xs font-bold text-muted-foreground">
|
||||
{progress.toFixed(2)}%
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
) : null}
|
||||
</Fragment>
|
||||
)
|
||||
}
|
||||
|
||||
export default InstallingExtension
|
||||
@ -16,6 +16,7 @@ import ProgressBar from '@/containers/ProgressBar'
|
||||
import { appDownloadProgress } from '@/containers/Providers/Jotai'
|
||||
|
||||
import ImportingModelState from './ImportingModelState'
|
||||
import InstallingExtension from './InstallingExtension'
|
||||
import SystemMonitor from './SystemMonitor'
|
||||
import UpdatedFailedModal from './UpdateFailedModal'
|
||||
|
||||
@ -46,6 +47,7 @@ const BottomBar = () => {
|
||||
<ImportingModelState />
|
||||
<DownloadingState />
|
||||
<UpdatedFailedModal />
|
||||
<InstallingExtension />
|
||||
</div>
|
||||
<div className="flex items-center gap-x-3">
|
||||
<SystemMonitor />
|
||||
|
||||
@ -22,6 +22,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
|
||||
import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
|
||||
import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
|
||||
|
||||
import InstallingExtensionModal from './BottomBar/InstallingExtension/InstallingExtensionModal'
|
||||
|
||||
import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
|
||||
|
||||
const BaseLayout = (props: PropsWithChildren) => {
|
||||
@ -68,6 +70,7 @@ const BaseLayout = (props: PropsWithChildren) => {
|
||||
{importModelStage === 'IMPORTING_MODEL' && <ImportingModelModal />}
|
||||
{importModelStage === 'EDIT_MODEL_INFO' && <EditModelInfoModal />}
|
||||
{importModelStage === 'CONFIRM_CANCEL' && <CancelModelImportModal />}
|
||||
<InstallingExtensionModal />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
@ -7,6 +7,10 @@ import { useSetAtom } from 'jotai'
|
||||
|
||||
import { setDownloadStateAtom } from '@/hooks/useDownloadState'
|
||||
|
||||
import { formatExtensionsName } from '@/utils/converter'
|
||||
|
||||
import { toaster } from '../Toast'
|
||||
|
||||
import AppUpdateListener from './AppUpdateListener'
|
||||
import ClipboardListener from './ClipboardListener'
|
||||
import EventHandler from './EventHandler'
|
||||
@ -14,46 +18,89 @@ import EventHandler from './EventHandler'
|
||||
import ModelImportListener from './ModelImportListener'
|
||||
import QuickAskListener from './QuickAskListener'
|
||||
|
||||
import {
|
||||
InstallingExtensionState,
|
||||
removeInstallingExtensionAtom,
|
||||
setInstallingExtensionAtom,
|
||||
} from '@/helpers/atoms/Extension.atom'
|
||||
|
||||
const EventListenerWrapper = ({ children }: PropsWithChildren) => {
|
||||
const setDownloadState = useSetAtom(setDownloadStateAtom)
|
||||
const setInstallingExtension = useSetAtom(setInstallingExtensionAtom)
|
||||
const removeInstallingExtension = useSetAtom(removeInstallingExtensionAtom)
|
||||
|
||||
const onFileDownloadUpdate = useCallback(
|
||||
async (state: DownloadState) => {
|
||||
console.debug('onFileDownloadUpdate', state)
|
||||
setDownloadState(state)
|
||||
if (state.downloadType === 'extension') {
|
||||
const installingExtensionState: InstallingExtensionState = {
|
||||
extensionId: state.extensionId!,
|
||||
percentage: state.percent,
|
||||
localPath: state.localPath,
|
||||
}
|
||||
setInstallingExtension(state.extensionId!, installingExtensionState)
|
||||
} else {
|
||||
setDownloadState(state)
|
||||
}
|
||||
},
|
||||
[setDownloadState]
|
||||
[setDownloadState, setInstallingExtension]
|
||||
)
|
||||
|
||||
const onFileDownloadError = useCallback(
|
||||
(state: DownloadState) => {
|
||||
console.debug('onFileDownloadError', state)
|
||||
setDownloadState(state)
|
||||
if (state.downloadType === 'extension') {
|
||||
removeInstallingExtension(state.extensionId!)
|
||||
} else {
|
||||
setDownloadState(state)
|
||||
}
|
||||
},
|
||||
[setDownloadState]
|
||||
[setDownloadState, removeInstallingExtension]
|
||||
)
|
||||
|
||||
const onFileDownloadSuccess = useCallback(
|
||||
(state: DownloadState) => {
|
||||
console.debug('onFileDownloadSuccess', state)
|
||||
setDownloadState(state)
|
||||
if (state.downloadType !== 'extension') {
|
||||
setDownloadState(state)
|
||||
}
|
||||
},
|
||||
[setDownloadState]
|
||||
)
|
||||
|
||||
const onFileUnzipSuccess = useCallback(
|
||||
(state: DownloadState) => {
|
||||
console.debug('onFileUnzipSuccess', state)
|
||||
toaster({
|
||||
title: 'Success',
|
||||
description: `Install ${formatExtensionsName(state.extensionId!)} successfully.`,
|
||||
type: 'success',
|
||||
})
|
||||
removeInstallingExtension(state.extensionId!)
|
||||
},
|
||||
[removeInstallingExtension]
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
console.debug('EventListenerWrapper: registering event listeners...')
|
||||
events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
|
||||
events.on(DownloadEvent.onFileDownloadError, onFileDownloadError)
|
||||
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
|
||||
events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
|
||||
|
||||
return () => {
|
||||
console.debug('EventListenerWrapper: unregistering event listeners...')
|
||||
events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
|
||||
events.off(DownloadEvent.onFileDownloadError, onFileDownloadError)
|
||||
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
|
||||
events.off(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
|
||||
}
|
||||
}, [onFileDownloadUpdate, onFileDownloadError, onFileDownloadSuccess])
|
||||
}, [
|
||||
onFileDownloadUpdate,
|
||||
onFileDownloadError,
|
||||
onFileDownloadSuccess,
|
||||
onFileUnzipSuccess,
|
||||
])
|
||||
|
||||
return (
|
||||
<AppUpdateListener>
|
||||
|
||||
@ -23,7 +23,9 @@ export class ExtensionManager {
|
||||
* @param type - The type of the extension to retrieve.
|
||||
* @returns The extension, if found.
|
||||
*/
|
||||
get<T extends BaseExtension>(type: ExtensionTypeEnum): T | undefined {
|
||||
get<T extends BaseExtension>(
|
||||
type: ExtensionTypeEnum | string
|
||||
): T | undefined {
|
||||
return this.extensions.get(type) as T | undefined
|
||||
}
|
||||
|
||||
|
||||
40
web/helpers/atoms/Extension.atom.ts
Normal file
40
web/helpers/atoms/Extension.atom.ts
Normal file
@ -0,0 +1,40 @@
|
||||
import { atom } from 'jotai'
|
||||
|
||||
// Identifier of an extension — presumably matches Extension.name; verify
// against callers that compare it with item.name.
type ExtensionId = string

// Progress snapshot for a single in-flight extension installation.
export type InstallingExtensionState = {
  extensionId: ExtensionId
  // Download completion; appears to be a 0..1 fraction (consumers multiply
  // by 100 for display) — TODO confirm.
  percentage: number
  // Path of the partially-downloaded archive; used to abort the download.
  localPath?: string
}

// All extension installations currently in flight.
export const installingExtensionAtom = atom<InstallingExtensionState[]>([])
|
||||
|
||||
export const setInstallingExtensionAtom = atom(
|
||||
null,
|
||||
(get, set, extensionId: string, state: InstallingExtensionState) => {
|
||||
const current = get(installingExtensionAtom)
|
||||
|
||||
const isExists = current.some((e) => e.extensionId === extensionId)
|
||||
if (isExists) {
|
||||
const newCurrent = current.map((e) => {
|
||||
if (e.extensionId === extensionId) {
|
||||
return state
|
||||
}
|
||||
return e
|
||||
})
|
||||
set(installingExtensionAtom, newCurrent)
|
||||
} else {
|
||||
set(installingExtensionAtom, [...current, state])
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
export const removeInstallingExtensionAtom = atom(
|
||||
null,
|
||||
(get, set, extensionId: string) => {
|
||||
const current = get(installingExtensionAtom)
|
||||
const newCurrent = current.filter((e) => e.extensionId !== extensionId)
|
||||
set(installingExtensionAtom, newCurrent)
|
||||
}
|
||||
)
|
||||
@ -40,6 +40,16 @@ export function useActiveModel() {
|
||||
console.debug(`Model ${modelId} is already initialized. Ignore..`)
|
||||
return
|
||||
}
|
||||
|
||||
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
|
||||
|
||||
// Switch between engines
|
||||
if (model && activeModel && activeModel.engine !== model.engine) {
|
||||
stopModel()
|
||||
// TODO: Refactor inference provider would address this
|
||||
await new Promise((res) => setTimeout(res, 1000))
|
||||
}
|
||||
|
||||
// TODO: incase we have multiple assistants, the configuration will be from assistant
|
||||
setLoadModelError(undefined)
|
||||
|
||||
@ -47,8 +57,6 @@ export function useActiveModel() {
|
||||
|
||||
setStateModel({ state: 'start', loading: true, model: modelId })
|
||||
|
||||
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
|
||||
|
||||
if (!model) {
|
||||
toaster({
|
||||
title: `Model ${modelId} not found!`,
|
||||
|
||||
@ -8,12 +8,15 @@ import {
|
||||
joinPath,
|
||||
ModelArtifact,
|
||||
DownloadState,
|
||||
GpuSetting,
|
||||
} from '@janhq/core'
|
||||
|
||||
import { useAtomValue, useSetAtom } from 'jotai'
|
||||
|
||||
import { setDownloadStateAtom } from './useDownloadState'
|
||||
|
||||
import useGpuSetting from './useGpuSetting'
|
||||
|
||||
import { extensionManager } from '@/extension/ExtensionManager'
|
||||
import {
|
||||
ignoreSslAtom,
|
||||
@ -29,6 +32,8 @@ export default function useDownloadModel() {
|
||||
const setDownloadState = useSetAtom(setDownloadStateAtom)
|
||||
const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
|
||||
|
||||
const { getGpuSettings } = useGpuSetting()
|
||||
|
||||
const downloadModel = useCallback(
|
||||
async (model: Model) => {
|
||||
const childProgresses: DownloadState[] = model.sources.map(
|
||||
@ -68,10 +73,22 @@ export default function useDownloadModel() {
|
||||
})
|
||||
|
||||
addDownloadingModel(model)
|
||||
|
||||
await localDownloadModel(model, ignoreSSL, proxyEnabled ? proxy : '')
|
||||
const gpuSettings = await getGpuSettings()
|
||||
await localDownloadModel(
|
||||
model,
|
||||
ignoreSSL,
|
||||
proxyEnabled ? proxy : '',
|
||||
gpuSettings
|
||||
)
|
||||
},
|
||||
[ignoreSSL, proxy, proxyEnabled, addDownloadingModel, setDownloadState]
|
||||
[
|
||||
ignoreSSL,
|
||||
proxy,
|
||||
proxyEnabled,
|
||||
getGpuSettings,
|
||||
addDownloadingModel,
|
||||
setDownloadState,
|
||||
]
|
||||
)
|
||||
|
||||
const abortModelDownload = useCallback(async (model: Model) => {
|
||||
@ -90,8 +107,9 @@ export default function useDownloadModel() {
|
||||
const localDownloadModel = async (
|
||||
model: Model,
|
||||
ignoreSSL: boolean,
|
||||
proxy: string
|
||||
proxy: string,
|
||||
gpuSettings?: GpuSetting
|
||||
) =>
|
||||
extensionManager
|
||||
.get<ModelExtension>(ExtensionTypeEnum.Model)
|
||||
?.downloadModel(model, { ignoreSSL, proxy })
|
||||
?.downloadModel(model, gpuSettings, { ignoreSSL, proxy })
|
||||
|
||||
@ -18,123 +18,129 @@ export const modelDownloadStateAtom = atom<Record<string, DownloadState>>({})
|
||||
export const setDownloadStateAtom = atom(
|
||||
null,
|
||||
(get, set, state: DownloadState) => {
|
||||
const currentState = { ...get(modelDownloadStateAtom) }
|
||||
try {
|
||||
const currentState = { ...get(modelDownloadStateAtom) }
|
||||
|
||||
if (state.downloadState === 'end') {
|
||||
const modelDownloadState = currentState[state.modelId]
|
||||
if (state.downloadState === 'end') {
|
||||
const modelDownloadState = currentState[state.modelId]
|
||||
|
||||
const updatedChildren: DownloadState[] =
|
||||
modelDownloadState.children!.filter(
|
||||
(m) => m.fileName !== state.fileName
|
||||
const updatedChildren: DownloadState[] = (
|
||||
modelDownloadState.children ?? []
|
||||
).filter((m) => m.fileName !== state.fileName)
|
||||
updatedChildren.push(state)
|
||||
modelDownloadState.children = updatedChildren
|
||||
currentState[state.modelId] = modelDownloadState
|
||||
|
||||
const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
|
||||
(m) => m.downloadState === 'end'
|
||||
)
|
||||
updatedChildren.push(state)
|
||||
modelDownloadState.children = updatedChildren
|
||||
currentState[state.modelId] = modelDownloadState
|
||||
|
||||
const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
|
||||
(m) => m.downloadState === 'end'
|
||||
)
|
||||
if (isAllChildrenDownloadEnd) {
|
||||
// download successfully
|
||||
delete currentState[state.modelId]
|
||||
set(removeDownloadingModelAtom, state.modelId)
|
||||
|
||||
if (isAllChildrenDownloadEnd) {
|
||||
// download successfully
|
||||
const model = get(configuredModelsAtom).find(
|
||||
(e) => e.id === state.modelId
|
||||
)
|
||||
if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
|
||||
toaster({
|
||||
title: 'Download Completed',
|
||||
description: `Download ${state.modelId} completed`,
|
||||
type: 'success',
|
||||
})
|
||||
}
|
||||
} else if (state.downloadState === 'error') {
|
||||
// download error
|
||||
delete currentState[state.modelId]
|
||||
set(removeDownloadingModelAtom, state.modelId)
|
||||
|
||||
const model = get(configuredModelsAtom).find(
|
||||
(e) => e.id === state.modelId
|
||||
)
|
||||
if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
|
||||
toaster({
|
||||
title: 'Download Completed',
|
||||
description: `Download ${state.modelId} completed`,
|
||||
type: 'success',
|
||||
})
|
||||
}
|
||||
} else if (state.downloadState === 'error') {
|
||||
// download error
|
||||
delete currentState[state.modelId]
|
||||
set(removeDownloadingModelAtom, state.modelId)
|
||||
if (state.error === 'aborted') {
|
||||
toaster({
|
||||
title: 'Cancel Download',
|
||||
description: `Model ${state.modelId} download cancelled`,
|
||||
type: 'warning',
|
||||
})
|
||||
} else {
|
||||
let error = state.error
|
||||
if (
|
||||
typeof error?.includes === 'function' &&
|
||||
state.error?.includes('certificate')
|
||||
) {
|
||||
error +=
|
||||
'. To fix enable "Ignore SSL Certificates" in Advanced settings.'
|
||||
if (state.error === 'aborted') {
|
||||
toaster({
|
||||
title: 'Cancel Download',
|
||||
description: `Model ${state.modelId} download cancelled`,
|
||||
type: 'warning',
|
||||
})
|
||||
} else {
|
||||
let error = state.error
|
||||
if (
|
||||
typeof error?.includes === 'function' &&
|
||||
state.error?.includes('certificate')
|
||||
) {
|
||||
error +=
|
||||
'. To fix enable "Ignore SSL Certificates" in Advanced settings.'
|
||||
}
|
||||
toaster({
|
||||
title: 'Download Failed',
|
||||
description: `Model ${state.modelId} download failed: ${error}`,
|
||||
type: 'error',
|
||||
})
|
||||
}
|
||||
} else {
|
||||
// download in progress
|
||||
if (state.size.total === 0) {
|
||||
// this is initial state, just set the state
|
||||
currentState[state.modelId] = state
|
||||
set(modelDownloadStateAtom, currentState)
|
||||
return
|
||||
}
|
||||
toaster({
|
||||
title: 'Download Failed',
|
||||
description: `Model ${state.modelId} download failed: ${error}`,
|
||||
type: 'error',
|
||||
})
|
||||
}
|
||||
} else {
|
||||
// download in progress
|
||||
if (state.size.total === 0) {
|
||||
// this is initial state, just set the state
|
||||
currentState[state.modelId] = state
|
||||
set(modelDownloadStateAtom, currentState)
|
||||
return
|
||||
}
|
||||
|
||||
const modelDownloadState = currentState[state.modelId]
|
||||
if (!modelDownloadState) {
|
||||
console.debug('setDownloadStateAtom: modelDownloadState not found')
|
||||
return
|
||||
}
|
||||
const modelDownloadState = currentState[state.modelId]
|
||||
if (!modelDownloadState) {
|
||||
console.debug('setDownloadStateAtom: modelDownloadState not found')
|
||||
return
|
||||
}
|
||||
|
||||
// delete the children if the filename is matched and replace the new state
|
||||
const updatedChildren: DownloadState[] =
|
||||
modelDownloadState.children!.filter(
|
||||
(m) => m.fileName !== state.fileName
|
||||
// delete the children if the filename is matched and replace the new state
|
||||
const updatedChildren: DownloadState[] = (
|
||||
modelDownloadState.children ?? []
|
||||
).filter((m) => m.fileName !== state.fileName)
|
||||
|
||||
updatedChildren.push(state)
|
||||
|
||||
// re-calculate the overall progress if we have all the children download data
|
||||
const isAnyChildDownloadNotReady = updatedChildren.some(
|
||||
(m) =>
|
||||
m.size.total === 0 &&
|
||||
!modelDownloadState.children?.some(
|
||||
(e) => e.fileName === m.fileName && e.downloadState === 'end'
|
||||
) &&
|
||||
modelDownloadState.children?.some((e) => e.fileName === m.fileName)
|
||||
)
|
||||
|
||||
updatedChildren.push(state)
|
||||
modelDownloadState.children = updatedChildren
|
||||
if (isAnyChildDownloadNotReady) {
|
||||
// just update the children
|
||||
currentState[state.modelId] = modelDownloadState
|
||||
set(modelDownloadStateAtom, currentState)
|
||||
return
|
||||
}
|
||||
|
||||
// re-calculate the overall progress if we have all the children download data
|
||||
const isAnyChildDownloadNotReady = updatedChildren.some(
|
||||
(m) => m.size.total === 0
|
||||
)
|
||||
const parentTotalSize = modelDownloadState.size.total
|
||||
if (parentTotalSize === 0) {
|
||||
// calculate the total size of the parent by sum all children total size
|
||||
const totalSize = updatedChildren.reduce(
|
||||
(acc, m) => acc + m.size.total,
|
||||
0
|
||||
)
|
||||
|
||||
modelDownloadState.children = updatedChildren
|
||||
modelDownloadState.size.total = totalSize
|
||||
}
|
||||
|
||||
if (isAnyChildDownloadNotReady) {
|
||||
// just update the children
|
||||
currentState[state.modelId] = modelDownloadState
|
||||
set(modelDownloadStateAtom, currentState)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
const parentTotalSize = modelDownloadState.size.total
|
||||
if (parentTotalSize === 0) {
|
||||
// calculate the total size of the parent by sum all children total size
|
||||
const totalSize = updatedChildren.reduce(
|
||||
(acc, m) => acc + m.size.total,
|
||||
// calculate the total transferred size by sum all children transferred size
|
||||
const transferredSize = updatedChildren.reduce(
|
||||
(acc, m) => acc + m.size.transferred,
|
||||
0
|
||||
)
|
||||
|
||||
modelDownloadState.size.total = totalSize
|
||||
modelDownloadState.size.transferred = transferredSize
|
||||
modelDownloadState.percent =
|
||||
parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
|
||||
currentState[state.modelId] = modelDownloadState
|
||||
}
|
||||
|
||||
// calculate the total transferred size by sum all children transferred size
|
||||
const transferredSize = updatedChildren.reduce(
|
||||
(acc, m) => acc + m.size.transferred,
|
||||
0
|
||||
)
|
||||
modelDownloadState.size.transferred = transferredSize
|
||||
modelDownloadState.percent =
|
||||
parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
|
||||
currentState[state.modelId] = modelDownloadState
|
||||
set(modelDownloadStateAtom, currentState)
|
||||
} catch (e) {
|
||||
console.debug('setDownloadStateAtom: state', state)
|
||||
console.debug('setDownloadStateAtom: error', e)
|
||||
}
|
||||
|
||||
set(modelDownloadStateAtom, currentState)
|
||||
}
|
||||
)
|
||||
|
||||
21
web/hooks/useGpuSetting.ts
Normal file
21
web/hooks/useGpuSetting.ts
Normal file
@ -0,0 +1,21 @@
|
||||
import { useCallback } from 'react'
|
||||
|
||||
import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
|
||||
|
||||
import { extensionManager } from '@/extension'
|
||||
|
||||
export default function useGpuSetting() {
|
||||
const getGpuSettings = useCallback(async () => {
|
||||
const gpuSetting = await extensionManager
|
||||
?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
|
||||
?.getGpuSetting()
|
||||
|
||||
if (!gpuSetting) {
|
||||
console.debug('No GPU setting found')
|
||||
return undefined
|
||||
}
|
||||
return gpuSetting
|
||||
}, [])
|
||||
|
||||
return { getGpuSettings }
|
||||
}
|
||||
@ -38,6 +38,7 @@ const nextConfig = {
|
||||
isMac: process.platform === 'darwin',
|
||||
isWindows: process.platform === 'win32',
|
||||
isLinux: process.platform === 'linux',
|
||||
PLATFORM: JSON.stringify(process.platform),
|
||||
}),
|
||||
]
|
||||
return config
|
||||
|
||||
@ -3,6 +3,8 @@ import { useState } from 'react'
|
||||
import { Model } from '@janhq/core'
|
||||
import { Badge } from '@janhq/uikit'
|
||||
|
||||
import { twMerge } from 'tailwind-merge'
|
||||
|
||||
import ExploreModelItemHeader from '@/screens/ExploreModels/ExploreModelItemHeader'
|
||||
|
||||
type Props = {
|
||||
@ -75,7 +77,16 @@ const ExploreModelItem: React.FC<Props> = ({ model }) => {
|
||||
<span className="font-semibold text-muted-foreground">
|
||||
Format
|
||||
</span>
|
||||
<p className="mt-2 font-medium uppercase">{model.format}</p>
|
||||
<p
|
||||
className={twMerge(
|
||||
'mt-2 font-medium',
|
||||
!model.format?.includes(' ') &&
|
||||
!model.format?.includes('-') &&
|
||||
'uppercase'
|
||||
)}
|
||||
>
|
||||
{model.format}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -152,6 +152,7 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
|
||||
<div className="flex items-center justify-between p-4">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="font-bold">{model.name}</span>
|
||||
<EngineBadge engine={model.engine} />
|
||||
</div>
|
||||
<div className="inline-flex items-center space-x-2">
|
||||
<span className="mr-4 font-semibold text-muted-foreground">
|
||||
@ -172,4 +173,21 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
|
||||
)
|
||||
}
|
||||
|
||||
type EngineBadgeProps = {
|
||||
engine: string
|
||||
}
|
||||
|
||||
const EngineBadge: React.FC<EngineBadgeProps> = ({ engine }) => {
|
||||
switch (engine) {
|
||||
case 'nitro-tensorrt-llm':
|
||||
return (
|
||||
<div className="flex items-center justify-center rounded-md bg-[#EFF6FF] px-2 py-[2px] font-semibold text-primary">
|
||||
TensorRT-LLM
|
||||
</div>
|
||||
)
|
||||
default:
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
export default ExploreModelItemHeader
|
||||
|
||||
225
web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx
Normal file
225
web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx
Normal file
@ -0,0 +1,225 @@
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
|
||||
import {
|
||||
Compatibility,
|
||||
GpuSetting,
|
||||
InstallationState,
|
||||
abortDownload,
|
||||
systemInformations,
|
||||
} from '@janhq/core'
|
||||
import {
|
||||
Button,
|
||||
Progress,
|
||||
Tooltip,
|
||||
TooltipArrow,
|
||||
TooltipContent,
|
||||
TooltipPortal,
|
||||
TooltipTrigger,
|
||||
} from '@janhq/uikit'
|
||||
|
||||
import { InfoCircledIcon } from '@radix-ui/react-icons'
|
||||
import { useAtomValue } from 'jotai'
|
||||
|
||||
import { extensionManager } from '@/extension'
|
||||
import Extension from '@/extension/Extension'
|
||||
import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
|
||||
|
||||
type Props = {
|
||||
item: Extension
|
||||
}
|
||||
|
||||
const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
|
||||
const [compatibility, setCompatibility] = useState<Compatibility | undefined>(
|
||||
undefined
|
||||
)
|
||||
const [installState, setInstallState] =
|
||||
useState<InstallationState>('NotRequired')
|
||||
const installingExtensions = useAtomValue(installingExtensionAtom)
|
||||
const [isGpuSupported, setIsGpuSupported] = useState<boolean>(false)
|
||||
|
||||
const isInstalling = installingExtensions.some(
|
||||
(e) => e.extensionId === item.name
|
||||
)
|
||||
|
||||
const progress = isInstalling
|
||||
? installingExtensions.find((e) => e.extensionId === item.name)
|
||||
?.percentage ?? -1
|
||||
: -1
|
||||
|
||||
useEffect(() => {
|
||||
const getSystemInfos = async () => {
|
||||
const info = await systemInformations()
|
||||
if (!info) {
|
||||
setIsGpuSupported(false)
|
||||
return
|
||||
}
|
||||
|
||||
const gpuSettings: GpuSetting | undefined = info.gpuSetting
|
||||
if (!gpuSettings || gpuSettings.gpus.length === 0) {
|
||||
setIsGpuSupported(false)
|
||||
return
|
||||
}
|
||||
|
||||
const arch = gpuSettings.gpus[0].arch
|
||||
if (!arch) {
|
||||
setIsGpuSupported(false)
|
||||
return
|
||||
}
|
||||
|
||||
const supportedGpuArch = ['turing', 'ampere', 'ada']
|
||||
setIsGpuSupported(supportedGpuArch.includes(arch))
|
||||
}
|
||||
getSystemInfos()
|
||||
}, [])
|
||||
|
||||
useEffect(() => {
|
||||
const getExtensionInstallationState = async () => {
|
||||
const extension = extensionManager.get(item.name ?? '')
|
||||
if (!extension) return
|
||||
|
||||
if (typeof extension?.installationState === 'function') {
|
||||
const installState = await extension.installationState()
|
||||
setInstallState(installState)
|
||||
}
|
||||
}
|
||||
|
||||
getExtensionInstallationState()
|
||||
}, [item.name, isInstalling])
|
||||
|
||||
useEffect(() => {
|
||||
const extension = extensionManager.get(item.name ?? '')
|
||||
if (!extension) return
|
||||
setCompatibility(extension.compatibility())
|
||||
}, [setCompatibility, item.name])
|
||||
|
||||
const onInstallClick = useCallback(async () => {
|
||||
const extension = extensionManager.get(item.name ?? '')
|
||||
if (!extension) return
|
||||
|
||||
await extension.install()
|
||||
}, [item.name])
|
||||
|
||||
const onCancelInstallingClick = () => {
|
||||
const extension = installingExtensions.find(
|
||||
(e) => e.extensionId === item.name
|
||||
)
|
||||
if (extension?.localPath) {
|
||||
abortDownload(extension.localPath)
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none">
|
||||
<div className="flex-1 flex-shrink-0 space-y-1.5">
|
||||
<div className="flex items-center gap-x-2">
|
||||
<h6 className="text-sm font-semibold capitalize">
|
||||
TensorRT-LLM Extension
|
||||
</h6>
|
||||
<p className="whitespace-pre-wrap text-sm font-semibold leading-relaxed">
|
||||
v{item.version}
|
||||
</p>
|
||||
</div>
|
||||
<p className="whitespace-pre-wrap leading-relaxed">
|
||||
{item.description}
|
||||
</p>
|
||||
</div>
|
||||
{(!compatibility || compatibility['platform']?.includes(PLATFORM)) &&
|
||||
isGpuSupported ? (
|
||||
<div className="flex min-w-[150px] flex-row justify-end">
|
||||
<InstallStateIndicator
|
||||
installProgress={progress}
|
||||
installState={installState}
|
||||
onInstallClick={onInstallClick}
|
||||
onCancelClick={onCancelInstallingClick}
|
||||
/>
|
||||
</div>
|
||||
) : (
|
||||
<div className="rounded-md bg-secondary px-3 py-1.5 text-sm font-semibold text-gray-400">
|
||||
<div className="flex flex-row items-center justify-center gap-1">
|
||||
Incompatible{' '}
|
||||
<Tooltip>
|
||||
<TooltipTrigger className="w-full">
|
||||
<InfoCircledIcon />
|
||||
</TooltipTrigger>
|
||||
<TooltipPortal>
|
||||
<TooltipContent side="top">
|
||||
{compatibility ? (
|
||||
<span>
|
||||
Only available on{' '}
|
||||
{compatibility?.platform
|
||||
?.map((e: string) =>
|
||||
e === 'win32'
|
||||
? 'Windows'
|
||||
: e === 'linux'
|
||||
? 'Linux'
|
||||
: 'MacOS'
|
||||
)
|
||||
.join(', ')}
|
||||
</span>
|
||||
) : (
|
||||
<span>
|
||||
Your GPUs are not compatible with this extension
|
||||
</span>
|
||||
)}
|
||||
<TooltipArrow />
|
||||
</TooltipContent>
|
||||
</TooltipPortal>
|
||||
</Tooltip>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
type InstallStateProps = {
|
||||
installProgress: number
|
||||
installState: InstallationState
|
||||
onInstallClick: () => void
|
||||
onCancelClick: () => void
|
||||
}
|
||||
|
||||
const InstallStateIndicator: React.FC<InstallStateProps> = ({
|
||||
installProgress,
|
||||
installState,
|
||||
onInstallClick,
|
||||
onCancelClick,
|
||||
}) => {
|
||||
// TODO: NamH support dark mode for this
|
||||
if (installProgress !== -1) {
|
||||
const progress = installProgress * 100
|
||||
return (
|
||||
<div className="flex h-10 flex-row items-center justify-center space-x-2 rounded-md bg-[#EFF8FF] px-4 text-primary">
|
||||
<button onClick={onCancelClick} className="font-semibold text-primary">
|
||||
Cancel
|
||||
</button>
|
||||
<div className="flex w-[113px] flex-row items-center justify-center space-x-2 rounded-md bg-[#D1E9FF] px-2 py-[2px]">
|
||||
<Progress className="h-1 w-[69px]" value={progress} />
|
||||
<span className="text-xs font-bold text-primary">
|
||||
{progress.toFixed(0)}%
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// TODO: NamH check for dark mode here
|
||||
switch (installState) {
|
||||
case 'Installed':
|
||||
return (
|
||||
<div className="rounded-md bg-secondary px-3 py-1.5 text-sm font-semibold text-gray-400">
|
||||
Installed
|
||||
</div>
|
||||
)
|
||||
case 'NotInstalled':
|
||||
return (
|
||||
<Button themes="secondaryBlue" size="sm" onClick={onInstallClick}>
|
||||
Install
|
||||
</Button>
|
||||
)
|
||||
default:
|
||||
return <div></div>
|
||||
}
|
||||
}
|
||||
|
||||
export default TensorRtExtensionItem
|
||||
@ -4,13 +4,18 @@ import React, { useState, useEffect, useRef } from 'react'
|
||||
|
||||
import { Button, ScrollArea } from '@janhq/uikit'
|
||||
|
||||
import Loader from '@/containers/Loader'
|
||||
|
||||
import { formatExtensionsName } from '@/utils/converter'
|
||||
|
||||
import TensorRtExtensionItem from './TensorRtExtensionItem'
|
||||
|
||||
import { extensionManager } from '@/extension'
|
||||
import Extension from '@/extension/Extension'
|
||||
|
||||
const ExtensionCatalog = () => {
|
||||
const [activeExtensions, setActiveExtensions] = useState<Extension[]>([])
|
||||
const [showLoading, setShowLoading] = useState(false)
|
||||
const fileInputRef = useRef<HTMLInputElement | null>(null)
|
||||
/**
|
||||
* Fetches the active extensions and their preferences from the `extensions` and `preferences` modules.
|
||||
@ -63,65 +68,76 @@ const ExtensionCatalog = () => {
|
||||
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
|
||||
const file = event.target.files?.[0]
|
||||
if (file) {
|
||||
setShowLoading(true)
|
||||
install(event)
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<ScrollArea className="h-full w-full px-4">
|
||||
<div className="block w-full">
|
||||
{activeExtensions.map((item, i) => {
|
||||
return (
|
||||
<div
|
||||
key={i}
|
||||
className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none"
|
||||
>
|
||||
<div className="w-4/5 flex-shrink-0 space-y-1.5">
|
||||
<div className="flex gap-x-2">
|
||||
<h6 className="text-sm font-semibold capitalize">
|
||||
{formatExtensionsName(item.name ?? item.description ?? '')}
|
||||
</h6>
|
||||
<p className="whitespace-pre-wrap font-semibold leading-relaxed ">
|
||||
v{item.version}
|
||||
<>
|
||||
<ScrollArea className="h-full w-full px-4">
|
||||
<div className="block w-full">
|
||||
{activeExtensions.map((item, i) => {
|
||||
// TODO: this is bad code, rewrite it
|
||||
if (item.name === '@janhq/tensorrt-llm-extension') {
|
||||
return <TensorRtExtensionItem key={i} item={item} />
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
key={i}
|
||||
className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none"
|
||||
>
|
||||
<div className="w-4/5 flex-shrink-0 space-y-1.5">
|
||||
<div className="flex items-center gap-x-2">
|
||||
<h6 className="text-sm font-semibold capitalize">
|
||||
{formatExtensionsName(
|
||||
item.name ?? item.description ?? ''
|
||||
)}
|
||||
</h6>
|
||||
<p className="whitespace-pre-wrap text-sm font-semibold leading-relaxed ">
|
||||
v{item.version}
|
||||
</p>
|
||||
</div>
|
||||
<p className="whitespace-pre-wrap leading-relaxed ">
|
||||
{item.description}
|
||||
</p>
|
||||
</div>
|
||||
<p className="whitespace-pre-wrap leading-relaxed ">
|
||||
{item.description}
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
{/* Manual Installation */}
|
||||
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none">
|
||||
<div className="w-4/5 flex-shrink-0 space-y-1.5">
|
||||
<div className="flex gap-x-2">
|
||||
<h6 className="text-sm font-semibold capitalize">
|
||||
Manual Installation
|
||||
</h6>
|
||||
</div>
|
||||
<p className="whitespace-pre-wrap leading-relaxed ">
|
||||
Select a extension file to install (.tgz)
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
{/* Manual Installation */}
|
||||
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none">
|
||||
<div className="w-4/5 flex-shrink-0 space-y-1.5">
|
||||
<div className="flex gap-x-2">
|
||||
<h6 className="text-sm font-semibold capitalize">
|
||||
Manual Installation
|
||||
</h6>
|
||||
<div>
|
||||
<input
|
||||
type="file"
|
||||
style={{ display: 'none' }}
|
||||
ref={fileInputRef}
|
||||
onChange={handleFileChange}
|
||||
/>
|
||||
<Button
|
||||
themes="secondaryBlue"
|
||||
size="sm"
|
||||
onClick={() => fileInputRef.current?.click()}
|
||||
>
|
||||
Select
|
||||
</Button>
|
||||
</div>
|
||||
<p className="whitespace-pre-wrap leading-relaxed ">
|
||||
Select a extension file to install (.tgz)
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<input
|
||||
type="file"
|
||||
style={{ display: 'none' }}
|
||||
ref={fileInputRef}
|
||||
onChange={handleFileChange}
|
||||
/>
|
||||
<Button
|
||||
themes="secondaryBlue"
|
||||
size="sm"
|
||||
onClick={() => fileInputRef.current?.click()}
|
||||
>
|
||||
Select
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</ScrollArea>
|
||||
</ScrollArea>
|
||||
{showLoading && <Loader description="Installing..." />}
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
24
web/services/appService.ts
Normal file
24
web/services/appService.ts
Normal file
@ -0,0 +1,24 @@
|
||||
import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
|
||||
|
||||
import { toaster } from '@/containers/Toast'
|
||||
|
||||
import { extensionManager } from '@/extension'
|
||||
|
||||
export const appService = {
|
||||
systemInformations: async () => {
|
||||
const gpuSetting = await extensionManager
|
||||
?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
|
||||
?.getGpuSetting()
|
||||
|
||||
return {
|
||||
gpuSetting,
|
||||
// TODO: Other system information
|
||||
}
|
||||
},
|
||||
showToast: (title: string, description: string) => {
|
||||
toaster({
|
||||
title,
|
||||
description: description,
|
||||
})
|
||||
},
|
||||
}
|
||||
@ -1,5 +1,7 @@
|
||||
import { appService } from './appService'
|
||||
import { EventEmitter } from './eventsService'
|
||||
import { restAPI } from './restService'
|
||||
|
||||
export const setupCoreServices = () => {
|
||||
if (typeof window === 'undefined') {
|
||||
console.debug('undefine', window)
|
||||
@ -10,7 +12,10 @@ export const setupCoreServices = () => {
|
||||
if (!window.core) {
|
||||
window.core = {
|
||||
events: new EventEmitter(),
|
||||
api: window.electronAPI ?? restAPI,
|
||||
api: {
|
||||
...(window.electronAPI ? window.electronAPI : restAPI),
|
||||
...appService,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
1
web/types/index.d.ts
vendored
1
web/types/index.d.ts
vendored
@ -11,6 +11,7 @@ declare global {
|
||||
declare const isMac: boolean
|
||||
declare const isWindows: boolean
|
||||
declare const isLinux: boolean
|
||||
declare const PLATFORM: string
|
||||
interface Core {
|
||||
api: APIFunctions
|
||||
events: EventEmitter
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user