diff --git a/.gitignore b/.gitignore
index ae0691605..d9787d87b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,16 +22,16 @@ package-lock.json
core/lib/**
# Nitro binary files
-extensions/inference-nitro-extension/bin/*/nitro
-extensions/inference-nitro-extension/bin/*/*.metal
-extensions/inference-nitro-extension/bin/*/*.exe
-extensions/inference-nitro-extension/bin/*/*.dll
-extensions/inference-nitro-extension/bin/*/*.exp
-extensions/inference-nitro-extension/bin/*/*.lib
-extensions/inference-nitro-extension/bin/saved-*
-extensions/inference-nitro-extension/bin/*.tar.gz
-extensions/inference-nitro-extension/bin/vulkaninfoSDK.exe
-extensions/inference-nitro-extension/bin/vulkaninfo
+extensions/*-extension/bin/*/nitro
+extensions/*-extension/bin/*/*.metal
+extensions/*-extension/bin/*/*.exe
+extensions/*-extension/bin/*/*.dll
+extensions/*-extension/bin/*/*.exp
+extensions/*-extension/bin/*/*.lib
+extensions/*-extension/bin/saved-*
+extensions/*-extension/bin/*.tar.gz
+extensions/*-extension/bin/vulkaninfoSDK.exe
+extensions/*-extension/bin/vulkaninfo
# Turborepo
diff --git a/README.md b/README.md
index bc206f4eb..adebb8ea1 100644
--- a/README.md
+++ b/README.md
@@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
| Experimental (Nightly Build) |
[Nightly download badge table: this hunk updates the badge links for jan.exe (Windows), Intel and M1/M2 (macOS), and jan.deb / jan.AppImage (Linux); the surrounding <a>/<img> markup was lost in extraction]
diff --git a/core/package.json b/core/package.json
index 2bf3e1735..2f4f6b576 100644
--- a/core/package.json
+++ b/core/package.json
@@ -45,11 +45,12 @@
"start": "rollup -c rollup.config.ts -w"
},
"devDependencies": {
- "jest": "^29.7.0",
"@types/jest": "^29.5.12",
"@types/node": "^12.0.2",
- "eslint-plugin-jest": "^27.9.0",
"eslint": "8.57.0",
+ "eslint-plugin-jest": "^27.9.0",
+ "jest": "^29.7.0",
+ "rimraf": "^3.0.2",
"rollup": "^2.38.5",
"rollup-plugin-commonjs": "^9.1.8",
"rollup-plugin-json": "^3.1.0",
@@ -58,7 +59,10 @@
"rollup-plugin-typescript2": "^0.36.0",
"ts-jest": "^29.1.2",
"tslib": "^2.6.2",
- "typescript": "^5.3.3",
- "rimraf": "^3.0.2"
+ "typescript": "^5.3.3"
+ },
+ "dependencies": {
+ "rxjs": "^7.8.1",
+ "ulid": "^2.3.0"
}
}
diff --git a/core/rollup.config.ts b/core/rollup.config.ts
index ebea8e237..95305bf25 100644
--- a/core/rollup.config.ts
+++ b/core/rollup.config.ts
@@ -64,7 +64,7 @@ export default [
// Allow json resolution
json(),
// Compile TypeScript files
- typescript({ useTsconfigDeclarationDir: true }),
+ typescript({ useTsconfigDeclarationDir: true, exclude: ['src/*.ts', 'src/extensions/**'] }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
diff --git a/core/src/api/index.ts b/core/src/api/index.ts
index e62b49087..f97593934 100644
--- a/core/src/api/index.ts
+++ b/core/src/api/index.ts
@@ -33,6 +33,8 @@ export enum AppRoute {
stopServer = 'stopServer',
log = 'log',
logServer = 'logServer',
+ systemInformations = 'systemInformations',
+ showToast = 'showToast',
}
export enum AppEvent {
@@ -56,6 +58,7 @@ export enum DownloadEvent {
onFileDownloadUpdate = 'onFileDownloadUpdate',
onFileDownloadError = 'onFileDownloadError',
onFileDownloadSuccess = 'onFileDownloadSuccess',
+ onFileUnzipSuccess = 'onFileUnzipSuccess',
}
export enum LocalImportModelEvent {
diff --git a/core/src/core.ts b/core/src/core.ts
index 6e2442c2b..b8cbd3162 100644
--- a/core/src/core.ts
+++ b/core/src/core.ts
@@ -1,4 +1,4 @@
-import { FileStat } from './types'
+import { DownloadRequest, FileStat, NetworkConfig } from './types'
/**
* Execute a extension module function in main process
@@ -17,18 +17,16 @@ const executeOnMain: (extension: string, method: string, ...args: any[]) => Prom
/**
* Downloads a file from a URL and saves it to the local file system.
- * @param {string} url - The URL of the file to download.
- * @param {string} fileName - The name to use for the downloaded file.
- * @param {object} network - Optional object to specify proxy/whether to ignore SSL certificates.
+ *
+ * @param {DownloadRequest} downloadRequest - The request to download the file.
+ * @param {NetworkConfig} network - Optional object to specify proxy/whether to ignore SSL certificates.
+ *
* @returns {Promise} A promise that resolves when the file is downloaded.
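+ *
+ * @example
+ * // Illustrative only; the URL and path are hypothetical:
+ * downloadFile(
+ *   { url: 'https://example.com/model.gguf', localPath: 'models/my-model/model.gguf', downloadType: 'model' },
+ *   { ignoreSSL: false }
+ * )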
*/
-const downloadFile: (
- url: string,
- fileName: string,
- network?: { proxy?: string; ignoreSSL?: boolean }
-) => Promise = (url, fileName, network) => {
- return global.core?.api?.downloadFile(url, fileName, network)
-}
+const downloadFile: (downloadRequest: DownloadRequest, network?: NetworkConfig) => Promise<void> = (
+ downloadRequest,
+ network
+) => global.core?.api?.downloadFile(downloadRequest, network)
/**
* Aborts the download of a specific file.
@@ -108,6 +106,20 @@ const log: (message: string, fileName?: string) => void = (message, fileName) =>
const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: string, to: string) =>
global.core.api?.isSubdirectory(from, to)
+/**
+ * Get system information
+ * @returns {Promise} - A promise that resolves with the system information.
+ */
+const systemInformations: () => Promise<any> = () => global.core.api?.systemInformations()
+
+/**
+ * Show a toast message from the renderer (browser) process.
+ * @param title - The toast title.
+ * @param message - The toast body text.
+ */
+const showToast: (title: string, message: string) => void = (title, message) =>
+ global.core.api?.showToast(title, message)
/**
* Register extension point function type definition
*/
@@ -134,5 +146,7 @@ export {
log,
isSubdirectory,
getUserHomePath,
+ systemInformations,
+ showToast,
FileStat,
}
diff --git a/core/src/extension.ts b/core/src/extension.ts
index 3b3edc7b3..22accb4b4 100644
--- a/core/src/extension.ts
+++ b/core/src/extension.ts
@@ -10,6 +10,22 @@ export enum ExtensionTypeEnum {
export interface ExtensionType {
type(): ExtensionTypeEnum | undefined
}
+
+export interface Compatibility {
+ platform: string[]
+ version: string
+}
+
+const ALL_INSTALLATION_STATE = [
+  'NotRequired', // prerequisites are not required
+  'Installed', // required and installed; good to go
+  'NotInstalled', // required but not yet installed
+  'Corrupted', // required but corrupted; needs to be re-downloaded
+] as const
+
+export type InstallationStateTuple = typeof ALL_INSTALLATION_STATE
+export type InstallationState = InstallationStateTuple[number]
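+
+// Example (illustrative): an extension that bundles a native binary would report
+// 'NotInstalled' until install() has downloaded it, while a pure TypeScript
+// extension can stay 'NotRequired'.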
+
/**
* Represents a base extension.
* This class should be extended by any class that represents an extension.
@@ -33,4 +49,32 @@ export abstract class BaseExtension implements ExtensionType {
* Any cleanup logic for the extension should be put here.
*/
abstract onUnload(): void
+
+ /**
+ * The compatibility of the extension.
+   * Used to check whether the extension is compatible with the current environment.
+   * @returns {Compatibility | undefined} Supported platforms and required app version, or undefined if unrestricted.
+ */
+ compatibility(): Compatibility | undefined {
+ return undefined
+ }
+
+ /**
+ * Determine if the prerequisites for the extension are installed.
+ *
+   * @returns {Promise<InstallationState>} The installation state of the extension's prerequisites.
+ */
+  async installationState(): Promise<InstallationState> {
+ return 'NotRequired'
+ }
+
+ /**
+ * Install the prerequisites for the extension.
+ *
+ * @returns {Promise}
+ */
+ // @ts-ignore
+  async install(...args): Promise<void> {
+ return
+ }
}
diff --git a/core/src/extensions/ai-engines/AIEngine.ts b/core/src/extensions/ai-engines/AIEngine.ts
new file mode 100644
index 000000000..608b5c193
--- /dev/null
+++ b/core/src/extensions/ai-engines/AIEngine.ts
@@ -0,0 +1,60 @@
+import { getJanDataFolderPath, joinPath } from '../../core'
+import { events } from '../../events'
+import { BaseExtension } from '../../extension'
+import { fs } from '../../fs'
+import { Model, ModelEvent } from '../../types'
+
+/**
+ * Base AIEngine
+ * Applicable to all AI Engines
+ */
+export abstract class AIEngine extends BaseExtension {
+ // The inference engine
+ abstract provider: string
+ // The model folder
+ modelFolder: string = 'models'
+
+  abstract models(): Promise<Model[]>
+
+ /**
+ * On extension load, subscribe to events.
+ */
+ onLoad() {
+ this.prePopulateModels()
+ }
+
+ /**
+ * Pre-populate models to App Data Folder
+ */
+  prePopulateModels(): Promise<void> {
+ return this.models().then((models) => {
+      const prePopulateOperations = models.map((model) =>
+ getJanDataFolderPath()
+ .then((janDataFolder) =>
+ // Attempt to create the model folder
+ joinPath([janDataFolder, this.modelFolder, model.id]).then((path) =>
+ fs
+ .mkdirSync(path)
+              .catch(() => {}) // ignore mkdir errors (e.g. the folder already exists)
+ .then(() => path)
+ )
+ )
+ .then((path) => joinPath([path, 'model.json']))
+ .then((path) => {
+          // Do not overwrite existing model.json
+ return fs.existsSync(path).then((exist: any) => {
+ if (!exist) return fs.writeFileSync(path, JSON.stringify(model, null, 2))
+ })
+ })
+ .catch((e: Error) => {
+ console.error('Error', e)
+ })
+ )
+      Promise.all(prePopulateOperations).then(() =>
+ // Emit event to update models
+ // So the UI can update the models list
+ events.emit(ModelEvent.OnModelsUpdate, {})
+ )
+ })
+ }
+}
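+
+// Illustrative: a concrete engine subclass supplies `provider` and `models()`
+// (plus the remaining BaseExtension abstract members) and inherits model pre-population.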
diff --git a/core/src/extensions/ai-engines/LocalOAIEngine.ts b/core/src/extensions/ai-engines/LocalOAIEngine.ts
new file mode 100644
index 000000000..79dbcbf5e
--- /dev/null
+++ b/core/src/extensions/ai-engines/LocalOAIEngine.ts
@@ -0,0 +1,63 @@
+import { executeOnMain, getJanDataFolderPath, joinPath } from '../../core'
+import { events } from '../../events'
+import { Model, ModelEvent } from '../../types'
+import { OAIEngine } from './OAIEngine'
+
+/**
+ * Base OAI Local Inference Provider
+ * Added the implementation of loading and unloading model (applicable to local inference providers)
+ */
+export abstract class LocalOAIEngine extends OAIEngine {
+  // Names of the node-module functions invoked to load/unload a model
+ loadModelFunctionName: string = 'loadModel'
+ unloadModelFunctionName: string = 'unloadModel'
+ isRunning: boolean = false
+
+ /**
+ * On extension load, subscribe to events.
+ */
+ onLoad() {
+ super.onLoad()
+ // These events are applicable to local inference providers
+ events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model))
+ events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model))
+ }
+
+ /**
+ * Load the model.
+ */
+ async onModelInit(model: Model) {
+    if (model.engine?.toString() !== this.provider) return
+
+ const modelFolder = await joinPath([await getJanDataFolderPath(), this.modelFolder, model.id])
+
+ const res = await executeOnMain(this.nodeModule, this.loadModelFunctionName, {
+ modelFolder,
+ model,
+ })
+
+ if (res?.error) {
+ events.emit(ModelEvent.OnModelFail, {
+ ...model,
+ error: res.error,
+ })
+ return
+ } else {
+ this.loadedModel = model
+ events.emit(ModelEvent.OnModelReady, model)
+ this.isRunning = true
+ }
+ }
+ /**
+ * Stops the model.
+ */
+ onModelStop(model: Model) {
+ if (model.engine?.toString() !== this.provider) return
+
+ this.isRunning = false
+
+ executeOnMain(this.nodeModule, this.unloadModelFunctionName).then(() => {
+ events.emit(ModelEvent.OnModelStopped, {})
+ })
+ }
+}
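+
+// Illustrative contract: the node module named by `nodeModule` is expected to export
+// functions matching `loadModelFunctionName`/`unloadModelFunctionName`, e.g.
+// loadModel({ modelFolder, model }) and unloadModel(), invoked via executeOnMain.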
diff --git a/core/src/extensions/ai-engines/OAIEngine.ts b/core/src/extensions/ai-engines/OAIEngine.ts
new file mode 100644
index 000000000..3e583c9b9
--- /dev/null
+++ b/core/src/extensions/ai-engines/OAIEngine.ts
@@ -0,0 +1,116 @@
+import { requestInference } from './helpers/sse'
+import { ulid } from 'ulid'
+import { AIEngine } from './AIEngine'
+import {
+ ChatCompletionRole,
+ ContentType,
+ InferenceEvent,
+ MessageEvent,
+ MessageRequest,
+ MessageRequestType,
+ MessageStatus,
+ Model,
+ ModelInfo,
+ ThreadContent,
+ ThreadMessage,
+} from '../../types'
+import { events } from '../../events'
+
+/**
+ * Base OAI Inference Provider
+ * Applicable to all OAI compatible inference providers
+ */
+export abstract class OAIEngine extends AIEngine {
+  // The inference endpoint and the node module backing this engine
+ abstract inferenceUrl: string
+ abstract nodeModule: string
+
+ // Controller to handle stop requests
+ controller = new AbortController()
+ isCancelled = false
+
+ // The loaded model instance
+ loadedModel: Model | undefined
+
+ /**
+ * On extension load, subscribe to events.
+ */
+ onLoad() {
+ super.onLoad()
+ events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
+ events.on(InferenceEvent.OnInferenceStopped, () => this.onInferenceStopped())
+ }
+
+ /**
+ * On extension unload
+ */
+ onUnload(): void {}
+
+  /**
+ * Inference request
+ */
+ inference(data: MessageRequest) {
+ if (data.model?.engine?.toString() !== this.provider) return
+
+ const timestamp = Date.now()
+ const message: ThreadMessage = {
+ id: ulid(),
+ thread_id: data.threadId,
+ type: data.type,
+ assistant_id: data.assistantId,
+ role: ChatCompletionRole.Assistant,
+ content: [],
+ status: MessageStatus.Pending,
+ created: timestamp,
+ updated: timestamp,
+ object: 'thread.message',
+ }
+
+ if (data.type !== MessageRequestType.Summary) {
+ events.emit(MessageEvent.OnMessageResponse, message)
+ }
+
+ this.isCancelled = false
+ this.controller = new AbortController()
+
+ const model: ModelInfo = {
+ ...(this.loadedModel ? this.loadedModel : {}),
+ ...data.model,
+ }
+
+ requestInference(this.inferenceUrl, data.messages ?? [], model, this.controller).subscribe({
+ next: (content: any) => {
+ const messageContent: ThreadContent = {
+ type: ContentType.Text,
+ text: {
+ value: content.trim(),
+ annotations: [],
+ },
+ }
+ message.content = [messageContent]
+ events.emit(MessageEvent.OnMessageUpdate, message)
+ },
+ complete: async () => {
+ message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
+ events.emit(MessageEvent.OnMessageUpdate, message)
+ },
+ error: async (err: any) => {
+ if (this.isCancelled || message.content.length) {
+ message.status = MessageStatus.Stopped
+ events.emit(MessageEvent.OnMessageUpdate, message)
+ return
+ }
+ message.status = MessageStatus.Error
+ events.emit(MessageEvent.OnMessageUpdate, message)
+ },
+ })
+ }
+
+ /**
+ * Stops the inference.
+ */
+ onInferenceStopped() {
+ this.isCancelled = true
+ this.controller?.abort()
+ }
+}
diff --git a/core/src/extensions/ai-engines/helpers/sse.ts b/core/src/extensions/ai-engines/helpers/sse.ts
new file mode 100644
index 000000000..3d810d934
--- /dev/null
+++ b/core/src/extensions/ai-engines/helpers/sse.ts
@@ -0,0 +1,67 @@
+import { Observable } from 'rxjs'
+import { ModelRuntimeParams } from '../../../types'
+/**
+ * Sends a request to the inference server to generate a response based on the recent messages.
+ * @param recentMessages - An array of recent messages to use as context for the inference.
+ * @returns An Observable that emits the generated response as a string.
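+ *
+ * @example
+ * // Illustrative usage; the URL and model id are hypothetical:
+ * requestInference('http://localhost:3928/v1/chat/completions', messages, {
+ *   id: 'my-model',
+ *   parameters: { stream: true },
+ * }).subscribe({ next: (text) => console.log(text), error: console.error })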
+ */
+export function requestInference(
+ inferenceUrl: string,
+ recentMessages: any[],
+ model: {
+ id: string
+ parameters: ModelRuntimeParams
+ },
+ controller?: AbortController
+): Observable<string> {
+ return new Observable((subscriber) => {
+ const requestBody = JSON.stringify({
+ messages: recentMessages,
+ model: model.id,
+ stream: true,
+ ...model.parameters,
+ })
+ fetch(inferenceUrl, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Access-Control-Allow-Origin': '*',
+ 'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
+ },
+ body: requestBody,
+ signal: controller?.signal,
+ })
+ .then(async (response) => {
+ if (model.parameters.stream === false) {
+ const data = await response.json()
+ subscriber.next(data.choices[0]?.message?.content ?? '')
+ } else {
+ const stream = response.body
+ const decoder = new TextDecoder('utf-8')
+ const reader = stream?.getReader()
+ let content = ''
+
+        while (reader) {
+ const { done, value } = await reader.read()
+ if (done) {
+ break
+ }
+ const text = decoder.decode(value)
+ const lines = text.trim().split('\n')
+ for (const line of lines) {
+ if (line.startsWith('data: ') && !line.includes('data: [DONE]')) {
+ const data = JSON.parse(line.replace('data: ', ''))
+ content += data.choices[0]?.delta?.content ?? ''
+ if (content.startsWith('assistant: ')) {
+ content = content.replace('assistant: ', '')
+ }
+ subscriber.next(content)
+ }
+ }
+ }
+ }
+ subscriber.complete()
+ })
+ .catch((err) => subscriber.error(err))
+ })
+}
diff --git a/core/src/extensions/ai-engines/index.ts b/core/src/extensions/ai-engines/index.ts
new file mode 100644
index 000000000..f4da62a7c
--- /dev/null
+++ b/core/src/extensions/ai-engines/index.ts
@@ -0,0 +1,3 @@
+export * from './AIEngine'
+export * from './OAIEngine'
+export * from './LocalOAIEngine'
diff --git a/core/src/extensions/index.ts b/core/src/extensions/index.ts
index c6834482c..c049f3b3a 100644
--- a/core/src/extensions/index.ts
+++ b/core/src/extensions/index.ts
@@ -28,3 +28,8 @@ export { ModelExtension } from './model'
* Hugging Face extension for converting HF models to GGUF.
*/
export { HuggingFaceExtension } from './huggingface'
+
+/**
+ * Base AI Engines.
+ */
+export * from './ai-engines'
diff --git a/core/src/extensions/model.ts b/core/src/extensions/model.ts
index 79202398b..33eec0afc 100644
--- a/core/src/extensions/model.ts
+++ b/core/src/extensions/model.ts
@@ -1,5 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import { ImportingModel, Model, ModelInterface, OptionType } from '../index'
+import { GpuSetting, ImportingModel, Model, ModelInterface, OptionType } from '../index'
/**
* Model extension for managing models.
@@ -14,6 +14,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
abstract downloadModel(
model: Model,
+ gpuSettings?: GpuSetting,
network?: { proxy: string; ignoreSSL?: boolean }
  ): Promise<void>
  abstract cancelModelDownload(modelId: string): Promise<void>
diff --git a/core/src/extensions/monitoring.ts b/core/src/extensions/monitoring.ts
index ba193f0f4..8d61580fc 100644
--- a/core/src/extensions/monitoring.ts
+++ b/core/src/extensions/monitoring.ts
@@ -1,5 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import { MonitoringInterface } from '../index'
+import { GpuSetting, MonitoringInterface } from '../index'
/**
* Monitoring extension for system monitoring.
@@ -13,6 +13,7 @@ export abstract class MonitoringExtension extends BaseExtension implements Monit
return ExtensionTypeEnum.SystemMonitoring
}
+  abstract getGpuSetting(): Promise<GpuSetting>
abstract getResourcesInfo(): Promise
abstract getCurrentLoad(): Promise
}
diff --git a/core/src/node/api/processors/download.ts b/core/src/node/api/processors/download.ts
index 4ddeff160..8e8e08f2f 100644
--- a/core/src/node/api/processors/download.ts
+++ b/core/src/node/api/processors/download.ts
@@ -5,7 +5,7 @@ import { getJanDataFolderPath } from '../../helper'
import { DownloadManager } from '../../helper/download'
import { createWriteStream, renameSync } from 'fs'
import { Processor } from './Processor'
-import { DownloadState } from '../../../types'
+import { DownloadRequest, DownloadState, NetworkConfig } from '../../../types'
export class Downloader implements Processor {
observer?: Function
@@ -20,24 +20,27 @@ export class Downloader implements Processor {
return func(this.observer, ...args)
}
- downloadFile(observer: any, url: string, localPath: string, network: any) {
+ downloadFile(observer: any, downloadRequest: DownloadRequest, network?: NetworkConfig) {
const request = require('request')
const progress = require('request-progress')
const strictSSL = !network?.ignoreSSL
const proxy = network?.proxy?.startsWith('http') ? network.proxy : undefined
+
+ const { localPath, url } = downloadRequest
+ let normalizedPath = localPath
if (typeof localPath === 'string') {
- localPath = normalizeFilePath(localPath)
+ normalizedPath = normalizeFilePath(localPath)
}
- const array = localPath.split(sep)
+ const array = normalizedPath.split(sep)
const fileName = array.pop() ?? ''
const modelId = array.pop() ?? ''
- const destination = resolve(getJanDataFolderPath(), localPath)
+ const destination = resolve(getJanDataFolderPath(), normalizedPath)
const rq = request({ url, strictSSL, proxy })
// Put request to download manager instance
- DownloadManager.instance.setRequest(localPath, rq)
+ DownloadManager.instance.setRequest(normalizedPath, rq)
// Downloading file to a temp file first
const downloadingTempFile = `${destination}.download`
@@ -56,16 +59,25 @@ export class Downloader implements Processor {
total: 0,
transferred: 0,
},
+ children: [],
downloadState: 'downloading',
+ extensionId: downloadRequest.extensionId,
+ downloadType: downloadRequest.downloadType,
+ localPath: normalizedPath,
}
DownloadManager.instance.downloadProgressMap[modelId] = initialDownloadState
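+    // For extension downloads, surface the initial state to the renderer immediately
+    // so installation progress can be shown from the start.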
+ if (downloadRequest.downloadType === 'extension') {
+ observer?.(DownloadEvent.onFileDownloadUpdate, initialDownloadState)
+ }
+
progress(rq, {})
.on('progress', (state: any) => {
+ const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = {
+ ...currentDownloadState,
...state,
- modelId,
- fileName,
+ fileName: fileName,
downloadState: 'downloading',
}
console.debug('progress: ', downloadState)
@@ -76,22 +88,22 @@ export class Downloader implements Processor {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = {
...currentDownloadState,
+ fileName: fileName,
error: error.message,
downloadState: 'error',
}
- if (currentDownloadState) {
- DownloadManager.instance.downloadProgressMap[modelId] = downloadState
- }
observer?.(DownloadEvent.onFileDownloadError, downloadState)
+ DownloadManager.instance.downloadProgressMap[modelId] = downloadState
})
.on('end', () => {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
- if (currentDownloadState && DownloadManager.instance.networkRequests[localPath]) {
+ if (currentDownloadState && DownloadManager.instance.networkRequests[normalizedPath]) {
// Finished downloading, rename temp file to actual file
renameSync(downloadingTempFile, destination)
const downloadState: DownloadState = {
...currentDownloadState,
+ fileName: fileName,
downloadState: 'end',
}
observer?.(DownloadEvent.onFileDownloadSuccess, downloadState)
diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts
index 7001c0c76..6b9bbb3a8 100644
--- a/core/src/node/api/restful/helper/builder.ts
+++ b/core/src/node/api/restful/helper/builder.ts
@@ -1,7 +1,16 @@
-import fs from 'fs'
+import {
+ existsSync,
+ readdirSync,
+ readFileSync,
+ writeFileSync,
+ mkdirSync,
+ appendFileSync,
+ createWriteStream,
+ rmdirSync,
+} from 'fs'
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
import { join } from 'path'
-import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../index'
+import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types'
import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper'
import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
@@ -9,12 +18,12 @@ import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
export const getBuilder = async (configuration: RouteConfiguration) => {
const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
try {
- if (!fs.existsSync(directoryPath)) {
+ if (!existsSync(directoryPath)) {
console.debug('model folder not found')
return []
}
- const files: string[] = fs.readdirSync(directoryPath)
+ const files: string[] = readdirSync(directoryPath)
const allDirectories: string[] = []
for (const file of files) {
@@ -46,8 +55,8 @@ export const getBuilder = async (configuration: RouteConfiguration) => {
}
const readModelMetadata = (path: string): string | undefined => {
- if (fs.existsSync(path)) {
- return fs.readFileSync(path, 'utf-8')
+ if (existsSync(path)) {
+ return readFileSync(path, 'utf-8')
} else {
return undefined
}
@@ -81,7 +90,7 @@ export const deleteBuilder = async (configuration: RouteConfiguration, id: strin
}
const objectPath = join(directoryPath, id)
- fs.rmdirSync(objectPath, { recursive: true })
+ rmdirSync(objectPath, { recursive: true })
return {
id: id,
object: configuration.delete.object,
@@ -96,20 +105,19 @@ export const getMessages = async (threadId: string): Promise<ThreadMessage[]> =>
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const messageFile = 'messages.jsonl'
try {
- const files: string[] = fs.readdirSync(threadDirPath)
+ const files: string[] = readdirSync(threadDirPath)
if (!files.includes(messageFile)) {
console.error(`${threadDirPath} not contains message file`)
return []
}
const messageFilePath = join(threadDirPath, messageFile)
- if (!fs.existsSync(messageFilePath)) {
+ if (!existsSync(messageFilePath)) {
console.debug('message file not found')
return []
}
- const lines = fs
- .readFileSync(messageFilePath, 'utf-8')
+ const lines = readFileSync(messageFilePath, 'utf-8')
.toString()
.split('\n')
.filter((line: any) => line !== '')
@@ -157,11 +165,11 @@ export const createThread = async (thread: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
const threadJsonPath = join(threadDirPath, threadMetadataFileName)
- if (!fs.existsSync(threadDirPath)) {
- fs.mkdirSync(threadDirPath)
+ if (!existsSync(threadDirPath)) {
+ mkdirSync(threadDirPath)
}
- await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
+ await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
return updatedThread
} catch (err) {
return {
@@ -191,7 +199,7 @@ export const updateThread = async (threadId: string, thread: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
const threadJsonPath = join(threadDirPath, threadMetadataFileName)
- await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
+ await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
return updatedThread
} catch (err) {
return {
@@ -233,10 +241,10 @@ export const createMessage = async (threadId: string, message: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const threadMessagePath = join(threadDirPath, threadMessagesFileName)
- if (!fs.existsSync(threadDirPath)) {
- fs.mkdirSync(threadDirPath)
+ if (!existsSync(threadDirPath)) {
+ mkdirSync(threadDirPath)
}
- fs.appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
+ appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
return threadMessage
} catch (err) {
return {
@@ -259,8 +267,8 @@ export const downloadModel = async (
}
const directoryPath = join(getJanDataFolderPath(), 'models', modelId)
- if (!fs.existsSync(directoryPath)) {
- fs.mkdirSync(directoryPath)
+ if (!existsSync(directoryPath)) {
+ mkdirSync(directoryPath)
}
// path to model binary
@@ -281,7 +289,7 @@ export const downloadModel = async (
.on('end', function () {
console.debug('end')
})
- .pipe(fs.createWriteStream(modelBinaryPath))
+ .pipe(createWriteStream(modelBinaryPath))
}
return {
diff --git a/core/src/types/file/index.ts b/core/src/types/file/index.ts
index cc7274a28..d941987ef 100644
--- a/core/src/types/file/index.ts
+++ b/core/src/types/file/index.ts
@@ -4,16 +4,43 @@ export type FileStat = {
}
export type DownloadState = {
- modelId: string
+ modelId: string // TODO: change to download id
fileName: string
time: DownloadTime
speed: number
- percent: number
+ percent: number
size: DownloadSize
- children?: DownloadState[]
- error?: string
downloadState: 'downloading' | 'error' | 'end'
+ children?: DownloadState[]
+
+ error?: string
+ extensionId?: string
+ downloadType?: DownloadType
+ localPath?: string
+}
+
+export type DownloadType = 'model' | 'extension'
+
+export type DownloadRequest = {
+ /**
+ * The URL to download the file from.
+ */
+ url: string
+
+ /**
+ * The local path to save the file to.
+ */
+ localPath: string
+
+ /**
+ * The extension ID of the extension that initiated the download.
+ *
+ * Can be extension name.
+ */
+ extensionId?: string
+
+ downloadType?: DownloadType
}
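+
+// Example (illustrative) request for an extension artifact:
+// const request: DownloadRequest = {
+//   url: 'https://example.com/artifact.tar.gz',
+//   localPath: 'extensions/artifact.tar.gz',
+//   extensionId: '@janhq/sample-extension',
+//   downloadType: 'extension',
+// }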
type DownloadTime = {
diff --git a/core/src/types/miscellaneous/fileDownloadRequest.ts b/core/src/types/miscellaneous/fileDownloadRequest.ts
new file mode 100644
index 000000000..83131aa71
--- /dev/null
+++ b/core/src/types/miscellaneous/fileDownloadRequest.ts
@@ -0,0 +1,8 @@
+export type FileDownloadRequest = {
+ downloadId: string
+ url: string
+ localPath: string
+ fileName: string
+ displayName: string
+  metadata: Record<string, unknown>
+}
diff --git a/core/src/types/miscellaneous/index.ts b/core/src/types/miscellaneous/index.ts
index e9c205a73..b4ef68ab6 100644
--- a/core/src/types/miscellaneous/index.ts
+++ b/core/src/types/miscellaneous/index.ts
@@ -1,3 +1,5 @@
export * from './systemResourceInfo'
export * from './promptTemplate'
export * from './appUpdate'
+export * from './fileDownloadRequest'
+export * from './networkConfig'
\ No newline at end of file
diff --git a/core/src/types/miscellaneous/networkConfig.ts b/core/src/types/miscellaneous/networkConfig.ts
new file mode 100644
index 000000000..2d27f4223
--- /dev/null
+++ b/core/src/types/miscellaneous/networkConfig.ts
@@ -0,0 +1,4 @@
+export type NetworkConfig = {
+ proxy?: string
+ ignoreSSL?: boolean
+}
diff --git a/core/src/types/miscellaneous/systemResourceInfo.ts b/core/src/types/miscellaneous/systemResourceInfo.ts
index 1472cda47..f7dd4a82b 100644
--- a/core/src/types/miscellaneous/systemResourceInfo.ts
+++ b/core/src/types/miscellaneous/systemResourceInfo.ts
@@ -2,3 +2,31 @@ export type SystemResourceInfo = {
numCpuPhysicalCore: number
memAvailable: number
}
+
+export type RunMode = 'cpu' | 'gpu'
+
+export type GpuSetting = {
+ notify: boolean
+ run_mode: RunMode
+ nvidia_driver: {
+ exist: boolean
+ version: string
+ }
+ cuda: {
+ exist: boolean
+ version: string
+ }
+ gpus: GpuSettingInfo[]
+ gpu_highest_vram: string
+ gpus_in_use: string[]
+ is_initial: boolean
+ // TODO: This needs to be set based on user toggle in settings
+ vulkan: boolean
+}
+
+export type GpuSettingInfo = {
+ id: string
+ vram: string
+ name: string
+ arch?: string
+}
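+
+// Illustrative example of a populated GpuSetting (all values hypothetical):
+// {
+//   notify: true, run_mode: 'gpu',
+//   nvidia_driver: { exist: true, version: '535.98' },
+//   cuda: { exist: true, version: '12' },
+//   gpus: [{ id: '0', vram: '8192', name: 'NVIDIA GeForce RTX 3070', arch: 'ampere' }],
+//   gpu_highest_vram: '0', gpus_in_use: ['0'], is_initial: false, vulkan: false,
+// }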
diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index 11d3e0526..74568686b 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -19,6 +19,7 @@ export enum InferenceEngine {
nitro = 'nitro',
openai = 'openai',
triton_trtllm = 'triton_trtllm',
+ nitro_tensorrt_llm = 'nitro-tensorrt-llm',
tool_retrieval_enabled = 'tool_retrieval_enabled',
}
diff --git a/core/src/types/model/modelInterface.ts b/core/src/types/model/modelInterface.ts
index 93d5867ee..639c7c8d3 100644
--- a/core/src/types/model/modelInterface.ts
+++ b/core/src/types/model/modelInterface.ts
@@ -1,3 +1,4 @@
+import { GpuSetting } from '../miscellaneous'
import { Model } from './modelEntity'
/**
@@ -10,7 +11,11 @@ export interface ModelInterface {
* @param network - Optional object to specify proxy/whether to ignore SSL certificates.
* @returns A Promise that resolves when the model has been downloaded.
*/
- downloadModel(model: Model, network?: { ignoreSSL?: boolean; proxy?: string }): Promise
+ downloadModel(
+ model: Model,
+ gpuSettings?: GpuSetting,
+ network?: { ignoreSSL?: boolean; proxy?: string }
+  ): Promise<void>
/**
* Cancels the download of a specific model.
diff --git a/core/src/types/monitoring/index.ts b/core/src/types/monitoring/index.ts
index 5828dae8b..b96c518fd 100644
--- a/core/src/types/monitoring/index.ts
+++ b/core/src/types/monitoring/index.ts
@@ -1 +1,2 @@
export * from './monitoringInterface'
+export * from './resourceInfo'
diff --git a/core/src/types/monitoring/resourceInfo.ts b/core/src/types/monitoring/resourceInfo.ts
new file mode 100644
index 000000000..b19da5462
--- /dev/null
+++ b/core/src/types/monitoring/resourceInfo.ts
@@ -0,0 +1,6 @@
+export type ResourceInfo = {
+ mem: {
+ totalMemory: number
+ usedMemory: number
+ }
+}
diff --git a/core/tsconfig.json b/core/tsconfig.json
index b112079d2..daeb7eeff 100644
--- a/core/tsconfig.json
+++ b/core/tsconfig.json
@@ -13,7 +13,7 @@
"declarationDir": "dist/types",
"outDir": "dist/lib",
"importHelpers": true,
- "types": ["@types/jest"]
+ "types": ["@types/jest"],
},
- "include": ["src"]
+ "include": ["src"],
}
diff --git a/docs/docs/guides/providers/README.mdx b/docs/docs/guides/providers/README.mdx
new file mode 100644
index 000000000..aa3bfea1f
--- /dev/null
+++ b/docs/docs/guides/providers/README.mdx
@@ -0,0 +1,8 @@
+---
+title: Inference Providers
+slug: /guides/providers
+---
+
+import DocCardList from "@theme/DocCardList";
+
+<DocCardList />
diff --git a/docs/docs/guides/providers/image.png b/docs/docs/guides/providers/image.png
new file mode 100644
index 000000000..5f1f7104e
Binary files /dev/null and b/docs/docs/guides/providers/image.png differ
diff --git a/docs/docs/guides/providers/llama-cpp.md b/docs/docs/guides/providers/llama-cpp.md
new file mode 100644
index 000000000..d2b0daa2a
--- /dev/null
+++ b/docs/docs/guides/providers/llama-cpp.md
@@ -0,0 +1,10 @@
+---
+title: llama.cpp
+slug: /guides/providers/llama-cpp
+---
+
+## Overview
+
+[Nitro](https://github.com/janhq/nitro) is an inference server built on top of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides an OpenAI-compatible API, request queueing, and scaling.
+
+Nitro is the default AI engine that ships with Jan; no additional setup is needed.
\ No newline at end of file
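+
+Once a model is loaded, you can sanity-check the engine by querying its OpenAI-compatible endpoint directly. The port and path below are assumptions for a default local setup; check the app logs for the actual address:
+
+```sh
+# Hypothetical default endpoint; adjust the port and model id to your setup
+curl http://localhost:3928/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "my-model", "messages": [{"role": "user", "content": "Hello"}], "stream": false}'
+```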
diff --git a/docs/docs/guides/providers/tensorrt-llm.md b/docs/docs/guides/providers/tensorrt-llm.md
new file mode 100644
index 000000000..52da83b36
--- /dev/null
+++ b/docs/docs/guides/providers/tensorrt-llm.md
@@ -0,0 +1,87 @@
+---
+title: TensorRT-LLM
+slug: /guides/providers/tensorrt-llm
+---
+
+Users with Nvidia GPUs can get **20-40% faster\* token speeds** on their laptops or desktops by using [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). A further benefit is that you run FP16 models, which are more accurate than quantized models.
+
+This guide walks you through how to install Jan's official [TensorRT-LLM Extension](https://github.com/janhq/nitro-tensorrt-llm). This extension uses [Nitro-TensorRT-LLM](https://github.com/janhq/nitro-tensorrt-llm) as the AI engine, instead of the default [Nitro-Llama-CPP](https://github.com/janhq/nitro). It includes an efficient C++ server to natively execute the [TRT-LLM C++ runtime](https://nvidia.github.io/TensorRT-LLM/gpt_runtime.html). It also comes with additional features and performance improvements such as OpenAI compatibility, tokenizer improvements, and request queues.
+
+\*Compared to using the llama.cpp engine.
+
+:::warning
+This feature is only available for Windows users. Linux is coming soon.
+
+Additionally, we only prebuilt a few demo models. You can always build your desired models directly on your machine. [Read here](#build-your-own-tensorrt-models).
+
+:::
+
+## Requirements
+
+- A Windows PC
+- Nvidia GPU(s): Ada or Ampere series (i.e. RTX 4000s & 3000s). More will be supported soon.
+- 3GB+ of disk space to download TRT-LLM artifacts and a Nitro binary
+- Jan v0.4.9+ or Jan v0.4.8-321+ (nightly)
+- Nvidia Driver v535+ ([installation guide](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements))
+- CUDA Toolkit v12.2+ ([installation guide](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements))
+
+## Install TensorRT-Extension
+
+1. Go to Settings > Extensions
+2. Click install next to the TensorRT-LLM Extension
+3. Check that files are correctly downloaded
+
+```sh
+ls ~\jan\extensions\@janhq\tensorrt-llm-extension\dist\bin
+# Your Extension Folder should now include `nitro.exe`, among other artifacts needed to run TRT-LLM
+```
+
+## Download a Compatible Model
+TensorRT-LLM can only run models in `TensorRT` format. These models, aka "TensorRT Engines", are prebuilt specifically for each target OS+GPU architecture.
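+
+Under the hood, engine downloads are templated per platform: a model's source URL can contain `<os>` and `<gpuarch>` placeholders that Jan substitutes at download time (for example, an Ampere card on Windows resolves to a `windows`/`ampere` build). The exact URL layout of published engines may differ.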
+
+We offer a handful of precompiled models for Ampere and Ada cards that you can immediately download and play with:
+
+1. Restart the application and go to the Hub
+2. Look for models with the `TensorRT-LLM` label in the recommended models list. Click download. This step might take some time. 🙏
+
+
+
+3. Click use and start chatting!
+4. You may need to allow Nitro through your firewall
+
+
+
+:::warning
+If you are on our nightly builds, you may have to reinstall the TensorRT-LLM extension each time you update the app. We're working on better extension lifecycles - stay tuned.
+:::
+
+## Configure Settings
+
+You can customize the default parameters for how Jan runs TensorRT-LLM.
+
+:::info
+coming soon
+:::
+
+## Troubleshooting
+
+### Incompatible Extension vs Engine versions
+
+For now, the model versions are pinned to the extension versions.
+
+### Uninstall Extension
+
+1. Quit the app
+2. Go to Settings > Extensions
+3. Delete the entire Extensions folder.
+4. Reopen the app; only the default extensions will be restored.
+
+### Install Nitro-TensorRT-LLM manually
+
+To manually build the artifacts needed to run the server and TensorRT-LLM, you can reference the source code. [Read here](https://github.com/janhq/nitro-tensorrt-llm?tab=readme-ov-file#quickstart).
+
+### Build your own TensorRT models
+
+:::info
+coming soon
+:::
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 4c45cadbe..b95e4044f 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -199,6 +199,19 @@ const sidebars = {
"guides/models/integrate-remote",
]
},
+ {
+ type: "category",
+ label: "Inference Providers",
+ className: "head_SubMenu",
+ link: {
+ type: 'doc',
+ id: "guides/providers/README",
+ },
+ items: [
+ "guides/providers/llama-cpp",
+ "guides/providers/tensorrt-llm",
+ ]
+ },
{
type: "category",
label: "Extensions",
diff --git a/electron/icons/512x512.png b/electron/icons/512x512.png
new file mode 100644
index 000000000..289f99ded
Binary files /dev/null and b/electron/icons/512x512.png differ
diff --git a/extensions/huggingface-extension/src/index.ts b/extensions/huggingface-extension/src/index.ts
index d8f755080..88292ce58 100644
--- a/extensions/huggingface-extension/src/index.ts
+++ b/extensions/huggingface-extension/src/index.ts
@@ -13,6 +13,7 @@ import {
events,
DownloadEvent,
log,
+ DownloadRequest,
} from '@janhq/core'
import { ggufMetadata } from 'hyllama'
@@ -148,7 +149,11 @@ export default class JanHuggingFaceExtension extends HuggingFaceExtension {
if (this.interrupted) return
if (!(await fs.existsSync(localPath))) {
- downloadFile(url, localPath, network)
+ const downloadRequest: DownloadRequest = {
+ url,
+ localPath,
+ }
+ downloadFile(downloadRequest, network)
filePaths.push(filePath)
}
}
diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat
index 2ef3165c1..bb8c4ffdc 100644
--- a/extensions/inference-nitro-extension/download.bat
+++ b/extensions/inference-nitro-extension/download.bat
@@ -1,3 +1,3 @@
@echo off
set /p NITRO_VERSION=<./bin/version.txt
-.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin
+.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index e6365ad92..dd5798764 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -8,7 +8,7 @@
"license": "AGPL-3.0",
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
- "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro && download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
+ "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",
diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
index 979b4cfac..70244a5d9 100644
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@@ -108,9 +108,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
events.on(InferenceEvent.OnInferenceStopped, () =>
this.onInferenceStopped()
)
-
- // Attempt to fetch nvidia info
- await executeOnMain(NODE, 'updateNvidiaInfo', {})
}
/**
diff --git a/extensions/inference-nitro-extension/src/node/accelerator.ts b/extensions/inference-nitro-extension/src/node/accelerator.ts
deleted file mode 100644
index 1ffdbc5bd..000000000
--- a/extensions/inference-nitro-extension/src/node/accelerator.ts
+++ /dev/null
@@ -1,237 +0,0 @@
-import { writeFileSync, existsSync, readFileSync } from 'fs'
-import { exec, spawn } from 'child_process'
-import path from 'path'
-import { getJanDataFolderPath, log } from '@janhq/core/node'
-
-/**
- * Default GPU settings
- * TODO: This needs to be refactored to support multiple accelerators
- **/
-const DEFALT_SETTINGS = {
- notify: true,
- run_mode: 'cpu',
- nvidia_driver: {
- exist: false,
- version: '',
- },
- cuda: {
- exist: false,
- version: '',
- },
- gpus: [],
- gpu_highest_vram: '',
- gpus_in_use: [],
- is_initial: true,
- // TODO: This needs to be set based on user toggle in settings
- vulkan: false
-}
-
-/**
- * Path to the settings file
- **/
-export const GPU_INFO_FILE = path.join(
- getJanDataFolderPath(),
- 'settings',
- 'settings.json'
-)
-
-/**
- * Current nitro process
- */
-let nitroProcessInfo: NitroProcessInfo | undefined = undefined
-
-/**
- * Nitro process info
- */
-export interface NitroProcessInfo {
- isRunning: boolean
-}
-
-/**
- * This will retrive GPU informations and persist settings.json
- * Will be called when the extension is loaded to turn on GPU acceleration if supported
- */
-export async function updateNvidiaInfo() {
- if (process.platform !== 'darwin') {
- let data
- try {
- data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
- } catch (error) {
- data = DEFALT_SETTINGS
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- }
- updateNvidiaDriverInfo()
- updateGpuInfo()
- }
-}
-
-/**
- * Retrieve current nitro process
- */
-export const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => {
- nitroProcessInfo = {
- isRunning: subprocess != null,
- }
- return nitroProcessInfo
-}
-
-/**
- * Validate nvidia and cuda for linux and windows
- */
-export async function updateNvidiaDriverInfo(): Promise {
- exec(
- 'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
- (error, stdout) => {
- let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
-
- if (!error) {
- const firstLine = stdout.split('\n')[0].trim()
- data['nvidia_driver'].exist = true
- data['nvidia_driver'].version = firstLine
- } else {
- data['nvidia_driver'].exist = false
- }
-
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- Promise.resolve()
- }
- )
-}
-
-/**
- * Check if file exists in paths
- */
-export function checkFileExistenceInPaths(
- file: string,
- paths: string[]
-): boolean {
- return paths.some((p) => existsSync(path.join(p, file)))
-}
-
-/**
- * Validate cuda for linux and windows
- */
-export function updateCudaExistence(
- data: Record = DEFALT_SETTINGS
-): Record {
- let filesCuda12: string[]
- let filesCuda11: string[]
- let paths: string[]
- let cudaVersion: string = ''
-
- if (process.platform === 'win32') {
- filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
- filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
- paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
- } else {
- filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
- filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
- paths = process.env.LD_LIBRARY_PATH
- ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
- : []
- paths.push('/usr/lib/x86_64-linux-gnu/')
- }
-
- let cudaExists = filesCuda12.every(
- (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
- )
-
- if (!cudaExists) {
- cudaExists = filesCuda11.every(
- (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
- )
- if (cudaExists) {
- cudaVersion = '11'
- }
- } else {
- cudaVersion = '12'
- }
-
- data['cuda'].exist = cudaExists
- data['cuda'].version = cudaVersion
- console.debug(data['is_initial'], data['gpus_in_use'])
- if (cudaExists && data['is_initial'] && data['gpus_in_use'].length > 0) {
- data.run_mode = 'gpu'
- }
- data.is_initial = false
- return data
-}
-
-/**
- * Get GPU information
- */
-export async function updateGpuInfo(): Promise {
- let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
-
- // Cuda
- if (data['vulkan'] === true) {
- // Vulkan
- exec(
- process.platform === 'win32'
- ? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
- : `${__dirname}/../bin/vulkaninfo --summary`,
- (error, stdout) => {
- if (!error) {
- const output = stdout.toString()
- log(output)
- const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
-
- let gpus = []
- let match
- while ((match = gpuRegex.exec(output)) !== null) {
- const id = match[1]
- const name = match[2]
- gpus.push({ id, vram: 0, name })
- }
- data.gpus = gpus
-
- if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
- data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
- }
-
- data = updateCudaExistence(data)
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- }
- Promise.resolve()
- }
- )
- } else {
- exec(
- 'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
- (error, stdout) => {
- if (!error) {
- log(stdout)
- // Get GPU info and gpu has higher memory first
- let highestVram = 0
- let highestVramId = '0'
- let gpus = stdout
- .trim()
- .split('\n')
- .map((line) => {
- let [id, vram, name] = line.split(', ')
- vram = vram.replace(/\r/g, '')
- if (parseFloat(vram) > highestVram) {
- highestVram = parseFloat(vram)
- highestVramId = id
- }
- return { id, vram, name }
- })
-
- data.gpus = gpus
- data.gpu_highest_vram = highestVramId
- } else {
- data.gpus = []
- data.gpu_highest_vram = ''
- }
-
- if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
- data.gpus_in_use = [data['gpu_highest_vram']]
- }
-
- data = updateCudaExistence(data)
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- Promise.resolve()
- }
- )
- }
-}
diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-nitro-extension/src/node/execute.ts
index f9a668507..8bcc75ae4 100644
--- a/extensions/inference-nitro-extension/src/node/execute.ts
+++ b/extensions/inference-nitro-extension/src/node/execute.ts
@@ -1,12 +1,19 @@
+import { getJanDataFolderPath } from '@janhq/core/node'
import { readFileSync } from 'fs'
import * as path from 'path'
-import { GPU_INFO_FILE } from './accelerator'
export interface NitroExecutableOptions {
executablePath: string
cudaVisibleDevices: string
vkVisibleDevices: string
}
+
+export const GPU_INFO_FILE = path.join(
+ getJanDataFolderPath(),
+ 'settings',
+ 'settings.json'
+)
+
/**
* Find which executable file to run based on the current platform.
* @returns The name of the executable file to run.
diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
index 9b2684a6c..c57eb262d 100644
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@@ -4,7 +4,6 @@ import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import { log, getSystemResourceInfo } from '@janhq/core/node'
-import { getNitroProcessInfo, updateNvidiaInfo } from './accelerator'
import {
Model,
InferenceEngine,
@@ -385,11 +384,26 @@ function dispose() {
killSubprocess()
}
+/**
+ * Nitro process info
+ */
+export interface NitroProcessInfo {
+ isRunning: boolean
+}
+
+/**
+ * Retrieve current nitro process
+ */
+const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
+ return {
+ isRunning: subprocess != null,
+ }
+}
+
export default {
runModel,
stopModel,
killSubprocess,
dispose,
- updateNvidiaInfo,
- getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess),
+ getCurrentNitroProcessInfo,
}
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index fb1f26885..d05e7d07f 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -17,6 +17,8 @@ import {
ImportingModel,
LocalImportModelEvent,
baseName,
+ GpuSetting,
+ DownloadRequest,
} from '@janhq/core'
import { extractFileName } from './helpers/path'
@@ -29,10 +31,14 @@ export default class JanModelExtension extends ModelExtension {
private static readonly _modelMetadataFileName = 'model.json'
private static readonly _supportedModelFormat = '.gguf'
private static readonly _incompletedModelFileName = '.download'
- private static readonly _offlineInferenceEngine = InferenceEngine.nitro
-
+ private static readonly _offlineInferenceEngine = [
+ InferenceEngine.nitro,
+ InferenceEngine.nitro_tensorrt_llm,
+ ]
+ private static readonly _tensorRtEngineFormat = '.engine'
private static readonly _configDirName = 'config'
private static readonly _defaultModelFileName = 'default-model.json'
+ private static readonly _supportedGpuArch = ['turing', 'ampere', 'ada']
/**
* Called when the extension is loaded.
@@ -89,12 +95,52 @@ export default class JanModelExtension extends ModelExtension {
*/
async downloadModel(
model: Model,
+ gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
): Promise {
// create corresponding directory
const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath)
+ if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
+ if (!gpuSettings || gpuSettings.gpus.length === 0) {
+ console.error('No GPU found. Please check your GPU setting.')
+ return
+ }
+ const firstGpu = gpuSettings.gpus[0]
+ if (!firstGpu.name.toLowerCase().includes('nvidia')) {
+ console.error('No Nvidia GPU found. Please check your GPU setting.')
+ return
+ }
+ const gpuArch = firstGpu.arch
+ if (gpuArch === undefined) {
+ console.error(
+ 'No GPU architecture found. Please check your GPU setting.'
+ )
+ return
+ }
+
+ if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) {
+ console.error(
+          `Your GPU (${firstGpu.name}) is not supported. Only 20xx, 30xx, and 40xx series are supported.`
+ )
+ return
+ }
+
+ const os = 'windows' // TODO: remove this hard coded value
+
+ const newSources = model.sources.map((source) => {
+ const newSource = { ...source }
+ newSource.url = newSource.url
+          .replace(/<os>/g, os)
+          .replace(/<gpuarch>/g, gpuArch)
+ return newSource
+ })
+ model.sources = newSources
+ }
+
+ console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
+
if (model.sources.length > 1) {
// path to model binaries
for (const source of model.sources) {
@@ -105,8 +151,11 @@ export default class JanModelExtension extends ModelExtension {
if (source.filename) {
path = await joinPath([modelDirPath, source.filename])
}
-
- downloadFile(source.url, path, network)
+ const downloadRequest: DownloadRequest = {
+ url: source.url,
+ localPath: path,
+ }
+ downloadFile(downloadRequest, network)
}
// TODO: handle multiple binaries for web later
} else {
@@ -115,7 +164,11 @@ export default class JanModelExtension extends ModelExtension {
JanModelExtension._supportedModelFormat
)
const path = await joinPath([modelDirPath, fileName])
- downloadFile(model.sources[0]?.url, path, network)
+ const downloadRequest: DownloadRequest = {
+ url: model.sources[0]?.url,
+ localPath: path,
+ }
+ downloadFile(downloadRequest, network)
if (window && window.core?.api && window.core.api.baseApiUrl) {
this.startPollingDownloadProgress(model.id)
@@ -238,7 +291,7 @@ export default class JanModelExtension extends ModelExtension {
async getDownloadedModels(): Promise {
return await this.getModelsMetadata(
async (modelDir: string, model: Model) => {
- if (model.engine !== JanModelExtension._offlineInferenceEngine)
+ if (!JanModelExtension._offlineInferenceEngine.includes(model.engine))
return true
// model binaries (sources) are absolute path & exist
@@ -247,22 +300,32 @@ export default class JanModelExtension extends ModelExtension {
)
if (existFiles.every((exist) => exist)) return true
- return await fs
+ const result = await fs
.readdirSync(await joinPath([JanModelExtension._homeDir, modelDir]))
.then((files: string[]) => {
// Model binary exists in the directory
// Model binary name can match model ID or be a .gguf file and not be an incompleted model file
return (
files.includes(modelDir) ||
- files.filter(
- (file) =>
+ files.filter((file) => {
+ if (
+ file.endsWith(JanModelExtension._incompletedModelFileName)
+ ) {
+ return false
+ }
+ return (
file
.toLowerCase()
- .includes(JanModelExtension._supportedModelFormat) &&
- !file.endsWith(JanModelExtension._incompletedModelFileName)
- )?.length >= model.sources.length
+ .includes(JanModelExtension._supportedModelFormat) ||
+ file
+ .toLowerCase()
+ .includes(JanModelExtension._tensorRtEngineFormat)
+ )
+ })?.length > 0 // TODO: NamH find better way (can use basename to check the file name with source url)
)
})
+
+ return result
}
)
}
diff --git a/extensions/monitoring-extension/bin/.gitkeep b/extensions/monitoring-extension/bin/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/extensions/monitoring-extension/download.bat b/extensions/monitoring-extension/download.bat
new file mode 100644
index 000000000..f1cf8b7ea
--- /dev/null
+++ b/extensions/monitoring-extension/download.bat
@@ -0,0 +1,2 @@
+@echo off
+.\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin
\ No newline at end of file
diff --git a/extensions/monitoring-extension/package.json b/extensions/monitoring-extension/package.json
index 582f7cd7b..73d28ab37 100644
--- a/extensions/monitoring-extension/package.json
+++ b/extensions/monitoring-extension/package.json
@@ -3,21 +3,40 @@
"version": "1.0.10",
"description": "This extension provides system health and OS level data",
"main": "dist/index.js",
- "module": "dist/module.js",
+ "node": "dist/node/index.cjs.js",
"author": "Jan ",
"license": "AGPL-3.0",
"scripts": {
- "build": "tsc -b . && webpack --config webpack.config.js",
+ "build": "tsc --module commonjs && rollup -c rollup.config.ts && npm run download-artifacts",
+ "download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
+ "download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
+ "download-artifacts:win32": "download.bat",
+ "download-artifacts:linux": "download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../pre-install"
},
+ "exports": {
+ ".": "./dist/index.js",
+ "./main": "./dist/node/index.cjs.js"
+ },
"devDependencies": {
+ "@rollup/plugin-commonjs": "^25.0.7",
+ "@rollup/plugin-json": "^6.1.0",
+ "@rollup/plugin-node-resolve": "^15.2.3",
+ "@types/node": "^20.11.4",
+ "@types/node-os-utils": "^1.3.4",
+ "run-script-os": "^1.1.6",
+ "cpx": "^1.5.0",
"rimraf": "^3.0.2",
- "webpack": "^5.88.2",
- "webpack-cli": "^5.1.4",
- "ts-loader": "^9.5.0"
+ "rollup": "^2.38.5",
+ "rollup-plugin-define": "^1.0.1",
+ "rollup-plugin-sourcemaps": "^0.6.3",
+ "rollup-plugin-typescript2": "^0.36.0",
+ "typescript": "^5.3.3",
+ "download-cli": "^1.1.1"
},
"dependencies": {
"@janhq/core": "file:../../core",
+ "@rollup/plugin-replace": "^5.0.5",
"node-os-utils": "^1.3.7"
},
"files": [
diff --git a/extensions/monitoring-extension/rollup.config.ts b/extensions/monitoring-extension/rollup.config.ts
new file mode 100644
index 000000000..1b7a40bad
--- /dev/null
+++ b/extensions/monitoring-extension/rollup.config.ts
@@ -0,0 +1,68 @@
+import resolve from '@rollup/plugin-node-resolve'
+import commonjs from '@rollup/plugin-commonjs'
+import sourceMaps from 'rollup-plugin-sourcemaps'
+import typescript from 'rollup-plugin-typescript2'
+import json from '@rollup/plugin-json'
+import replace from '@rollup/plugin-replace'
+const packageJson = require('./package.json')
+
+export default [
+ {
+ input: `src/index.ts`,
+ output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
+    // List here any external modules you don't want included in your bundle (e.g. 'lodash')
+ external: [],
+ watch: {
+ include: 'src/**',
+ },
+ plugins: [
+ replace({
+ NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
+ }),
+ // Allow json resolution
+ json(),
+ // Compile TypeScript files
+ typescript({ useTsconfigDeclarationDir: true }),
+ // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
+ commonjs(),
+ // Allow node_modules resolution, so you can use 'external' to control
+ // which external modules to include in the bundle
+ // https://github.com/rollup/rollup-plugin-node-resolve#usage
+ resolve({
+ extensions: ['.js', '.ts', '.svelte'],
+ }),
+
+ // Resolve source maps to the original source
+ sourceMaps(),
+ ],
+ },
+ {
+ input: `src/node/index.ts`,
+ output: [
+ { file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
+ ],
+    // List here any external modules you don't want included in your bundle (e.g. 'lodash')
+ external: ['@janhq/core/node'],
+ watch: {
+ include: 'src/node/**',
+ },
+ plugins: [
+ // Allow json resolution
+ json(),
+ // Compile TypeScript files
+ typescript({ useTsconfigDeclarationDir: true }),
+ // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
+ commonjs(),
+ // Allow node_modules resolution, so you can use 'external' to control
+ // which external modules to include in the bundle
+ // https://github.com/rollup/rollup-plugin-node-resolve#usage
+ resolve({
+ extensions: ['.ts', '.js', '.json'],
+ }),
+
+ // Resolve source maps to the original source
+ sourceMaps(),
+ ],
+ },
+]
diff --git a/extensions/monitoring-extension/src/@types/global.d.ts b/extensions/monitoring-extension/src/@types/global.d.ts
index 8106353cf..dfa96a0b1 100644
--- a/extensions/monitoring-extension/src/@types/global.d.ts
+++ b/extensions/monitoring-extension/src/@types/global.d.ts
@@ -1 +1,18 @@
-declare const MODULE: string
+declare const NODE: string
+
+type CpuGpuInfo = {
+ cpu: {
+ usage: number
+ }
+ gpu: GpuInfo[]
+}
+
+type GpuInfo = {
+ id: string
+ name: string
+ temperature: string
+ utilization: string
+ memoryTotal: string
+ memoryFree: string
+ memoryUtilization: string
+}
diff --git a/extensions/monitoring-extension/src/index.ts b/extensions/monitoring-extension/src/index.ts
index ce9b2fc14..c7f53455d 100644
--- a/extensions/monitoring-extension/src/index.ts
+++ b/extensions/monitoring-extension/src/index.ts
@@ -1,4 +1,4 @@
-import { MonitoringExtension, executeOnMain } from '@janhq/core'
+import { GpuSetting, MonitoringExtension, executeOnMain } from '@janhq/core'
/**
 * JanMonitoringExtension is an extension that provides system monitoring functionality.
@@ -8,19 +8,30 @@ export default class JanMonitoringExtension extends MonitoringExtension {
/**
* Called when the extension is loaded.
*/
- async onLoad() {}
+ async onLoad() {
+ // Attempt to fetch nvidia info
+ await executeOnMain(NODE, 'updateNvidiaInfo')
+ }
/**
* Called when the extension is unloaded.
*/
onUnload(): void {}
+ /**
+ * Returns the GPU configuration.
+ * @returns A Promise that resolves to an object containing the GPU configuration.
+ */
+  async getGpuSetting(): Promise<GpuSetting | undefined> {
+ return executeOnMain(NODE, 'getGpuConfig')
+ }
+
/**
* Returns information about the system resources.
* @returns A Promise that resolves to an object containing information about the system resources.
*/
  getResourcesInfo(): Promise<any> {
- return executeOnMain(MODULE, 'getResourcesInfo')
+ return executeOnMain(NODE, 'getResourcesInfo')
}
/**
@@ -28,6 +39,6 @@ export default class JanMonitoringExtension extends MonitoringExtension {
* @returns A Promise that resolves to an object containing information about the current system load.
*/
  getCurrentLoad(): Promise<any> {
- return executeOnMain(MODULE, 'getCurrentLoad')
+ return executeOnMain(NODE, 'getCurrentLoad')
}
}
diff --git a/extensions/monitoring-extension/src/module.ts b/extensions/monitoring-extension/src/module.ts
deleted file mode 100644
index 27781a5d6..000000000
--- a/extensions/monitoring-extension/src/module.ts
+++ /dev/null
@@ -1,92 +0,0 @@
-const nodeOsUtils = require('node-os-utils')
-const getJanDataFolderPath = require('@janhq/core/node').getJanDataFolderPath
-const path = require('path')
-const { readFileSync } = require('fs')
-const exec = require('child_process').exec
-
-const NVIDIA_INFO_FILE = path.join(
- getJanDataFolderPath(),
- 'settings',
- 'settings.json'
-)
-
-const getResourcesInfo = () =>
- new Promise((resolve) => {
- nodeOsUtils.mem.used().then((ramUsedInfo) => {
- const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
- const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
- const response = {
- mem: {
- totalMemory,
- usedMemory,
- },
- }
- resolve(response)
- })
- })
-
-const getCurrentLoad = () =>
- new Promise((resolve, reject) => {
- nodeOsUtils.cpu.usage().then((cpuPercentage) => {
- let data = {
- run_mode: 'cpu',
- gpus_in_use: [],
- }
- if (process.platform !== 'darwin') {
- data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))
- }
- if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
- const gpuIds = data['gpus_in_use'].join(',')
- if (gpuIds !== '' && data['vulkan'] !== true) {
- exec(
- `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
- (error, stdout, _) => {
- if (error) {
- console.error(`exec error: ${error}`)
- reject(error)
- return
- }
- const gpuInfo = stdout
- .trim()
- .split('\n')
- .map((line) => {
- const [
- id,
- name,
- temperature,
- utilization,
- memoryTotal,
- memoryFree,
- memoryUtilization,
- ] = line.split(', ').map((item) => item.replace(/\r/g, ''))
- return {
- id,
- name,
- temperature,
- utilization,
- memoryTotal,
- memoryFree,
- memoryUtilization,
- }
- })
- resolve({
- cpu: { usage: cpuPercentage },
- gpu: gpuInfo,
- })
- }
- )
- } else {
- // Handle the case where gpuIds is empty
- resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
- }
- } else {
- // Handle the case where run_mode is not 'gpu' or no GPUs are in use
- resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
- }
- })
- })
-
-module.exports = {
- getResourcesInfo,
- getCurrentLoad,
-}
diff --git a/extensions/monitoring-extension/src/node/index.ts b/extensions/monitoring-extension/src/node/index.ts
new file mode 100644
index 000000000..25f151112
--- /dev/null
+++ b/extensions/monitoring-extension/src/node/index.ts
@@ -0,0 +1,322 @@
+import { GpuSetting, GpuSettingInfo, ResourceInfo } from '@janhq/core'
+import { getJanDataFolderPath, log } from '@janhq/core/node'
+import { mem, cpu } from 'node-os-utils'
+import { exec } from 'child_process'
+import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
+import path from 'path'
+
+/**
+ * Path to the settings directory
+ **/
+export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
+/**
+ * Path to the settings file
+ **/
+export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
+
+/**
+ * Default GPU settings
+ * TODO: This needs to be refactored to support multiple accelerators
+ **/
+const DEFAULT_SETTINGS: GpuSetting = {
+ notify: true,
+ run_mode: 'cpu',
+ nvidia_driver: {
+ exist: false,
+ version: '',
+ },
+ cuda: {
+ exist: false,
+ version: '',
+ },
+ gpus: [],
+ gpu_highest_vram: '',
+ gpus_in_use: [],
+ is_initial: true,
+ // TODO: This needs to be set based on user toggle in settings
+ vulkan: false,
+}
+
+export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
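+  // macOS is not supported here; there is no GPU settings file to read.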
+ if (process.platform === 'darwin') return undefined
+ return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
+}
+
+export const getResourcesInfo = async (): Promise<ResourceInfo> => {
+ const ramUsedInfo = await mem.used()
+ const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
+ const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
+
+ const resourceInfo: ResourceInfo = {
+ mem: {
+ totalMemory,
+ usedMemory,
+ },
+ }
+
+ return resourceInfo
+}
+
+export const getCurrentLoad = () =>
+ new Promise(async (resolve, reject) => {
+ const cpuPercentage = await cpu.usage()
+ let data = {
+ run_mode: 'cpu',
+ gpus_in_use: [],
+ }
+
+ if (process.platform !== 'darwin') {
+ data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
+ }
+
+ if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
+ const gpuIds = data.gpus_in_use.join(',')
+ if (gpuIds !== '' && data['vulkan'] !== true) {
+ exec(
+ `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
+ (error, stdout, _) => {
+ if (error) {
+ console.error(`exec error: ${error}`)
+            reject(error)
+            return
+ }
+ const gpuInfo: GpuInfo[] = stdout
+ .trim()
+ .split('\n')
+ .map((line) => {
+ const [
+ id,
+ name,
+ temperature,
+ utilization,
+ memoryTotal,
+ memoryFree,
+ memoryUtilization,
+ ] = line.split(', ').map((item) => item.replace(/\r/g, ''))
+ return {
+ id,
+ name,
+ temperature,
+ utilization,
+ memoryTotal,
+ memoryFree,
+ memoryUtilization,
+ }
+ })
+
+ resolve({
+ cpu: { usage: cpuPercentage },
+ gpu: gpuInfo,
+ })
+ }
+ )
+ } else {
+ // Handle the case where gpuIds is empty
+ resolve({
+ cpu: { usage: cpuPercentage },
+ gpu: [],
+ })
+ }
+ } else {
+ // Handle the case where run_mode is not 'gpu' or no GPUs are in use
+ resolve({
+ cpu: { usage: cpuPercentage },
+ gpu: [],
+ })
+ }
+ })
+
+/**
+ * Retrieves GPU information and persists it to settings.json.
+ * Called when the extension is loaded, to turn on GPU acceleration if supported.
+ */
+export const updateNvidiaInfo = async () => {
+ // ignore if macos
+ if (process.platform === 'darwin') return
+
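+  // If settings.json is missing or unreadable, (re)create it with the default settings.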
+ try {
+ JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
+ } catch (error) {
+ if (!existsSync(SETTINGS_DIR)) {
+ mkdirSync(SETTINGS_DIR, {
+ recursive: true,
+ })
+ }
+ writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
+ }
+
+ await updateNvidiaDriverInfo()
+ await updateGpuInfo()
+}
+
+const updateNvidiaDriverInfo = async () =>
+ new Promise((resolve, reject) => {
+ exec(
+ 'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
+ (error, stdout) => {
+ const data: GpuSetting = JSON.parse(
+ readFileSync(GPU_INFO_FILE, 'utf-8')
+ )
+
+ if (!error) {
+ const firstLine = stdout.split('\n')[0].trim()
+ data.nvidia_driver.exist = true
+ data.nvidia_driver.version = firstLine
+ } else {
+ data.nvidia_driver.exist = false
+ }
+
+ writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
+ resolve({})
+ }
+ )
+ })
+
+const getGpuArch = (gpuName: string): string => {
+ if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
+
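+  // Infer the architecture from the series number in the name, e.g. "GeForce RTX 3090" -> ampere.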
+ if (gpuName.includes('20')) return 'turing'
+ else if (gpuName.includes('30')) return 'ampere'
+ else if (gpuName.includes('40')) return 'ada'
+ else return 'unknown'
+}
+
+const updateGpuInfo = async () =>
+ new Promise((resolve, reject) => {
+ let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
+
+ // Cuda
+ if (data.vulkan === true) {
+ // Vulkan
+ exec(
+ process.platform === 'win32'
+ ? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
+ : `${__dirname}/../bin/vulkaninfo --summary`,
+ (error, stdout) => {
+ if (!error) {
+ const output = stdout.toString()
+
+ log(output)
+ const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
+
+ const gpus: GpuSettingInfo[] = []
+ let match
+ while ((match = gpuRegex.exec(output)) !== null) {
+ const id = match[1]
+ const name = match[2]
+ const arch = getGpuArch(name)
+ gpus.push({ id, vram: '0', name, arch })
+ }
+ data.gpus = gpus
+
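+          // Default to GPU '1' when more than one is present (assuming index 0 is an integrated GPU).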
+ if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
+ data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
+ }
+
+ data = updateCudaExistence(data)
+ writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
+ resolve({})
+ } else {
+ reject(error)
+ }
+ }
+ )
+ } else {
+ exec(
+ 'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
+ (error, stdout) => {
+ if (!error) {
+ log(stdout)
+          // Get GPU info, tracking the GPU with the highest memory
+ let highestVram = 0
+ let highestVramId = '0'
+ const gpus: GpuSettingInfo[] = stdout
+ .trim()
+ .split('\n')
+ .map((line) => {
+ let [id, vram, name] = line.split(', ')
+ const arch = getGpuArch(name)
+ vram = vram.replace(/\r/g, '')
+ if (parseFloat(vram) > highestVram) {
+ highestVram = parseFloat(vram)
+ highestVramId = id
+ }
+ return { id, vram, name, arch }
+ })
+
+ data.gpus = gpus
+ data.gpu_highest_vram = highestVramId
+ } else {
+ data.gpus = []
+ data.gpu_highest_vram = ''
+ }
+
+ if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
+ data.gpus_in_use = [data.gpu_highest_vram]
+ }
+
+ data = updateCudaExistence(data)
+ writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
+ resolve({})
+ }
+ )
+ }
+ })
+
+/**
+ * Check if a file exists in any of the given paths
+ */
+const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
+ return paths.some((p) => existsSync(path.join(p, file)))
+}
+
+/**
+ * Validate CUDA availability on Linux and Windows
+ */
+const updateCudaExistence = (
+ data: GpuSetting = DEFAULT_SETTINGS
+): GpuSetting => {
+ let filesCuda12: string[]
+ let filesCuda11: string[]
+ let paths: string[]
+ let cudaVersion: string = ''
+
+ if (process.platform === 'win32') {
+ filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
+ filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
+ paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
+ } else {
+ filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
+ filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
+ paths = process.env.LD_LIBRARY_PATH
+ ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
+ : []
+ paths.push('/usr/lib/x86_64-linux-gnu/')
+ }
+
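+  // Prefer CUDA 12; fall back to CUDA 11 only if the v12 runtime libraries are not found.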
+ let cudaExists = filesCuda12.every(
+ (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
+ )
+
+ if (!cudaExists) {
+ cudaExists = filesCuda11.every(
+ (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
+ )
+ if (cudaExists) {
+ cudaVersion = '11'
+ }
+ } else {
+ cudaVersion = '12'
+ }
+
+ data.cuda.exist = cudaExists
+ data.cuda.version = cudaVersion
+
+ console.debug(data.is_initial, data.gpus_in_use)
+
+ if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
+ data.run_mode = 'gpu'
+ }
+
+ data.is_initial = false
+ return data
+}
diff --git a/extensions/monitoring-extension/webpack.config.js b/extensions/monitoring-extension/webpack.config.js
deleted file mode 100644
index c8c3a34f7..000000000
--- a/extensions/monitoring-extension/webpack.config.js
+++ /dev/null
@@ -1,35 +0,0 @@
-const path = require('path')
-const webpack = require('webpack')
-const packageJson = require('./package.json')
-
-module.exports = {
- experiments: { outputModule: true },
- entry: './src/index.ts', // Adjust the entry point to match your project's main file
- mode: 'production',
- module: {
- rules: [
- {
- test: /\.tsx?$/,
- use: 'ts-loader',
- exclude: /node_modules/,
- },
- ],
- },
- output: {
- filename: 'index.js', // Adjust the output file name as needed
- path: path.resolve(__dirname, 'dist'),
- library: { type: 'module' }, // Specify ESM output format
- },
- plugins: [
- new webpack.DefinePlugin({
- MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
- }),
- ],
- resolve: {
- extensions: ['.ts', '.js'],
- },
- optimization: {
- minimize: false,
- },
- // Add loaders and other configuration as needed for your project
-}
diff --git a/extensions/tensorrt-llm-extension/README.md b/extensions/tensorrt-llm-extension/README.md
new file mode 100644
index 000000000..34a670516
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/README.md
@@ -0,0 +1,79 @@
+# TensorRT-LLM Extension
+
+Created from the Jan extension example.
+
+# Create a Jan Extension using Typescript
+
+Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
+
+## Create Your Own Extension
+
+To create your own extension, you can use this repository as a template! Just follow the instructions below:
+
+1. Click the Use this template button at the top of the repository
+2. Select Create a new repository
+3. Select an owner and name for your new repository
+4. Click Create repository
+5. Clone your new repository
+
+## Initial Setup
+
+After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
+
+> [!NOTE]
+>
+> You'll need to have a reasonably modern version of
+> [Node.js](https://nodejs.org) handy. If you are using a version manager like
+> [`nodenv`](https://github.com/nodenv/nodenv) or
+> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
+> root of your repository to install the version specified in
+> [`package.json`](./package.json). Otherwise, 20.x or later should work!
+
+1. :hammer_and_wrench: Install the dependencies
+
+ ```bash
+ npm install
+ ```
+
+1. :building_construction: Package the TypeScript for distribution
+
+ ```bash
+ npm run bundle
+ ```
+
+1. :white_check_mark: Check your artifact
+
+   There will now be a `.tgz` file in your extension directory.
+
+## Update the Extension Metadata
+
+The [`package.json`](package.json) file defines metadata about your extension, such as
+extension name, main entry, description and version.
+
+When you copy this repository, update `package.json` with the name and description of your extension.
+
+## Update the Extension Code
+
+The [`src/`](./src/) directory is the heart of your extension! This contains the
+source code that will be run when your extension functions are invoked. You can replace the
+contents of this directory with your own code.
+
+There are a few things to keep in mind when writing your extension code:
+
+- Most Jan Extension functions are processed asynchronously.
+  In `index.ts`, you will see that extension functions return a `Promise`.
+
+ ```typescript
+ import { events, MessageEvent, MessageRequest } from '@janhq/core'
+
+  function onStart(): Promise<void> {
+ return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
+ this.inference(data)
+ )
+ }
+ ```
+
+ For more information about the Jan Extension Core module, see the
+ [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
+
+So, what are you waiting for? Go ahead and start customizing your extension!
diff --git a/extensions/tensorrt-llm-extension/models.json b/extensions/tensorrt-llm-extension/models.json
new file mode 100644
index 000000000..30f345f47
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/models.json
@@ -0,0 +1,96 @@
+[
+ {
+ "sources": [
+ {
+ "filename": "config.json",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/config.json"
+ },
+ {
+ "filename": "rank0.engine",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/rank0.engine"
+ },
+ {
+ "filename": "tokenizer.model",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
+ },
+ {
+ "filename": "special_tokens_map.json",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
+ },
+ {
+ "filename": "tokenizer.json",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
+ },
+ {
+ "filename": "tokenizer_config.json",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
+ }
+ ],
+ "id": "llamacorn-1.1b-chat-fp16",
+ "object": "model",
+ "name": "LlamaCorn 1.1B Chat FP16",
+ "version": "1.0",
+ "description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
+ "format": "TensorRT-LLM",
+ "settings": {
+ "ctx_len": 2048,
+ "text_model": false
+ },
+ "parameters": {
+ "max_tokens": 4096
+ },
+ "metadata": {
+ "author": "LLama",
+ "tags": ["TensorRT-LLM", "1B", "Finetuned"],
+ "size": 2151000000
+ },
+ "engine": "nitro-tensorrt-llm"
+ },
+ {
+ "sources": [
+ {
+ "filename": "config.json",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/config.json"
+ },
+ {
+ "filename": "rank0.engine",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/rank0.engine"
+ },
+ {
+ "filename": "tokenizer.model",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
+ },
+ {
+ "filename": "special_tokens_map.json",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
+ },
+ {
+ "filename": "tokenizer.json",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
+ },
+ {
+ "filename": "tokenizer_config.json",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
+ }
+ ],
+ "id": "tinyjensen-1.1b-chat-fp16",
+ "object": "model",
+ "name": "TinyJensen 1.1B Chat FP16",
+ "version": "1.0",
+    "description": "Do you want to chat with Jensen Huang? Here you go",
+ "format": "TensorRT-LLM",
+ "settings": {
+ "ctx_len": 2048,
+ "text_model": false
+ },
+ "parameters": {
+ "max_tokens": 4096
+ },
+ "metadata": {
+ "author": "LLama",
+ "tags": ["TensorRT-LLM", "1B", "Finetuned"],
+ "size": 2151000000
+ },
+ "engine": "nitro-tensorrt-llm"
+ }
+]
diff --git a/extensions/tensorrt-llm-extension/package.json b/extensions/tensorrt-llm-extension/package.json
new file mode 100644
index 000000000..96ede4a56
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/package.json
@@ -0,0 +1,75 @@
+{
+ "name": "@janhq/tensorrt-llm-extension",
+ "version": "0.0.3",
+  "description": "Enables accelerated inference leveraging Nvidia's TensorRT-LLM for optimized GPU performance. Compatible with models in TensorRT-LLM format. Requires an Nvidia GPU driver and CUDA Toolkit installation.",
+ "main": "dist/index.js",
+ "node": "dist/node/index.cjs.js",
+ "author": "Jan ",
+ "license": "AGPL-3.0",
+ "config": {
+ "host": "127.0.0.1",
+ "port": "3929"
+ },
+ "compatibility": {
+ "platform": [
+ "win32",
+ "linux"
+ ],
+ "app": [
+ "0.1.0"
+ ]
+ },
+ "scripts": {
+ "build": "tsc --module commonjs && rollup -c rollup.config.ts",
+ "build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+ "build:publish:linux": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+ "build:publish:darwin": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+ "build:publish": "run-script-os"
+ },
+ "exports": {
+ ".": "./dist/index.js",
+ "./main": "./dist/node/index.cjs.js"
+ },
+ "devDependencies": {
+ "@rollup/plugin-commonjs": "^25.0.7",
+ "@rollup/plugin-json": "^6.1.0",
+ "@rollup/plugin-node-resolve": "^15.2.3",
+ "@rollup/plugin-replace": "^5.0.5",
+ "@types/node": "^20.11.4",
+ "@types/os-utils": "^0.0.4",
+ "@types/tcp-port-used": "^1.0.4",
+ "@types/decompress": "4.2.7",
+ "cpx": "^1.5.0",
+ "download-cli": "^1.1.1",
+ "rimraf": "^3.0.2",
+ "rollup": "^2.38.5",
+ "rollup-plugin-define": "^1.0.1",
+ "rollup-plugin-sourcemaps": "^0.6.3",
+ "rollup-plugin-typescript2": "^0.36.0",
+ "run-script-os": "^1.1.6",
+ "typescript": "^5.2.2"
+ },
+ "dependencies": {
+ "@janhq/core": "file:../../core",
+ "decompress": "^4.2.1",
+ "fetch-retry": "^5.0.6",
+ "path-browserify": "^1.0.1",
+ "rxjs": "^7.8.1",
+ "tcp-port-used": "^1.0.2",
+ "ulid": "^2.3.0"
+ },
+ "engines": {
+ "node": ">=18.0.0"
+ },
+ "files": [
+ "dist/*",
+ "package.json",
+ "README.md"
+ ],
+ "bundleDependencies": [
+ "tcp-port-used",
+ "fetch-retry",
+ "decompress",
+ "@janhq/core"
+ ]
+}
diff --git a/extensions/tensorrt-llm-extension/rollup.config.ts b/extensions/tensorrt-llm-extension/rollup.config.ts
new file mode 100644
index 000000000..33e45823b
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/rollup.config.ts
@@ -0,0 +1,73 @@
+import resolve from '@rollup/plugin-node-resolve'
+import commonjs from '@rollup/plugin-commonjs'
+import sourceMaps from 'rollup-plugin-sourcemaps'
+import typescript from 'rollup-plugin-typescript2'
+import json from '@rollup/plugin-json'
+import replace from '@rollup/plugin-replace'
+const packageJson = require('./package.json')
+
+export default [
+ {
+ input: `src/index.ts`,
+ output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
+ watch: {
+ include: 'src/**',
+ },
+ plugins: [
+ replace({
+ EXTENSION_NAME: JSON.stringify(packageJson.name),
+ TENSORRT_VERSION: JSON.stringify('0.1.5'),
+ DOWNLOAD_RUNNER_URL:
+ process.platform === 'darwin' || process.platform === 'win32'
+ ? JSON.stringify(
+                'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
+ )
+ : JSON.stringify(
+                'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
+ ),
+ NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
+ INFERENCE_URL: JSON.stringify(
+ process.env.INFERENCE_URL ||
+ `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/v1/chat/completions`
+ ),
+ COMPATIBILITY: JSON.stringify(packageJson.compatibility),
+ }),
+ json(),
+ typescript({ useTsconfigDeclarationDir: true }),
+ commonjs(),
+ resolve({
+ extensions: ['.js', '.ts', '.svelte'],
+ }),
+ sourceMaps(),
+ ],
+ },
+ {
+ input: `src/node/index.ts`,
+ output: [
+ { file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
+ ],
+ external: ['@janhq/core/node'],
+ watch: {
+ include: 'src/node/**',
+ },
+ plugins: [
+ replace({
+ LOAD_MODEL_URL: JSON.stringify(
+ `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
+ ),
+ TERMINATE_ENGINE_URL: JSON.stringify(
+ `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/processmanager/destroy`
+ ),
+ ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'),
+ ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? '3928'),
+ }),
+ json(),
+ typescript({ useTsconfigDeclarationDir: true }),
+ commonjs(),
+ resolve({
+ extensions: ['.ts', '.js', '.json'],
+ }),
+ sourceMaps(),
+ ],
+ },
+]
diff --git a/extensions/tensorrt-llm-extension/src/@types/global.d.ts b/extensions/tensorrt-llm-extension/src/@types/global.d.ts
new file mode 100644
index 000000000..905e86380
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/src/@types/global.d.ts
@@ -0,0 +1,10 @@
+declare const NODE: string
+declare const INFERENCE_URL: string
+declare const LOAD_MODEL_URL: string
+declare const TERMINATE_ENGINE_URL: string
+declare const ENGINE_HOST: string
+declare const ENGINE_PORT: string
+declare const DOWNLOAD_RUNNER_URL: string
+declare const TENSORRT_VERSION: string
+declare const COMPATIBILITY: object
+declare const EXTENSION_NAME: string
diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts
new file mode 100644
index 000000000..02c676841
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/src/index.ts
@@ -0,0 +1,171 @@
+/**
+ * @module tensorrt-llm-extension/src/index
+ */
+
+import {
+ Compatibility,
+ DownloadEvent,
+ DownloadRequest,
+ DownloadState,
+ GpuSetting,
+ InstallationState,
+ Model,
+ baseName,
+ downloadFile,
+ events,
+ executeOnMain,
+ joinPath,
+ showToast,
+ systemInformations,
+ LocalOAIEngine,
+ fs,
+ MessageRequest,
+ ModelEvent,
+} from '@janhq/core'
+import models from '../models.json'
+
+/**
+ * TensorRTLLMExtension - Implementation of LocalOAIEngine
+ * @extends LocalOAIEngine
+ * Provides pre-populated models for TensorRT-LLM
+ */
+export default class TensorRTLLMExtension extends LocalOAIEngine {
+ /**
+   * Overrides the custom function names for loading and unloading the model,
+   * which are implemented in the node module
+ */
+ override provider = 'nitro-tensorrt-llm'
+ override inferenceUrl = INFERENCE_URL
+ override nodeModule = NODE
+
+ private supportedGpuArch = ['turing', 'ampere', 'ada']
+
+ compatibility() {
+ return COMPATIBILITY as unknown as Compatibility
+ }
+ /**
+   * Models implemented by the extension,
+   * defining the pre-populated model list
+ */
+  async models(): Promise<Model[]> {
+ if ((await this.installationState()) === 'Installed')
+ return models as unknown as Model[]
+ return []
+ }
+
+  override async install(): Promise<void> {
+ const info = await systemInformations()
+ console.debug(
+      `TensorRTLLMExtension installing prerequisites... ${JSON.stringify(info)}`
+ )
+ const gpuSetting: GpuSetting | undefined = info.gpuSetting
+ if (gpuSetting === undefined || gpuSetting.gpus.length === 0) {
+ console.error('No GPU setting found. Please check your GPU setting.')
+ return
+ }
+
+ // TODO: we only check for the first graphics card. Need to refactor this later.
+ const firstGpu = gpuSetting.gpus[0]
+ if (!firstGpu.name.toLowerCase().includes('nvidia')) {
+ console.error('No Nvidia GPU found. Please check your GPU setting.')
+ return
+ }
+
+ if (firstGpu.arch === undefined) {
+ console.error('No GPU architecture found. Please check your GPU setting.')
+ return
+ }
+
+ if (!this.supportedGpuArch.includes(firstGpu.arch)) {
+ console.error(
+        `Your GPU: ${firstGpu.name} is not supported. Only 20xx, 30xx, and 40xx series are supported.`
+ )
+ return
+ }
+
+ const binaryFolderPath = await executeOnMain(
+ this.nodeModule,
+ 'binaryFolder'
+ )
+ if (!(await fs.existsSync(binaryFolderPath))) {
+ await fs.mkdirSync(binaryFolderPath)
+ }
+
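+    // DOWNLOAD_RUNNER_URL is templated at build time; fill in the TensorRT version and GPU architecture.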
+ const placeholderUrl = DOWNLOAD_RUNNER_URL
+ const tensorrtVersion = TENSORRT_VERSION
+
+ const url = placeholderUrl
+      .replace(/<version>/g, tensorrtVersion)
+      .replace(/<gpuarch>/g, firstGpu.arch)
+
+ const tarball = await baseName(url)
+
+ const tarballFullPath = await joinPath([binaryFolderPath, tarball])
+ const downloadRequest: DownloadRequest = {
+ url,
+ localPath: tarballFullPath,
+ extensionId: EXTENSION_NAME,
+ downloadType: 'extension',
+ }
+ downloadFile(downloadRequest)
+
+ // TODO: wrap this into a Promise
+ const onFileDownloadSuccess = async (state: DownloadState) => {
+      // Ignore events from other downloads
+ if (state.fileName !== tarball) return
+ events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+ await executeOnMain(this.nodeModule, 'decompressRunner', tarballFullPath)
+ events.emit(DownloadEvent.onFileUnzipSuccess, state)
+
+ // Prepopulate models as soon as it's ready
+ this.prePopulateModels().then(() => {
+ showToast(
+ 'Extension installed successfully.',
+          'New models have been added to the Model Hub.'
+ )
+ })
+ }
+ events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+ }
+
+  async onModelInit(model: Model): Promise<void> {
+ if (model.engine !== this.provider) return
+
+ if ((await this.installationState()) === 'Installed')
+ return super.onModelInit(model)
+ else {
+ events.emit(ModelEvent.OnModelFail, {
+ ...model,
+ error: {
+ message: 'EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension',
+ },
+ })
+ }
+ }
+
+  override async installationState(): Promise<InstallationState> {
+    // For now, we just check for the nitro + TensorRT-LLM executable
+ const isNitroExecutableAvailable = await executeOnMain(
+ this.nodeModule,
+ 'isNitroExecutableAvailable'
+ )
+
+ return isNitroExecutableAvailable ? 'Installed' : 'NotInstalled'
+ }
+
+ override onInferenceStopped() {
+ if (!this.isRunning) return
+ showToast(
+ 'Unable to Stop Inference',
+ 'The model does not support stopping inference.'
+ )
+ return Promise.resolve()
+ }
+
+ inference(data: MessageRequest): void {
+ if (!this.isRunning) return
+ // TensorRT LLM Extension supports streaming only
+ if (data.model) data.model.parameters.stream = true
+ super.inference(data)
+ }
+}
diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts
new file mode 100644
index 000000000..252468fc1
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/src/node/index.ts
@@ -0,0 +1,191 @@
+import path from 'path'
+import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
+import tcpPortUsed from 'tcp-port-used'
+import fetchRT from 'fetch-retry'
+import { log } from '@janhq/core/node'
+import { existsSync } from 'fs'
+import decompress from 'decompress'
+
+// Wrap the global fetch (Node 18+) with retry support
+const fetchRetry = fetchRT(fetch)
+
+/**
+ * The parameters for a model load operation.
+ */
+interface ModelLoadParams {
+ engine_path: string
+ ctx_len: number
+}
+
+// The subprocess instance for Engine
+let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
+
+/**
+ * Initializes an engine subprocess to load a machine learning model.
+ * @param params - The model load settings.
+ */
+async function loadModel(params: any): Promise<{ error: Error | undefined }> {
+ // modelFolder is the absolute path to the running model folder
+ // e.g. ~/jan/models/llama-2
+ let modelFolder = params.modelFolder
+
+ const settings: ModelLoadParams = {
+ engine_path: modelFolder,
+ ctx_len: params.model.settings.ctx_len ?? 2048,
+ }
+ return runEngineAndLoadModel(settings)
+}
+
+/**
+ * Stops the engine subprocess.
+ */
+function unloadModel(): Promise<void> {
+ const controller = new AbortController()
+ setTimeout(() => controller.abort(), 5000)
+ debugLog(`Request to kill engine`)
+
+ subprocess?.kill()
+ return fetch(TERMINATE_ENGINE_URL, {
+ method: 'DELETE',
+ signal: controller.signal,
+ })
+ .then(() => {
+ subprocess = undefined
+ })
+ .catch(() => {}) // Do nothing with this attempt
+ .then(() => tcpPortUsed.waitUntilFree(parseInt(ENGINE_PORT), 300, 5000)) // Wait for port available
+ .then(() => debugLog(`Engine process is terminated`))
+ .catch((err) => {
+ debugLog(
+        `Could not kill the running process on port ${ENGINE_PORT}. Another process might be running on the same port. ${err}`
+ )
+ throw 'PORT_NOT_AVAILABLE'
+ })
+}
+/**
+ * 1. Spawn engine process
+ * 2. Load model into engine subprocess
+ * @returns An object whose error field is set when the engine fails to load the model
+ */
+async function runEngineAndLoadModel(settings: ModelLoadParams) {
+ return unloadModel()
+ .then(runEngine)
+ .then(() => loadModelRequest(settings))
+ .catch((err) => {
+ // TODO: Broadcast error so app could display proper error message
+ debugLog(`${err}`, 'Error')
+ return { error: err }
+ })
+}
+
+/**
+ * Loads an LLM into the engine subprocess by sending an HTTP POST request.
+ */
+function loadModelRequest(
+ settings: ModelLoadParams
+): Promise<{ error: Error | undefined }> {
+ debugLog(`Loading model with params ${JSON.stringify(settings)}`)
+ return fetchRetry(LOAD_MODEL_URL, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify(settings),
+ retries: 3,
+ retryDelay: 500,
+ })
+ .then((res) => {
+ debugLog(`Load model success with response ${JSON.stringify(res)}`)
+ return Promise.resolve({ error: undefined })
+ })
+ .catch((err) => {
+ debugLog(`Load model failed with error ${err}`, 'Error')
+ return Promise.resolve({ error: err })
+ })
+}
+
+/**
+ * Spawns engine subprocess.
+ */
+function runEngine(): Promise<void> {
+ debugLog(`Spawning engine subprocess...`)
+
+  return new Promise<void>((resolve, reject) => {
+ // Current directory by default
+ let binaryFolder = path.join(__dirname, '..', 'bin')
+ // Binary path
+ const binary = path.join(
+ binaryFolder,
+ process.platform === 'win32' ? 'nitro.exe' : 'nitro'
+ )
+
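+    // Engine CLI args (an assumption based on upstream nitro usage): [thread_count, host, port].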
+ const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
+ // Execute the binary
+    debugLog(`Spawning nitro at path: ${binary} with args: ${args}`)
+ subprocess = spawn(binary, args, {
+ cwd: binaryFolder,
+ env: {
+ ...process.env,
+ },
+ })
+
+ // Handle subprocess output
+ subprocess.stdout.on('data', (data: any) => {
+ debugLog(`${data}`)
+ })
+
+ subprocess.stderr.on('data', (data: any) => {
+ debugLog(`${data}`)
+ })
+
+ subprocess.on('close', (code: any) => {
+ debugLog(`Engine exited with code: ${code}`)
+ subprocess = undefined
+ reject(`child process exited with code ${code}`)
+ })
+
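+    // Poll every 300 ms, for up to 30 s, until the engine starts listening on its port.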
+ tcpPortUsed.waitUntilUsed(parseInt(ENGINE_PORT), 300, 30000).then(() => {
+ debugLog(`Engine is ready`)
+ resolve()
+ })
+ })
+}
+
+function debugLog(message: string, level: string = 'Debug') {
+ log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
+}
+
+const binaryFolder = async (): Promise<string> => {
+ return path.join(__dirname, '..', 'bin')
+}
+
+const decompressRunner = async (zipPath: string) => {
+ const output = path.join(__dirname, '..', 'bin')
+ console.debug(`Decompressing ${zipPath} to ${output}...`)
+ try {
+ const files = await decompress(zipPath, output)
+ console.debug('Decompress finished!', files)
+ } catch (err) {
+ console.error(`Decompress ${zipPath} failed: ${err}`)
+ }
+}
+
+const isNitroExecutableAvailable = async (): Promise<boolean> => {
+ const binary = path.join(
+ __dirname,
+ '..',
+ 'bin',
+ process.platform === 'win32' ? 'nitro.exe' : 'nitro'
+ )
+
+ return existsSync(binary)
+}
+
+export default {
+ binaryFolder,
+ decompressRunner,
+ loadModel,
+ unloadModel,
+ dispose: unloadModel,
+ isNitroExecutableAvailable,
+}
diff --git a/extensions/tensorrt-llm-extension/tsconfig.json b/extensions/tensorrt-llm-extension/tsconfig.json
new file mode 100644
index 000000000..478a05728
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/tsconfig.json
@@ -0,0 +1,20 @@
+{
+ "compilerOptions": {
+ "moduleResolution": "node",
+ "target": "es5",
+ "module": "ES2020",
+ "lib": ["es2015", "es2016", "es2017", "dom"],
+ "strict": true,
+ "sourceMap": true,
+ "declaration": true,
+ "allowSyntheticDefaultImports": true,
+ "experimentalDecorators": true,
+ "emitDecoratorMetadata": true,
+ "declarationDir": "dist/types",
+ "outDir": "dist",
+ "importHelpers": true,
+ "resolveJsonModule": true,
+ "typeRoots": ["node_modules/@types"]
+ },
+ "include": ["src"]
+}
diff --git a/uikit/src/badge/styles.scss b/uikit/src/badge/styles.scss
index b777892d7..4788f65be 100644
--- a/uikit/src/badge/styles.scss
+++ b/uikit/src/badge/styles.scss
@@ -1,5 +1,5 @@
.badge {
- @apply focus:ring-ring border-border inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-offset-2;
+ @apply focus:ring-ring border-border inline-flex items-center rounded-md border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-offset-2;
&-primary {
@apply border-transparent bg-blue-100 text-blue-600;
diff --git a/web/containers/DropdownListSidebar/index.tsx b/web/containers/DropdownListSidebar/index.tsx
index c05d26e51..70651a4d4 100644
--- a/web/containers/DropdownListSidebar/index.tsx
+++ b/web/containers/DropdownListSidebar/index.tsx
@@ -73,8 +73,9 @@ const DropdownListSidebar = ({
const [copyId, setCopyId] = useState('')
+ // TODO: Update filter condition for the local model
const localModel = downloadedModels.filter(
- (model) => model.engine === InferenceEngine.nitro
+ (model) => model.engine !== InferenceEngine.openai
)
const remoteModel = downloadedModels.filter(
(model) => model.engine === InferenceEngine.openai
@@ -293,7 +294,7 @@ const DropdownListSidebar = ({
{toGibibytes(x.metadata.size)}
- {x.engine == InferenceEngine.nitro && (
+ {x.metadata.size && (
)}
diff --git a/web/containers/Layout/BottomBar/InstallingExtension/InstallingExtensionModal.tsx b/web/containers/Layout/BottomBar/InstallingExtension/InstallingExtensionModal.tsx
new file mode 100644
index 000000000..d46764e88
--- /dev/null
+++ b/web/containers/Layout/BottomBar/InstallingExtension/InstallingExtensionModal.tsx
@@ -0,0 +1,87 @@
+import { useCallback, useEffect } from 'react'
+
+import { abortDownload } from '@janhq/core'
+import {
+ Button,
+ Modal,
+ ModalContent,
+ ModalHeader,
+ ModalTitle,
+ Progress,
+} from '@janhq/uikit'
+import { atom, useAtom, useAtomValue } from 'jotai'
+
+import {
+ formatDownloadPercentage,
+ formatExtensionsName,
+} from '@/utils/converter'
+
+import {
+ InstallingExtensionState,
+ installingExtensionAtom,
+} from '@/helpers/atoms/Extension.atom'
+
+export const showInstallingExtensionModalAtom = atom(false)
+
+const InstallingExtensionModal: React.FC = () => {
+ const [showInstallingExtensionModal, setShowInstallingExtensionModal] =
+ useAtom(showInstallingExtensionModalAtom)
+ const installingExtensions = useAtomValue(installingExtensionAtom)
+
+ useEffect(() => {
+ if (installingExtensions.length === 0) {
+ setShowInstallingExtensionModal(false)
+ }
+ }, [installingExtensions, setShowInstallingExtensionModal])
+
+ const onAbortInstallingExtensionClick = useCallback(
+ (item: InstallingExtensionState) => {
+ if (item.localPath) {
+ abortDownload(item.localPath)
+ }
+ },
+ []
+ )
+
+  return (
+    <Modal
+      open={showInstallingExtensionModal}
+      onOpenChange={() => setShowInstallingExtensionModal(false)}
+    >
+      <ModalContent>
+        <ModalHeader>
+          <ModalTitle>Installing Extension</ModalTitle>
+        </ModalHeader>
+        {Object.values(installingExtensions).map((item) => (
+          <div key={item.extensionId}>
+            <div>
+              <span>{formatExtensionsName(item.extensionId)}</span>
+              <span>{formatDownloadPercentage(item.percentage)}</span>
+            </div>
+            <Progress value={item.percentage * 100} />
+            <Button onClick={() => onAbortInstallingExtensionClick(item)}>
+              Cancel
+            </Button>
+          </div>
+        ))}
+      </ModalContent>
+    </Modal>
+  )
+}
+
+export default InstallingExtensionModal
diff --git a/web/containers/Layout/BottomBar/InstallingExtension/index.tsx b/web/containers/Layout/BottomBar/InstallingExtension/index.tsx
new file mode 100644
index 000000000..05e803881
--- /dev/null
+++ b/web/containers/Layout/BottomBar/InstallingExtension/index.tsx
@@ -0,0 +1,52 @@
+import { Fragment, useCallback } from 'react'
+
+import { Progress } from '@janhq/uikit'
+import { useAtomValue, useSetAtom } from 'jotai'
+
+import { showInstallingExtensionModalAtom } from './InstallingExtensionModal'
+
+import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
+
+const InstallingExtension: React.FC = () => {
+ const installingExtensions = useAtomValue(installingExtensionAtom)
+ const setShowInstallingExtensionModal = useSetAtom(
+ showInstallingExtensionModalAtom
+ )
+ const shouldShowInstalling = installingExtensions.length > 0
+
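+  // Average the per-extension progress (a 0..1 fraction) and convert it to a percentage.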
+ let totalPercentage = 0
+ let totalExtensions = 0
+ for (const installation of installingExtensions) {
+ totalPercentage += installation.percentage
+ totalExtensions++
+ }
+ const progress = (totalPercentage / totalExtensions) * 100
+
+ const onClick = useCallback(() => {
+ setShowInstallingExtensionModal(true)
+ }, [setShowInstallingExtensionModal])
+
+  return (
+    <Fragment>
+      {shouldShowInstalling ? (
+        <div onClick={onClick}>
+          <p>Installing Extension</p>
+          <Progress value={progress} />
+          <span>{progress.toFixed(2)}%</span>
+        </div>
+      ) : null}
+    </Fragment>
+  )
+}
+
+export default InstallingExtension
diff --git a/web/containers/Layout/BottomBar/index.tsx b/web/containers/Layout/BottomBar/index.tsx
index 2373ac3d4..3683d23db 100644
--- a/web/containers/Layout/BottomBar/index.tsx
+++ b/web/containers/Layout/BottomBar/index.tsx
@@ -16,6 +16,7 @@ import ProgressBar from '@/containers/ProgressBar'
import { appDownloadProgress } from '@/containers/Providers/Jotai'
import ImportingModelState from './ImportingModelState'
+import InstallingExtension from './InstallingExtension'
import SystemMonitor from './SystemMonitor'
import UpdatedFailedModal from './UpdateFailedModal'
@@ -46,6 +47,7 @@ const BottomBar = () => {
+        <InstallingExtension />
diff --git a/web/containers/Layout/index.tsx b/web/containers/Layout/index.tsx
index 7e3ad38ab..fb08bc6ac 100644
--- a/web/containers/Layout/index.tsx
+++ b/web/containers/Layout/index.tsx
@@ -22,6 +22,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
+import InstallingExtensionModal from './BottomBar/InstallingExtension/InstallingExtensionModal'
+
import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
const BaseLayout = (props: PropsWithChildren) => {
@@ -68,6 +70,7 @@ const BaseLayout = (props: PropsWithChildren) => {
{importModelStage === 'IMPORTING_MODEL' && }
{importModelStage === 'EDIT_MODEL_INFO' && }
{importModelStage === 'CONFIRM_CANCEL' && }
+      <InstallingExtensionModal />
)
}
diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx
index bfc87917b..20fc6dde2 100644
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@@ -7,6 +7,10 @@ import { useSetAtom } from 'jotai'
import { setDownloadStateAtom } from '@/hooks/useDownloadState'
+import { formatExtensionsName } from '@/utils/converter'
+
+import { toaster } from '../Toast'
+
import AppUpdateListener from './AppUpdateListener'
import ClipboardListener from './ClipboardListener'
import EventHandler from './EventHandler'
@@ -14,46 +18,89 @@ import EventHandler from './EventHandler'
import ModelImportListener from './ModelImportListener'
import QuickAskListener from './QuickAskListener'
+import {
+ InstallingExtensionState,
+ removeInstallingExtensionAtom,
+ setInstallingExtensionAtom,
+} from '@/helpers/atoms/Extension.atom'
+
const EventListenerWrapper = ({ children }: PropsWithChildren) => {
const setDownloadState = useSetAtom(setDownloadStateAtom)
+ const setInstallingExtension = useSetAtom(setInstallingExtensionAtom)
+ const removeInstallingExtension = useSetAtom(removeInstallingExtensionAtom)
const onFileDownloadUpdate = useCallback(
async (state: DownloadState) => {
console.debug('onFileDownloadUpdate', state)
- setDownloadState(state)
+ if (state.downloadType === 'extension') {
+ const installingExtensionState: InstallingExtensionState = {
+ extensionId: state.extensionId!,
+ percentage: state.percent,
+ localPath: state.localPath,
+ }
+ setInstallingExtension(state.extensionId!, installingExtensionState)
+ } else {
+ setDownloadState(state)
+ }
},
- [setDownloadState]
+ [setDownloadState, setInstallingExtension]
)
const onFileDownloadError = useCallback(
(state: DownloadState) => {
console.debug('onFileDownloadError', state)
- setDownloadState(state)
+ if (state.downloadType === 'extension') {
+ removeInstallingExtension(state.extensionId!)
+ } else {
+ setDownloadState(state)
+ }
},
- [setDownloadState]
+ [setDownloadState, removeInstallingExtension]
)
const onFileDownloadSuccess = useCallback(
(state: DownloadState) => {
console.debug('onFileDownloadSuccess', state)
- setDownloadState(state)
+ if (state.downloadType !== 'extension') {
+ setDownloadState(state)
+ }
},
[setDownloadState]
)
+ const onFileUnzipSuccess = useCallback(
+ (state: DownloadState) => {
+ console.debug('onFileUnzipSuccess', state)
+ toaster({
+ title: 'Success',
+        description: `Installed ${formatExtensionsName(state.extensionId!)} successfully.`,
+ type: 'success',
+ })
+ removeInstallingExtension(state.extensionId!)
+ },
+ [removeInstallingExtension]
+ )
+
useEffect(() => {
console.debug('EventListenerWrapper: registering event listeners...')
events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
events.on(DownloadEvent.onFileDownloadError, onFileDownloadError)
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+ events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
return () => {
console.debug('EventListenerWrapper: unregistering event listeners...')
events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
events.off(DownloadEvent.onFileDownloadError, onFileDownloadError)
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+ events.off(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
}
- }, [onFileDownloadUpdate, onFileDownloadError, onFileDownloadSuccess])
+ }, [
+ onFileDownloadUpdate,
+ onFileDownloadError,
+ onFileDownloadSuccess,
+ onFileUnzipSuccess,
+ ])
return (
diff --git a/web/extension/ExtensionManager.ts b/web/extension/ExtensionManager.ts
index 1259021f7..c976010c6 100644
--- a/web/extension/ExtensionManager.ts
+++ b/web/extension/ExtensionManager.ts
@@ -23,7 +23,9 @@ export class ExtensionManager {
* @param type - The type of the extension to retrieve.
* @returns The extension, if found.
*/
-  get<T extends BaseExtension>(type: ExtensionTypeEnum): T | undefined {
+  get<T extends BaseExtension>(
+    type: ExtensionTypeEnum | string
+  ): T | undefined {
return this.extensions.get(type) as T | undefined
}
diff --git a/web/helpers/atoms/Extension.atom.ts b/web/helpers/atoms/Extension.atom.ts
new file mode 100644
index 000000000..7af755e35
--- /dev/null
+++ b/web/helpers/atoms/Extension.atom.ts
@@ -0,0 +1,40 @@
+import { atom } from 'jotai'
+
+type ExtensionId = string
+
+export type InstallingExtensionState = {
+ extensionId: ExtensionId
+ percentage: number
+ localPath?: string
+}
+
+export const installingExtensionAtom = atom<InstallingExtensionState[]>([])
+
+export const setInstallingExtensionAtom = atom(
+ null,
+ (get, set, extensionId: string, state: InstallingExtensionState) => {
+ const current = get(installingExtensionAtom)
+
+ const isExists = current.some((e) => e.extensionId === extensionId)
+ if (isExists) {
+ const newCurrent = current.map((e) => {
+ if (e.extensionId === extensionId) {
+ return state
+ }
+ return e
+ })
+ set(installingExtensionAtom, newCurrent)
+ } else {
+ set(installingExtensionAtom, [...current, state])
+ }
+ }
+)
+
+export const removeInstallingExtensionAtom = atom(
+ null,
+ (get, set, extensionId: string) => {
+ const current = get(installingExtensionAtom)
+ const newCurrent = current.filter((e) => e.extensionId !== extensionId)
+ set(installingExtensionAtom, newCurrent)
+ }
+)
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 600e10783..e6c519f9f 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -40,6 +40,16 @@ export function useActiveModel() {
console.debug(`Model ${modelId} is already initialized. Ignore..`)
return
}
+
+ let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
+
+ // Switch between engines
+ if (model && activeModel && activeModel.engine !== model.engine) {
+ stopModel()
+    // TODO: refactoring the inference provider would address this
+ await new Promise((res) => setTimeout(res, 1000))
+ }
+
// TODO: incase we have multiple assistants, the configuration will be from assistant
setLoadModelError(undefined)
@@ -47,8 +57,6 @@ export function useActiveModel() {
setStateModel({ state: 'start', loading: true, model: modelId })
- let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
-
if (!model) {
toaster({
title: `Model ${modelId} not found!`,
diff --git a/web/hooks/useDownloadModel.ts b/web/hooks/useDownloadModel.ts
index 9f6334c71..d0d13d93b 100644
--- a/web/hooks/useDownloadModel.ts
+++ b/web/hooks/useDownloadModel.ts
@@ -8,12 +8,15 @@ import {
joinPath,
ModelArtifact,
DownloadState,
+ GpuSetting,
} from '@janhq/core'
import { useAtomValue, useSetAtom } from 'jotai'
import { setDownloadStateAtom } from './useDownloadState'
+import useGpuSetting from './useGpuSetting'
+
import { extensionManager } from '@/extension/ExtensionManager'
import {
ignoreSslAtom,
@@ -29,6 +32,8 @@ export default function useDownloadModel() {
const setDownloadState = useSetAtom(setDownloadStateAtom)
const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
+ const { getGpuSettings } = useGpuSetting()
+
const downloadModel = useCallback(
async (model: Model) => {
const childProgresses: DownloadState[] = model.sources.map(
@@ -68,10 +73,22 @@ export default function useDownloadModel() {
})
addDownloadingModel(model)
-
- await localDownloadModel(model, ignoreSSL, proxyEnabled ? proxy : '')
+ const gpuSettings = await getGpuSettings()
+ await localDownloadModel(
+ model,
+ ignoreSSL,
+ proxyEnabled ? proxy : '',
+ gpuSettings
+ )
},
- [ignoreSSL, proxy, proxyEnabled, addDownloadingModel, setDownloadState]
+ [
+ ignoreSSL,
+ proxy,
+ proxyEnabled,
+ getGpuSettings,
+ addDownloadingModel,
+ setDownloadState,
+ ]
)
const abortModelDownload = useCallback(async (model: Model) => {
@@ -90,8 +107,9 @@ export default function useDownloadModel() {
const localDownloadModel = async (
model: Model,
ignoreSSL: boolean,
- proxy: string
+ proxy: string,
+ gpuSettings?: GpuSetting
) =>
extensionManager
    .get<ModelExtension>(ExtensionTypeEnum.Model)
- ?.downloadModel(model, { ignoreSSL, proxy })
+ ?.downloadModel(model, gpuSettings, { ignoreSSL, proxy })
diff --git a/web/hooks/useDownloadState.ts b/web/hooks/useDownloadState.ts
index 06de9bef6..03a8883cb 100644
--- a/web/hooks/useDownloadState.ts
+++ b/web/hooks/useDownloadState.ts
@@ -18,123 +18,129 @@ export const modelDownloadStateAtom = atom<Record<string, DownloadState>>({})
export const setDownloadStateAtom = atom(
null,
(get, set, state: DownloadState) => {
- const currentState = { ...get(modelDownloadStateAtom) }
+ try {
+ const currentState = { ...get(modelDownloadStateAtom) }
- if (state.downloadState === 'end') {
- const modelDownloadState = currentState[state.modelId]
+ if (state.downloadState === 'end') {
+ const modelDownloadState = currentState[state.modelId]
- const updatedChildren: DownloadState[] =
- modelDownloadState.children!.filter(
- (m) => m.fileName !== state.fileName
+ const updatedChildren: DownloadState[] = (
+ modelDownloadState.children ?? []
+ ).filter((m) => m.fileName !== state.fileName)
+ updatedChildren.push(state)
+ modelDownloadState.children = updatedChildren
+ currentState[state.modelId] = modelDownloadState
+
+ const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
+ (m) => m.downloadState === 'end'
)
- updatedChildren.push(state)
- modelDownloadState.children = updatedChildren
- currentState[state.modelId] = modelDownloadState
- const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
- (m) => m.downloadState === 'end'
- )
+ if (isAllChildrenDownloadEnd) {
+ // download successfully
+ delete currentState[state.modelId]
+ set(removeDownloadingModelAtom, state.modelId)
- if (isAllChildrenDownloadEnd) {
- // download successfully
+ const model = get(configuredModelsAtom).find(
+ (e) => e.id === state.modelId
+ )
+ if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
+ toaster({
+ title: 'Download Completed',
+ description: `Download ${state.modelId} completed`,
+ type: 'success',
+ })
+ }
+ } else if (state.downloadState === 'error') {
+ // download error
delete currentState[state.modelId]
set(removeDownloadingModelAtom, state.modelId)
-
- const model = get(configuredModelsAtom).find(
- (e) => e.id === state.modelId
- )
- if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
- toaster({
- title: 'Download Completed',
- description: `Download ${state.modelId} completed`,
- type: 'success',
- })
- }
- } else if (state.downloadState === 'error') {
- // download error
- delete currentState[state.modelId]
- set(removeDownloadingModelAtom, state.modelId)
- if (state.error === 'aborted') {
- toaster({
- title: 'Cancel Download',
- description: `Model ${state.modelId} download cancelled`,
- type: 'warning',
- })
- } else {
- let error = state.error
- if (
- typeof error?.includes === 'function' &&
- state.error?.includes('certificate')
- ) {
- error +=
- '. To fix enable "Ignore SSL Certificates" in Advanced settings.'
+ if (state.error === 'aborted') {
+ toaster({
+ title: 'Cancel Download',
+ description: `Model ${state.modelId} download cancelled`,
+ type: 'warning',
+ })
+ } else {
+ let error = state.error
+ if (
+ typeof error?.includes === 'function' &&
+ state.error?.includes('certificate')
+ ) {
+ error +=
+ '. To fix enable "Ignore SSL Certificates" in Advanced settings.'
+ }
+ toaster({
+ title: 'Download Failed',
+ description: `Model ${state.modelId} download failed: ${error}`,
+ type: 'error',
+ })
+ }
+ } else {
+ // download in progress
+ if (state.size.total === 0) {
+ // this is initial state, just set the state
+ currentState[state.modelId] = state
+ set(modelDownloadStateAtom, currentState)
+ return
}
- toaster({
- title: 'Download Failed',
- description: `Model ${state.modelId} download failed: ${error}`,
- type: 'error',
- })
- }
- } else {
- // download in progress
- if (state.size.total === 0) {
- // this is initial state, just set the state
- currentState[state.modelId] = state
- set(modelDownloadStateAtom, currentState)
- return
- }
- const modelDownloadState = currentState[state.modelId]
- if (!modelDownloadState) {
- console.debug('setDownloadStateAtom: modelDownloadState not found')
- return
- }
+ const modelDownloadState = currentState[state.modelId]
+ if (!modelDownloadState) {
+ console.debug('setDownloadStateAtom: modelDownloadState not found')
+ return
+ }
- // delete the children if the filename is matched and replace the new state
- const updatedChildren: DownloadState[] =
- modelDownloadState.children!.filter(
- (m) => m.fileName !== state.fileName
+ // delete the children if the filename is matched and replace the new state
+ const updatedChildren: DownloadState[] = (
+ modelDownloadState.children ?? []
+ ).filter((m) => m.fileName !== state.fileName)
+
+ updatedChildren.push(state)
+
+ // re-calculate the overall progress if we have all the children download data
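+      // A child with total === 0 counts as "not ready" only if it was already
+      // being tracked and has not finished; untracked entries are ignored.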
+ const isAnyChildDownloadNotReady = updatedChildren.some(
+ (m) =>
+ m.size.total === 0 &&
+ !modelDownloadState.children?.some(
+ (e) => e.fileName === m.fileName && e.downloadState === 'end'
+ ) &&
+ modelDownloadState.children?.some((e) => e.fileName === m.fileName)
)
- updatedChildren.push(state)
+ modelDownloadState.children = updatedChildren
+ if (isAnyChildDownloadNotReady) {
+ // just update the children
+ currentState[state.modelId] = modelDownloadState
+ set(modelDownloadStateAtom, currentState)
+ return
+ }
- // re-calculate the overall progress if we have all the children download data
- const isAnyChildDownloadNotReady = updatedChildren.some(
- (m) => m.size.total === 0
- )
+ const parentTotalSize = modelDownloadState.size.total
+ if (parentTotalSize === 0) {
+        // calculate the parent's total size by summing the children's total sizes
+ const totalSize = updatedChildren.reduce(
+ (acc, m) => acc + m.size.total,
+ 0
+ )
- modelDownloadState.children = updatedChildren
+ modelDownloadState.size.total = totalSize
+ }
- if (isAnyChildDownloadNotReady) {
- // just update the children
- currentState[state.modelId] = modelDownloadState
- set(modelDownloadStateAtom, currentState)
-
- return
- }
-
- const parentTotalSize = modelDownloadState.size.total
- if (parentTotalSize === 0) {
- // calculate the total size of the parent by sum all children total size
- const totalSize = updatedChildren.reduce(
- (acc, m) => acc + m.size.total,
+      // calculate the total transferred size by summing the children's transferred sizes
+ const transferredSize = updatedChildren.reduce(
+ (acc, m) => acc + m.size.transferred,
0
)
-
- modelDownloadState.size.total = totalSize
+ modelDownloadState.size.transferred = transferredSize
+ modelDownloadState.percent =
+ parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
+ currentState[state.modelId] = modelDownloadState
}
- // calculate the total transferred size by sum all children transferred size
- const transferredSize = updatedChildren.reduce(
- (acc, m) => acc + m.size.transferred,
- 0
- )
- modelDownloadState.size.transferred = transferredSize
- modelDownloadState.percent =
- parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
- currentState[state.modelId] = modelDownloadState
+ set(modelDownloadStateAtom, currentState)
+ } catch (e) {
+ console.debug('setDownloadStateAtom: state', state)
+ console.debug('setDownloadStateAtom: error', e)
}
-
- set(modelDownloadStateAtom, currentState)
}
)
diff --git a/web/hooks/useGpuSetting.ts b/web/hooks/useGpuSetting.ts
new file mode 100644
index 000000000..36f51ed57
--- /dev/null
+++ b/web/hooks/useGpuSetting.ts
@@ -0,0 +1,21 @@
+import { useCallback } from 'react'
+
+import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
+
+import { extensionManager } from '@/extension'
+
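+/**
+ * Hook that queries the system-monitoring extension for current GPU settings;
+ * resolves to undefined when the extension reports none.
+ */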
+export default function useGpuSetting() {
+ const getGpuSettings = useCallback(async () => {
+ const gpuSetting = await extensionManager
+    ?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
+ ?.getGpuSetting()
+
+ if (!gpuSetting) {
+ console.debug('No GPU setting found')
+ return undefined
+ }
+ return gpuSetting
+ }, [])
+
+ return { getGpuSettings }
+}
diff --git a/web/next.config.js b/web/next.config.js
index a4b3e6d43..48ea0703e 100644
--- a/web/next.config.js
+++ b/web/next.config.js
@@ -38,6 +38,7 @@ const nextConfig = {
isMac: process.platform === 'darwin',
isWindows: process.platform === 'win32',
isLinux: process.platform === 'linux',
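+      // JSON.stringify makes webpack's DefinePlugin inline a string literal
+      // (e.g. '"win32"') rather than a bare identifier at build time.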
+ PLATFORM: JSON.stringify(process.platform),
}),
]
return config
diff --git a/web/screens/Chat/ChatInput/index.tsx b/web/screens/Chat/ChatInput/index.tsx
index c90a12cd2..8707e8bcd 100644
--- a/web/screens/Chat/ChatInput/index.tsx
+++ b/web/screens/Chat/ChatInput/index.tsx
@@ -244,16 +244,13 @@ const ChatInput: React.FC = () => {
            if (
-              !activeThread?.assistants[0].model.settings
-                .vision_model ||
              activeThread?.assistants[0].model.settings
                .text_model !== false
            ) {
diff --git a/web/screens/Chat/ErrorMessage/index.tsx b/web/screens/Chat/ErrorMessage/index.tsx
index 25cec1cb9..5be87a59d 100644
--- a/web/screens/Chat/ErrorMessage/index.tsx
+++ b/web/screens/Chat/ErrorMessage/index.tsx
@@ -7,11 +7,14 @@ import ModalTroubleShooting, {
modalTroubleShootingAtom,
} from '@/containers/ModalTroubleShoot'
+import { MainViewState } from '@/constants/screens'
+
import { loadModelErrorAtom } from '@/hooks/useActiveModel'
import useSendChatMessage from '@/hooks/useSendChatMessage'
import { getErrorTitle } from '@/utils/errorMessage'
+import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
@@ -19,6 +22,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
const { resendChatMessage } = useSendChatMessage()
const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
const loadModelError = useAtomValue(loadModelErrorAtom)
+ const setMainState = useSetAtom(mainViewStateAtom)
const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'
const regenerateMessage = async () => {
@@ -70,6 +74,23 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
+          ) : loadModelError?.includes('EXTENSION_IS_NOT_INSTALLED') ? (
+            <p>
+              Model is currently unavailable. Please switch to a different
+              model or install the{' '}
+              <span onClick={() => setMainState(MainViewState.Settings)}>
+                extension
+              </span>{' '}
+              to continue using it.
+            </p>
) : (
= ({ model }) => {
Format
- {model.format}
+              <span>
+                {model.format}
+              </span>
diff --git a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
index 38e7f65a6..465e69fa6 100644
--- a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
+++ b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
@@ -152,6 +152,7 @@ const ExploreModelItemHeader: React.FC = ({ model, onClick, open }) => {
{model.name}
+          <EngineBadge engine={model.engine} />
@@ -172,4 +173,23 @@ const ExploreModelItemHeader: React.FC = ({ model, onClick, open }) => {
)
}
+type EngineBadgeProps = {
+ engine: string
+}
+
+const EngineBadge: React.FC<EngineBadgeProps> = ({ engine }) => {
+ const title = 'TensorRT-LLM'
+
+ switch (engine) {
+ case 'nitro-tensorrt-llm':
+ return (
+        <span>
+          {title}
+        </span>
+ )
+ default:
+ return null
+ }
+}
+
export default ExploreModelItemHeader
diff --git a/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx b/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx
new file mode 100644
index 000000000..8033cd588
--- /dev/null
+++ b/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx
@@ -0,0 +1,226 @@
+import { useCallback, useEffect, useState } from 'react'
+
+import {
+ Compatibility,
+ GpuSetting,
+ InstallationState,
+ abortDownload,
+ systemInformations,
+} from '@janhq/core'
+import {
+ Button,
+ Progress,
+ Tooltip,
+ TooltipArrow,
+ TooltipContent,
+ TooltipPortal,
+ TooltipTrigger,
+} from '@janhq/uikit'
+
+import { InfoCircledIcon } from '@radix-ui/react-icons'
+import { useAtomValue } from 'jotai'
+
+import { extensionManager } from '@/extension'
+import Extension from '@/extension/Extension'
+import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
+
+type Props = {
+ item: Extension
+}
+
+const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
+  const [compatibility, setCompatibility] = useState<Compatibility | undefined>(
+    undefined
+  )
+  const [installState, setInstallState] =
+    useState<InstallationState>('NotRequired')
+ const installingExtensions = useAtomValue(installingExtensionAtom)
+ const [isGpuSupported, setIsGpuSupported] = useState(false)
+
+ const isInstalling = installingExtensions.some(
+ (e) => e.extensionId === item.name
+ )
+
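+  // -1 is the sentinel for "no installation in progress": InstallStateIndicator
+  // below renders its progress bar only when installProgress !== -1.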
+ const progress = isInstalling
+ ? installingExtensions.find((e) => e.extensionId === item.name)
+ ?.percentage ?? -1
+ : -1
+
+ useEffect(() => {
+ const getSystemInfos = async () => {
+ const info = await systemInformations()
+ if (!info) {
+ setIsGpuSupported(false)
+ return
+ }
+
+ const gpuSettings: GpuSetting | undefined = info.gpuSetting
+ if (!gpuSettings || gpuSettings.gpus.length === 0) {
+ setIsGpuSupported(false)
+ return
+ }
+
+ const arch = gpuSettings.gpus[0].arch
+ if (!arch) {
+ setIsGpuSupported(false)
+ return
+ }
+
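+      // Turing/Ampere/Ada cover NVIDIA RTX 20/30/40-series GPUs; the assumption
+      // is that the bundled TensorRT-LLM builds target only these architectures.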
+ const supportedGpuArch = ['turing', 'ampere', 'ada']
+ setIsGpuSupported(supportedGpuArch.includes(arch))
+ }
+ getSystemInfos()
+ }, [])
+
+ useEffect(() => {
+ const getExtensionInstallationState = async () => {
+ const extension = extensionManager.get(item.name ?? '')
+ if (!extension) return
+
+ if (typeof extension?.installationState === 'function') {
+ const installState = await extension.installationState()
+ setInstallState(installState)
+ }
+ }
+
+ getExtensionInstallationState()
+ }, [item.name, isInstalling])
+
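+  // compatibility() is assumed to return synchronous metadata declared by the
+  // extension, including the `platform` array that is checked against PLATFORM.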
+ useEffect(() => {
+ const extension = extensionManager.get(item.name ?? '')
+ if (!extension) return
+ setCompatibility(extension.compatibility())
+ }, [setCompatibility, item.name])
+
+ const onInstallClick = useCallback(async () => {
+ const extension = extensionManager.get(item.name ?? '')
+ if (!extension) return
+
+ await extension.install()
+ }, [item.name])
+
+ const onCancelInstallingClick = () => {
+ const extension = installingExtensions.find(
+ (e) => e.extensionId === item.name
+ )
+ if (extension?.localPath) {
+ abortDownload(extension.localPath)
+ }
+ }
+
+  return (
+    <div>
+      <div>
+        <div>
+          <h6>
+            TensorRT-LLM Extension
+          </h6>
+          <p>
+            v{item.version}
+          </p>
+        </div>
+        <p>
+          {item.description}
+        </p>
+      </div>
+
+      {(!compatibility || compatibility['platform']?.includes(PLATFORM)) &&
+      isGpuSupported ? (
+        <InstallStateIndicator
+          installProgress={progress}
+          installState={installState}
+          onInstallClick={onInstallClick}
+          onCancelClick={onCancelInstallingClick}
+        />
+      ) : (
+        <div>
+          <p>
+            Incompatible{' '}
+          </p>
+          <Tooltip>
+            <TooltipTrigger>
+              <InfoCircledIcon />
+            </TooltipTrigger>
+            <TooltipPortal>
+              <TooltipContent>
+                {compatibility &&
+                !compatibility['platform']?.includes(PLATFORM) ? (
+                  <span>
+                    Only available on{' '}
+                    {compatibility?.platform
+                      ?.map((e: string) =>
+                        e === 'win32'
+                          ? 'Windows'
+                          : e === 'linux'
+                            ? 'Linux'
+                            : 'MacOS'
+                      )
+                      .join(', ')}
+                  </span>
+                ) : (
+                  <span>
+                    Your GPUs are not compatible with this extension
+                  </span>
+                )}
+                <TooltipArrow />
+              </TooltipContent>
+            </TooltipPortal>
+          </Tooltip>
+        </div>
+      )}
+    </div>
+  )
+}
+
+type InstallStateProps = {
+ installProgress: number
+ installState: InstallationState
+ onInstallClick: () => void
+ onCancelClick: () => void
+}
+
+const InstallStateIndicator: React.FC<InstallStateProps> = ({
+ installProgress,
+ installState,
+ onInstallClick,
+ onCancelClick,
+}) => {
+  if (installProgress !== -1) {
+    const progress = installProgress * 100
+    return (
+      <div>
+        <button onClick={onCancelClick}>Cancel</button>
+        <div>
+          <Progress value={progress} />
+          <span>
+            {progress.toFixed(0)}%
+          </span>
+        </div>
+      </div>
+    )
+  }
+
+  // TODO: NamH check for dark mode here
+  switch (installState) {
+    case 'Installed':
+      return (
+        <div>
+          Installed
+        </div>
+      )
+    case 'NotInstalled':
+      return (
+        <Button onClick={onInstallClick}>Install</Button>
+      )
+    default:
+      return <div />
+  }
+}
+
+export default TensorRtExtensionItem
diff --git a/web/screens/Settings/CoreExtensions/index.tsx b/web/screens/Settings/CoreExtensions/index.tsx
index 8c9f92d7a..f5b66abeb 100644
--- a/web/screens/Settings/CoreExtensions/index.tsx
+++ b/web/screens/Settings/CoreExtensions/index.tsx
@@ -4,13 +4,18 @@ import React, { useState, useEffect, useRef } from 'react'
import { Button, ScrollArea } from '@janhq/uikit'
+import Loader from '@/containers/Loader'
+
import { formatExtensionsName } from '@/utils/converter'
+import TensorRtExtensionItem from './TensorRtExtensionItem'
+
import { extensionManager } from '@/extension'
import Extension from '@/extension/Extension'
const ExtensionCatalog = () => {
const [activeExtensions, setActiveExtensions] = useState<Extension[]>([])
+ const [showLoading, setShowLoading] = useState(false)
const fileInputRef = useRef<HTMLInputElement>(null)
/**
* Fetches the active extensions and their preferences from the `extensions` and `preferences` modules.
@@ -63,65 +68,76 @@ const ExtensionCatalog = () => {
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0]
if (file) {
+ setShowLoading(true)
install(event)
}
}
 return (
-    <ScrollArea>
-      <div>
-        {activeExtensions.map((item, i) => {
-          return (
-            <div key={i}>
-              <div>
-                <h6>
-                  {formatExtensionsName(item.name ?? item.description ?? '')}
-                </h6>
-                <p>v{item.version}</p>
-              </div>
-              <p>{item.description}</p>
-            </div>
-          )
-        })}
-        {/* Manual Installation */}
-        <div>
-          <div>
-            <h6>Manual Installation</h6>
-            <p>Select a extension file to install (.tgz)</p>
-          </div>
-          <input
-            type="file"
-            ref={fileInputRef}
-            hidden
-            onChange={handleFileChange}
-          />
-          <Button onClick={() => fileInputRef.current?.click()}>Select</Button>
-        </div>
-      </div>
-    </ScrollArea>
+    <>
+      <ScrollArea>
+        <div>
+          {activeExtensions.map((item, i) => {
+            // TODO: this is bad code, rewrite it
+            if (item.name === '@janhq/tensorrt-llm-extension') {
+              return <TensorRtExtensionItem key={i} item={item} />
+            }
+
+            return (
+              <div key={i}>
+                <div>
+                  <h6>
+                    {formatExtensionsName(
+                      item.name ?? item.description ?? ''
+                    )}
+                  </h6>
+                  <p>v{item.version}</p>
+                </div>
+                <p>{item.description}</p>
+              </div>
+            )
+          })}
+          {/* Manual Installation */}
+          <div>
+            <div>
+              <h6>Manual Installation</h6>
+              <p>Select an extension file to install (.tgz)</p>
+            </div>
+            <input
+              type="file"
+              ref={fileInputRef}
+              hidden
+              onChange={handleFileChange}
+            />
+            <Button onClick={() => fileInputRef.current?.click()}>Select</Button>
+          </div>
+        </div>
+      </ScrollArea>
+      {showLoading && <Loader />}
+    </>
 )
}
diff --git a/web/services/appService.ts b/web/services/appService.ts
new file mode 100644
index 000000000..9327d55c3
--- /dev/null
+++ b/web/services/appService.ts
@@ -0,0 +1,24 @@
+import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
+
+import { toaster } from '@/containers/Toast'
+
+import { extensionManager } from '@/extension'
+
+export const appService = {
+ systemInformations: async () => {
+ const gpuSetting = await extensionManager
+    ?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
+ ?.getGpuSetting()
+
+ return {
+ gpuSetting,
+ // TODO: Other system information
+ }
+ },
+ showToast: (title: string, description: string) => {
+ toaster({
+ title,
+      description,
+ })
+ },
+}
diff --git a/web/services/coreService.ts b/web/services/coreService.ts
index c010c6cec..a483cc452 100644
--- a/web/services/coreService.ts
+++ b/web/services/coreService.ts
@@ -1,5 +1,7 @@
+import { appService } from './appService'
import { EventEmitter } from './eventsService'
import { restAPI } from './restService'
+
export const setupCoreServices = () => {
if (typeof window === 'undefined') {
console.debug('undefine', window)
@@ -10,7 +12,10 @@ export const setupCoreServices = () => {
if (!window.core) {
window.core = {
events: new EventEmitter(),
- api: window.electronAPI ?? restAPI,
+ api: {
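+        // Use the Electron IPC bridge when present (desktop), else the REST
+        // client (server mode); appService is spread last, so its methods win.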
+ ...(window.electronAPI ? window.electronAPI : restAPI),
+ ...appService,
+ },
}
}
}
diff --git a/web/types/index.d.ts b/web/types/index.d.ts
index 833c3e2bd..ed83e0d14 100644
--- a/web/types/index.d.ts
+++ b/web/types/index.d.ts
@@ -11,6 +11,7 @@ declare global {
declare const isMac: boolean
declare const isWindows: boolean
declare const isLinux: boolean
+ declare const PLATFORM: string
interface Core {
api: APIFunctions
events: EventEmitter