fix: move tensorrt executable to engine (#2400)
* fix: move tensorrt executable to engine Signed-off-by: James <james@jan.ai> * some update Signed-off-by: hiro <hiro@jan.ai> * chore: bump tensorrt version * fix: wrong destroy path * fix: install extensions in parallel * chore: update path for tensorrt engine (#2404) Signed-off-by: James <james@jan.ai> Co-authored-by: James <james@jan.ai> --------- Signed-off-by: James <james@jan.ai> Signed-off-by: hiro <hiro@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: hiro <hiro@jan.ai> Co-authored-by: Louis <louis@jan.ai>
This commit is contained in:
parent
60cf8de832
commit
c81a33f382
@ -33,7 +33,7 @@ export enum AppRoute {
|
||||
stopServer = 'stopServer',
|
||||
log = 'log',
|
||||
logServer = 'logServer',
|
||||
systemInformations = 'systemInformations',
|
||||
systemInformation = 'systemInformation',
|
||||
showToast = 'showToast',
|
||||
}
|
||||
|
||||
@ -95,6 +95,7 @@ export enum FileManagerRoute {
|
||||
getUserHomePath = 'getUserHomePath',
|
||||
fileStat = 'fileStat',
|
||||
writeBlob = 'writeBlob',
|
||||
mkdir = 'mkdir',
|
||||
}
|
||||
|
||||
export type ApiFunction = (...args: any[]) => any
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { DownloadRequest, FileStat, NetworkConfig } from './types'
|
||||
import { DownloadRequest, FileStat, NetworkConfig, SystemInformation } from './types'
|
||||
|
||||
/**
|
||||
* Execute an extension module function in the main process
|
||||
@ -110,7 +110,8 @@ const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: st
|
||||
* Get system information
|
||||
* @returns {Promise<any>} - A promise that resolves with the system information.
|
||||
*/
|
||||
const systemInformations: () => Promise<any> = () => global.core.api?.systemInformations()
|
||||
const systemInformation: () => Promise<SystemInformation> = () =>
|
||||
global.core.api?.systemInformation()
|
||||
|
||||
/**
|
||||
* Show toast message from browser processes.
|
||||
@ -146,7 +147,7 @@ export {
|
||||
log,
|
||||
isSubdirectory,
|
||||
getUserHomePath,
|
||||
systemInformations,
|
||||
systemInformation,
|
||||
showToast,
|
||||
FileStat,
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { executeOnMain, getJanDataFolderPath, joinPath } from '../../core'
|
||||
import { executeOnMain, getJanDataFolderPath, joinPath, systemInformation } from '../../core'
|
||||
import { events } from '../../events'
|
||||
import { Model, ModelEvent } from '../../types'
|
||||
import { OAIEngine } from './OAIEngine'
|
||||
@ -30,11 +30,11 @@ export abstract class LocalOAIEngine extends OAIEngine {
|
||||
if (model.engine.toString() !== this.provider) return
|
||||
|
||||
const modelFolder = await joinPath([await getJanDataFolderPath(), this.modelFolder, model.id])
|
||||
|
||||
const systemInfo = await systemInformation()
|
||||
const res = await executeOnMain(this.nodeModule, this.loadModelFunctionName, {
|
||||
modelFolder,
|
||||
model,
|
||||
})
|
||||
}, systemInfo)
|
||||
|
||||
if (res?.error) {
|
||||
events.emit(ModelEvent.OnModelFail, {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { BaseExtension, ExtensionTypeEnum } from '../extension'
|
||||
import { GpuSetting, MonitoringInterface } from '../index'
|
||||
import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../index'
|
||||
|
||||
/**
|
||||
* Monitoring extension for system monitoring.
|
||||
@ -16,4 +16,5 @@ export abstract class MonitoringExtension extends BaseExtension implements Monit
|
||||
abstract getGpuSetting(): Promise<GpuSetting>
|
||||
abstract getResourcesInfo(): Promise<any>
|
||||
abstract getCurrentLoad(): Promise<any>
|
||||
abstract getOsInfo(): Promise<OperatingSystemInfo>
|
||||
}
|
||||
|
||||
@ -37,6 +37,8 @@ const readdirSync = (...args: any[]) => global.core.api?.readdirSync(...args)
|
||||
*/
|
||||
const mkdirSync = (...args: any[]) => global.core.api?.mkdirSync(...args)
|
||||
|
||||
const mkdir = (...args: any[]) => global.core.api?.mkdir(...args)
|
||||
|
||||
/**
|
||||
* Removes a directory at the specified path.
|
||||
* @returns {Promise<any>} A Promise that resolves when the directory is removed successfully.
|
||||
@ -92,6 +94,7 @@ export const fs = {
|
||||
existsSync,
|
||||
readdirSync,
|
||||
mkdirSync,
|
||||
mkdir,
|
||||
rmdirSync,
|
||||
unlinkSync,
|
||||
appendFileSync,
|
||||
|
||||
@ -88,4 +88,16 @@ export class FSExt implements Processor {
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Creates the directory at `path`, creating missing parent directories as well
 * (`recursive: true`).
 *
 * @param path - Directory to create.
 * @returns A promise that resolves once the directory exists and rejects with
 * the error reported by `fs.mkdir`.
 */
mkdir(path: string): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    // Wrap the callback API: settle the promise from the callback's error argument.
    fs.mkdir(path, { recursive: true }, (err) => (err ? reject(err) : resolve()))
  })
}
|
||||
}
|
||||
|
||||
@ -93,8 +93,7 @@ export function persistExtensions() {
|
||||
*/
|
||||
export async function installExtensions(extensions: any) {
|
||||
const installed: Extension[] = []
|
||||
for (const ext of extensions) {
|
||||
// Set install options and activation based on input type
|
||||
const installations = extensions.map((ext: any): Promise<void> => {
|
||||
const isObject = typeof ext === 'object'
|
||||
const spec = isObject ? [ext.specifier, ext] : [ext]
|
||||
const activate = isObject ? ext.activate !== false : true
|
||||
@ -102,15 +101,17 @@ export async function installExtensions(extensions: any) {
|
||||
// Install and possibly activate extension
|
||||
const extension = new Extension(...spec)
|
||||
if (!extension.origin) {
|
||||
continue
|
||||
return Promise.resolve()
|
||||
}
|
||||
await extension._install()
|
||||
return extension._install().then(() => {
|
||||
if (activate) extension.setActive(true)
|
||||
|
||||
// Add extension to store if needed
|
||||
addExtension(extension)
|
||||
installed.push(extension)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
await Promise.all(installations)
|
||||
|
||||
// Return list of all installed extensions
|
||||
return installed
|
||||
|
||||
@ -30,3 +30,27 @@ export type GpuSettingInfo = {
|
||||
name: string
|
||||
arch?: string
|
||||
}
|
||||
|
||||
/**
 * Platform identifiers the app recognises, declared `as const` so that a
 * literal union can be derived from the tuple.
 */
export const SupportedPlatforms = ['win32', 'linux', 'darwin'] as const

/** Readonly tuple type of `SupportedPlatforms`. */
export type SupportedPlatformTuple = typeof SupportedPlatforms

/** Union of the literal members of `SupportedPlatforms`. */
export type SupportedPlatform = SupportedPlatformTuple[number]

/** Description of the host operating system. */
export type OperatingSystemInfo = {
  /** One of the supported platforms, or 'unknown' when the host platform is not in the list. */
  platform: SupportedPlatform | 'unknown'
  /** CPU architecture string. */
  arch: string
  /** Operating system release string. */
  release: string
  /** Machine type string. */
  machine: string
  /** Operating system version string. */
  version: string
  /** Total system memory. NOTE(review): presumably bytes — confirm against the producer. */
  totalMem: number
  /** Free system memory. NOTE(review): presumably bytes — confirm against the producer. */
  freeMem: number
}

/** Aggregated system information: GPU settings plus, when available, OS details. */
export type SystemInformation = {
  gpuSetting: GpuSetting
  /** Optional: consumers must handle the case where OS details are absent. */
  osInfo?: OperatingSystemInfo
}

/** Per-core CPU description. NOTE(review): unit of `speed` is not shown here — confirm. */
export type CpuCoreInfo = {
  model: string
  speed: number
}
|
||||
|
||||
@ -1,4 +1,9 @@
|
||||
import { GpuSetting, MonitoringExtension, executeOnMain } from '@janhq/core'
|
||||
import {
|
||||
GpuSetting,
|
||||
MonitoringExtension,
|
||||
OperatingSystemInfo,
|
||||
executeOnMain,
|
||||
} from '@janhq/core'
|
||||
|
||||
/**
|
||||
* JanMonitoringExtension is an extension that provides system monitoring functionality.
|
||||
@ -41,4 +46,8 @@ export default class JanMonitoringExtension extends MonitoringExtension {
|
||||
getCurrentLoad(): Promise<any> {
|
||||
return executeOnMain(NODE, 'getCurrentLoad')
|
||||
}
|
||||
|
||||
getOsInfo(): Promise<OperatingSystemInfo> {
|
||||
return executeOnMain(NODE, 'getOsInfo')
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,9 +1,16 @@
|
||||
import { GpuSetting, GpuSettingInfo, ResourceInfo } from '@janhq/core'
|
||||
import {
|
||||
GpuSetting,
|
||||
GpuSettingInfo,
|
||||
OperatingSystemInfo,
|
||||
ResourceInfo,
|
||||
SupportedPlatforms,
|
||||
} from '@janhq/core'
|
||||
import { getJanDataFolderPath, log } from '@janhq/core/node'
|
||||
import { mem, cpu } from 'node-os-utils'
|
||||
import { exec } from 'child_process'
|
||||
import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
|
||||
import path from 'path'
|
||||
import os from 'os'
|
||||
|
||||
/**
|
||||
* Path to the settings directory
|
||||
@ -320,3 +327,20 @@ const updateCudaExistence = (
|
||||
data.is_initial = false
|
||||
return data
|
||||
}
|
||||
|
||||
/**
 * Collects operating system details for the current process.
 *
 * `platform` is `process.platform` when it is one of `SupportedPlatforms`,
 * otherwise 'unknown'. The remaining fields are read from `process.arch` and
 * the Node `os` module.
 *
 * @returns The operating system information snapshot.
 */
export const getOsInfo = (): OperatingSystemInfo => {
  const matchedPlatform = SupportedPlatforms.find(
    (candidate) => candidate === process.platform
  )

  return {
    platform: matchedPlatform || 'unknown',
    arch: process.arch,
    release: os.release(),
    machine: os.machine(),
    version: os.version(),
    totalMem: os.totalmem(),
    freeMem: os.freemem(),
  }
}
|
||||
|
||||
@ -18,6 +18,8 @@
|
||||
"0.1.0"
|
||||
]
|
||||
},
|
||||
"tensorrtVersion": "0.1.6",
|
||||
"provider": "nitro-tensorrt-llm",
|
||||
"scripts": {
|
||||
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
|
||||
"build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
|
||||
@ -16,9 +16,10 @@ export default [
|
||||
plugins: [
|
||||
replace({
|
||||
EXTENSION_NAME: JSON.stringify(packageJson.name),
|
||||
TENSORRT_VERSION: JSON.stringify('0.1.5'),
|
||||
TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
|
||||
PROVIDER: JSON.stringify(packageJson.provider),
|
||||
DOWNLOAD_RUNNER_URL:
|
||||
process.platform === 'darwin' || process.platform === 'win32'
|
||||
process.platform === 'win32'
|
||||
? JSON.stringify(
|
||||
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
|
||||
)
|
||||
@ -52,11 +53,14 @@ export default [
|
||||
},
|
||||
plugins: [
|
||||
replace({
|
||||
EXTENSION_NAME: JSON.stringify(packageJson.name),
|
||||
TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
|
||||
PROVIDER: JSON.stringify(packageJson.provider),
|
||||
LOAD_MODEL_URL: JSON.stringify(
|
||||
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
|
||||
),
|
||||
TERMINATE_ENGINE_URL: JSON.stringify(
|
||||
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/processmanager/destroy`
|
||||
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/processmanager/destroy`
|
||||
),
|
||||
ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'),
|
||||
ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? '3928'),
|
||||
|
||||
@ -8,3 +8,4 @@ declare const DOWNLOAD_RUNNER_URL: string
|
||||
declare const TENSORRT_VERSION: string
|
||||
declare const COMPATIBILITY: object
|
||||
declare const EXTENSION_NAME: string
|
||||
declare const PROVIDER: string
|
||||
|
||||
@ -16,11 +16,12 @@ import {
|
||||
executeOnMain,
|
||||
joinPath,
|
||||
showToast,
|
||||
systemInformations,
|
||||
systemInformation,
|
||||
LocalOAIEngine,
|
||||
fs,
|
||||
MessageRequest,
|
||||
ModelEvent,
|
||||
getJanDataFolderPath,
|
||||
} from '@janhq/core'
|
||||
import models from '../models.json'
|
||||
|
||||
@ -34,11 +35,12 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
|
||||
* Override custom function name for loading and unloading model
|
||||
* Which are implemented from node module
|
||||
*/
|
||||
override provider = 'nitro-tensorrt-llm'
|
||||
override provider = PROVIDER
|
||||
override inferenceUrl = INFERENCE_URL
|
||||
override nodeModule = NODE
|
||||
|
||||
private supportedGpuArch = ['turing', 'ampere', 'ada']
|
||||
private supportedPlatform = ['win32', 'linux']
|
||||
|
||||
compatibility() {
|
||||
return COMPATIBILITY as unknown as Compatibility
|
||||
@ -54,7 +56,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
|
||||
}
|
||||
|
||||
override async install(): Promise<void> {
|
||||
const info = await systemInformations()
|
||||
const info = await systemInformation()
|
||||
console.debug(
|
||||
`TensorRTLLMExtension installing pre-requisites... ${JSON.stringify(info)}`
|
||||
)
|
||||
@ -83,12 +85,19 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
|
||||
return
|
||||
}
|
||||
|
||||
const binaryFolderPath = await executeOnMain(
|
||||
this.nodeModule,
|
||||
'binaryFolder'
|
||||
)
|
||||
if (!(await fs.existsSync(binaryFolderPath))) {
|
||||
await fs.mkdirSync(binaryFolderPath)
|
||||
const janDataFolderPath = await getJanDataFolderPath()
|
||||
const engineVersion = TENSORRT_VERSION
|
||||
|
||||
const executableFolderPath = await joinPath([
|
||||
janDataFolderPath,
|
||||
'engines',
|
||||
this.provider,
|
||||
engineVersion,
|
||||
firstGpu.arch,
|
||||
])
|
||||
|
||||
if (!(await fs.existsSync(executableFolderPath))) {
|
||||
await fs.mkdir(executableFolderPath)
|
||||
}
|
||||
|
||||
const placeholderUrl = DOWNLOAD_RUNNER_URL
|
||||
@ -100,7 +109,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
|
||||
|
||||
const tarball = await baseName(url)
|
||||
|
||||
const tarballFullPath = await joinPath([binaryFolderPath, tarball])
|
||||
const tarballFullPath = await joinPath([executableFolderPath, tarball])
|
||||
const downloadRequest: DownloadRequest = {
|
||||
url,
|
||||
localPath: tarballFullPath,
|
||||
@ -109,12 +118,16 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
|
||||
}
|
||||
downloadFile(downloadRequest)
|
||||
|
||||
// TODO: wrap this into a Promise
|
||||
const onFileDownloadSuccess = async (state: DownloadState) => {
|
||||
// if other download, ignore
|
||||
if (state.fileName !== tarball) return
|
||||
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
|
||||
await executeOnMain(this.nodeModule, 'decompressRunner', tarballFullPath)
|
||||
await executeOnMain(
|
||||
this.nodeModule,
|
||||
'decompressRunner',
|
||||
tarballFullPath,
|
||||
executableFolderPath
|
||||
)
|
||||
events.emit(DownloadEvent.onFileUnzipSuccess, state)
|
||||
|
||||
// Prepopulate models as soon as it's ready
|
||||
@ -144,13 +157,65 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
|
||||
}
|
||||
|
||||
/**
 * Reports whether the nitro x TensorRT-LLM executable is installed for this machine.
 *
 * The machine must expose a GPU setting with at least one GPU, the first GPU
 * must be an Nvidia card with a supported architecture, and the OS platform
 * must be supported. When all of that holds, the result depends on whether the
 * executable exists at
 * `<jan data folder>/engines/<provider>/<TENSORRT_VERSION>/<gpu arch>/nitro[.exe]`.
 *
 * @returns 'Installed' when the executable exists, otherwise 'NotInstalled'.
 */
override async installationState(): Promise<InstallationState> {
  const info = await systemInformation()

  // The system information may be missing at runtime (the API is reached through
  // optional chaining), so guard the access instead of throwing a TypeError.
  const gpuSetting: GpuSetting | undefined = info?.gpuSetting
  if (gpuSetting === undefined) {
    console.warn(
      'No GPU setting found. TensorRT-LLM extension is not installed'
    )
    return 'NotInstalled' // TODO: maybe disabled / incompatible is more appropriate
  }

  if (gpuSetting.gpus.length === 0) {
    console.warn('No GPU found. TensorRT-LLM extension is not installed')
    return 'NotInstalled'
  }

  // Only the first GPU is considered.
  const firstGpu = gpuSetting.gpus[0]
  if (!firstGpu.name.toLowerCase().includes('nvidia')) {
    console.error('No Nvidia GPU found. Please check your GPU setting.')
    return 'NotInstalled'
  }

  if (firstGpu.arch === undefined) {
    console.error('No GPU architecture found. Please check your GPU setting.')
    return 'NotInstalled'
  }

  if (!this.supportedGpuArch.includes(firstGpu.arch)) {
    // Interpolate the GPU name: interpolating the object itself prints "[object Object]".
    console.error(
      `Your GPU: ${firstGpu.name} is not supported. Only 20xx, 30xx, 40xx series are supported.`
    )
    return 'NotInstalled'
  }

  const osInfo = info?.osInfo
  if (!osInfo) {
    console.error('No OS information found. Please check your OS setting.')
    return 'NotInstalled'
  }

  if (!this.supportedPlatform.includes(osInfo.platform)) {
    console.error(
      `Your OS: ${osInfo.platform} is not supported. Only Windows and Linux are supported.`
    )
    return 'NotInstalled'
  }

  const janDataFolderPath = await getJanDataFolderPath()
  const engineVersion = TENSORRT_VERSION

  const enginePath = await joinPath([
    janDataFolderPath,
    'engines',
    this.provider,
    engineVersion,
    firstGpu.arch,
    osInfo.platform === 'win32' ? 'nitro.exe' : 'nitro',
  ])

  // For now, we just check the executable of nitro x tensor rt
  return (await fs.existsSync(enginePath)) ? 'Installed' : 'NotInstalled'
}
|
||||
|
||||
override onInferenceStopped() {
|
||||
|
||||
@ -2,13 +2,16 @@ import path from 'path'
|
||||
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
|
||||
import tcpPortUsed from 'tcp-port-used'
|
||||
import fetchRT from 'fetch-retry'
|
||||
import { log } from '@janhq/core/node'
|
||||
import { existsSync } from 'fs'
|
||||
import { log, getJanDataFolderPath } from '@janhq/core/node'
|
||||
import decompress from 'decompress'
|
||||
import { SystemInformation } from '@janhq/core'
|
||||
|
||||
// Polyfill fetch with retry
|
||||
const fetchRetry = fetchRT(fetch)
|
||||
|
||||
const supportedPlatform = (): string[] => ['win32', 'linux']
|
||||
const supportedGpuArch = (): string[] => ['turing', 'ampere', 'ada']
|
||||
|
||||
/**
|
||||
* The response object for model init operation.
|
||||
*/
|
||||
@ -24,7 +27,10 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
|
||||
* Initializes an engine subprocess to load a machine learning model.
|
||||
* @param params - The model load settings.
|
||||
*/
|
||||
async function loadModel(params: any): Promise<{ error: Error | undefined }> {
|
||||
async function loadModel(
|
||||
params: any,
|
||||
systemInfo?: SystemInformation
|
||||
): Promise<{ error: Error | undefined }> {
|
||||
// modelFolder is the absolute path to the running model folder
|
||||
// e.g. ~/jan/models/llama-2
|
||||
let modelFolder = params.modelFolder
|
||||
@ -33,7 +39,10 @@ async function loadModel(params: any): Promise<{ error: Error | undefined }> {
|
||||
engine_path: modelFolder,
|
||||
ctx_len: params.model.settings.ctx_len ?? 2048,
|
||||
}
|
||||
return runEngineAndLoadModel(settings)
|
||||
if (!systemInfo) {
|
||||
throw new Error('Cannot get system info. Unable to start nitro x tensorrt.')
|
||||
}
|
||||
return runEngineAndLoadModel(settings, systemInfo)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -67,9 +76,12 @@ function unloadModel(): Promise<any> {
|
||||
* 2. Load model into engine subprocess
|
||||
* @returns
|
||||
*/
|
||||
async function runEngineAndLoadModel(settings: ModelLoadParams) {
|
||||
async function runEngineAndLoadModel(
|
||||
settings: ModelLoadParams,
|
||||
systemInfo: SystemInformation
|
||||
) {
|
||||
return unloadModel()
|
||||
.then(runEngine)
|
||||
.then(() => runEngine(systemInfo))
|
||||
.then(() => loadModelRequest(settings))
|
||||
.catch((err) => {
|
||||
// TODO: Broadcast error so app could display proper error message
|
||||
@ -81,7 +93,7 @@ async function runEngineAndLoadModel(settings: ModelLoadParams) {
|
||||
/**
|
||||
* Loads a LLM model into the Engine subprocess by sending a HTTP POST request.
|
||||
*/
|
||||
function loadModelRequest(
|
||||
async function loadModelRequest(
|
||||
settings: ModelLoadParams
|
||||
): Promise<{ error: Error | undefined }> {
|
||||
debugLog(`Loading model with params ${JSON.stringify(settings)}`)
|
||||
@ -107,23 +119,66 @@ function loadModelRequest(
|
||||
/**
|
||||
* Spawns engine subprocess.
|
||||
*/
|
||||
function runEngine(): Promise<any> {
|
||||
async function runEngine(systemInfo: SystemInformation): Promise<void> {
|
||||
debugLog(`Spawning engine subprocess...`)
|
||||
if (systemInfo.gpuSetting == null) {
|
||||
return Promise.reject(
|
||||
'No GPU information found. Please check your GPU setting.'
|
||||
)
|
||||
}
|
||||
|
||||
if (systemInfo.gpuSetting.gpus.length === 0) {
|
||||
return Promise.reject('No GPU found. Please check your GPU setting.')
|
||||
}
|
||||
|
||||
if (systemInfo.osInfo == null) {
|
||||
return Promise.reject(
|
||||
'No OS information found. Please check your OS setting.'
|
||||
)
|
||||
}
|
||||
const platform = systemInfo.osInfo.platform
|
||||
if (platform == null || supportedPlatform().includes(platform) === false) {
|
||||
return Promise.reject(
|
||||
'No OS architecture found. Please check your OS setting.'
|
||||
)
|
||||
}
|
||||
|
||||
const gpu = systemInfo.gpuSetting.gpus[0]
|
||||
if (gpu.name.toLowerCase().includes('nvidia') === false) {
|
||||
return Promise.reject('No Nvidia GPU found. Please check your GPU setting.')
|
||||
}
|
||||
const gpuArch = gpu.arch
|
||||
if (gpuArch == null || supportedGpuArch().includes(gpuArch) === false) {
|
||||
return Promise.reject(
|
||||
`Your GPU: ${gpu.name} is not supported. Only ${supportedGpuArch().join(
|
||||
', '
|
||||
)} series are supported.`
|
||||
)
|
||||
}
|
||||
const janDataFolderPath = await getJanDataFolderPath()
|
||||
const tensorRtVersion = TENSORRT_VERSION
|
||||
const provider = PROVIDER
|
||||
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
// Current directory by default
|
||||
let binaryFolder = path.join(__dirname, '..', 'bin')
|
||||
// Binary path
|
||||
const binary = path.join(
|
||||
binaryFolder,
|
||||
process.platform === 'win32' ? 'nitro.exe' : 'nitro'
|
||||
|
||||
const executableFolderPath = path.join(
|
||||
janDataFolderPath,
|
||||
'engines',
|
||||
provider,
|
||||
tensorRtVersion,
|
||||
gpuArch
|
||||
)
|
||||
const nitroExecutablePath = path.join(
|
||||
executableFolderPath,
|
||||
platform === 'win32' ? 'nitro.exe' : 'nitro'
|
||||
)
|
||||
|
||||
const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
|
||||
// Execute the binary
|
||||
debugLog(`Spawn nitro at path: ${binary}, and args: ${args}`)
|
||||
subprocess = spawn(binary, args, {
|
||||
cwd: binaryFolder,
|
||||
debugLog(`Spawn nitro at path: ${nitroExecutablePath}, and args: ${args}`)
|
||||
subprocess = spawn(nitroExecutablePath, args, {
|
||||
cwd: executableFolderPath,
|
||||
env: {
|
||||
...process.env,
|
||||
},
|
||||
@ -155,12 +210,7 @@ function debugLog(message: string, level: string = 'Debug') {
|
||||
log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
|
||||
}
|
||||
|
||||
const binaryFolder = async (): Promise<string> => {
|
||||
return path.join(__dirname, '..', 'bin')
|
||||
}
|
||||
|
||||
const decompressRunner = async (zipPath: string) => {
|
||||
const output = path.join(__dirname, '..', 'bin')
|
||||
const decompressRunner = async (zipPath: string, output: string) => {
|
||||
console.debug(`Decompressing ${zipPath} to ${output}...`)
|
||||
try {
|
||||
const files = await decompress(zipPath, output)
|
||||
@ -170,22 +220,11 @@ const decompressRunner = async (zipPath: string) => {
|
||||
}
|
||||
}
|
||||
|
||||
const isNitroExecutableAvailable = async (): Promise<boolean> => {
|
||||
const binary = path.join(
|
||||
__dirname,
|
||||
'..',
|
||||
'bin',
|
||||
process.platform === 'win32' ? 'nitro.exe' : 'nitro'
|
||||
)
|
||||
|
||||
return existsSync(binary)
|
||||
}
|
||||
|
||||
export default {
|
||||
binaryFolder,
|
||||
supportedPlatform,
|
||||
supportedGpuArch,
|
||||
decompressRunner,
|
||||
loadModel,
|
||||
unloadModel,
|
||||
dispose: unloadModel,
|
||||
isNitroExecutableAvailable,
|
||||
}
|
||||
|
||||
@ -5,7 +5,7 @@ import {
|
||||
GpuSetting,
|
||||
InstallationState,
|
||||
abortDownload,
|
||||
systemInformations,
|
||||
systemInformation,
|
||||
} from '@janhq/core'
|
||||
import {
|
||||
Badge,
|
||||
@ -51,7 +51,7 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
|
||||
|
||||
useEffect(() => {
|
||||
const getSystemInfos = async () => {
|
||||
const info = await systemInformations()
|
||||
const info = await systemInformation()
|
||||
if (!info) {
|
||||
setIsGpuSupported(false)
|
||||
return
|
||||
@ -112,7 +112,7 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
|
||||
}
|
||||
|
||||
const description = marked.parse(item.description ?? '', { async: false })
|
||||
console.log(description)
|
||||
|
||||
return (
|
||||
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none">
|
||||
<div className="flex-1 flex-shrink-0 space-y-1.5">
|
||||
|
||||
@ -1,20 +1,32 @@
|
||||
import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
|
||||
import {
|
||||
ExtensionTypeEnum,
|
||||
MonitoringExtension,
|
||||
SystemInformation,
|
||||
} from '@janhq/core'
|
||||
|
||||
import { toaster } from '@/containers/Toast'
|
||||
|
||||
import { extensionManager } from '@/extension'
|
||||
|
||||
export const appService = {
|
||||
/**
 * Gathers system information from the system monitoring extension.
 *
 * @returns The GPU setting and OS information, or `undefined` (after logging a
 * warning) when no system monitoring extension is registered.
 */
systemInformation: async (): Promise<SystemInformation | undefined> => {
  const monitoring = extensionManager?.get<MonitoringExtension>(
    ExtensionTypeEnum.SystemMonitoring
  )

  // Guard clause: nothing to query without the monitoring extension.
  if (!monitoring) {
    console.warn('System monitoring extension not found')
    return undefined
  }

  const gpuSetting = await monitoring.getGpuSetting()
  const osInfo = await monitoring.getOsInfo()
  const systemInfo: SystemInformation = { gpuSetting, osInfo }

  return systemInfo
},
|
||||
|
||||
showToast: (title: string, description: string) => {
|
||||
toaster({
|
||||
title,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user