Louis d85d02693b
feat: Nitro-Tensorrt-LLM Extension (#2280)
* feat: tensorrt-llm-extension

* fix: loading

* feat: add download tensorrt llm runner

Signed-off-by: James <james@jan.ai>

* feat: update to rollupjs instead of webpack for monitoring extension

Signed-off-by: James <james@jan.ai>

* feat: move update nvidia info to monitor extension

Signed-off-by: James <james@jan.ai>

* allow download tensorrt

Signed-off-by: James <james@jan.ai>

* update

Signed-off-by: James <james@jan.ai>

* allow download tensor rt based on gpu setting

Signed-off-by: James <james@jan.ai>

* update downloaded models

Signed-off-by: James <james@jan.ai>

* feat: add extension compatibility

* dynamic tensor rt engines

Signed-off-by: James <james@jan.ai>

* update models

Signed-off-by: James <james@jan.ai>

* chore: remove ts-ignore

* feat: getting installation state from extension

Signed-off-by: James <james@jan.ai>

* chore: adding type for decompress

Signed-off-by: James <james@jan.ai>

* feat: update according Louis's comment

Signed-off-by: James <james@jan.ai>

* feat: add progress for installing extension

Signed-off-by: James <james@jan.ai>

* chore: remove args from extension installation

* fix: model download does not work properly

* fix: do not allow user to stop tensorrtllm inference

* fix: extension installed style

* fix: download tensorrt does not update state

Signed-off-by: James <james@jan.ai>

* chore: replace int4 by fl16

* feat: modal for installing extension

Signed-off-by: James <james@jan.ai>

* fix: start download immediately after press install

Signed-off-by: James <james@jan.ai>

* fix: error switching between engines

* feat: rename inference provider to ai engine and refactor to core

* fix: missing ulid

* fix: core bundler

* feat: add cancel extension installing

Signed-off-by: James <james@jan.ai>

* remove mocking for mac

Signed-off-by: James <james@jan.ai>

* fix: show models only when extension is ready

* add tensorrt badge for model

Signed-off-by: James <james@jan.ai>

* fix: copy

* fix: add compatible check (#2342)

* fix: add compatible check

Signed-off-by: James <james@jan.ai>

* fix: copy

* fix: font

* fix: copy

* fix: broken monitoring extension

* chore: bump engine

* fix: copy

* fix: model copy

* fix: copy

* fix: model json

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: Louis <louis@jan.ai>

* fix: vulkan support

* fix: installation button padding

* fix: empty script

* fix: remove hard code string

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: NamH <NamNh0122@gmail.com>
2024-03-14 14:07:22 +07:00

87 lines
2.7 KiB
TypeScript

import { getJanDataFolderPath } from '@janhq/core/node'
import { readFileSync } from 'fs'
import * as path from 'path'
/**
 * Result of resolving which Nitro binary to launch, together with the
 * device-selection strings computed alongside it.
 */
export interface NitroExecutableOptions {
  /** Full path of the Nitro binary chosen for the current platform. */
  executablePath: string
  /** Comma-separated entries taken from the `gpus_in_use` setting for CUDA builds; empty otherwise. */
  cudaVisibleDevices: string
  /** Comma-separated entries taken from the `gpus_in_use` setting when Vulkan is enabled; empty otherwise. */
  vkVisibleDevices: string
}
/**
 * Path of `settings/settings.json` inside the Jan data folder.
 * `executableNitroFile` parses this file to read the GPU-related settings
 * (`run_mode`, `cuda`, `vulkan`, `gpus_in_use`).
 */
export const GPU_INFO_FILE = path.join(getJanDataFolderPath(), 'settings', 'settings.json')
/**
 * Subset of the settings file that `executableNitroFile` reads.
 * Every field is optional because the file is parsed without schema validation.
 */
interface NitroGpuSettings {
  run_mode?: string
  vulkan?: boolean
  cuda?: { version?: string } | null
  gpus_in_use?: unknown
}

/**
 * Render the `gpus_in_use` setting as a comma-separated string.
 * A missing value yields an empty string instead of throwing.
 */
const formatVisibleDevices = (gpusInUse: unknown): string => {
  if (gpusInUse == null) return ''
  return Array.isArray(gpusInUse) ? gpusInUse.join(',') : String(gpusInUse)
}

/**
 * Read the settings file and pick the binary folder and device lists for a
 * platform whose binaries come in cpu / vulkan / cuda-11-7 / cuda-12-0 variants.
 * @param binRoot Folder that contains one sub-folder per binary variant.
 * @param osPrefix Folder-name prefix of the platform (`win` or `linux`).
 * @throws When the settings file cannot be read or does not contain valid JSON.
 */
const resolveGpuAwareBinary = (
  binRoot: string,
  osPrefix: 'win' | 'linux'
): { binaryFolder: string; cudaVisibleDevices: string; vkVisibleDevices: string } => {
  const parsed: unknown = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
  // Anything that is not a JSON object is treated as "no settings present".
  const settings: NitroGpuSettings =
    typeof parsed === 'object' && parsed !== null ? (parsed as NitroGpuSettings) : {}

  const gpuMode = settings.run_mode !== 'cpu'
  const vulkanEnabled = settings.vulkan === true
  const visibleDevices = formatVisibleDevices(settings.gpus_in_use)

  // Vulkan takes precedence over both the CPU and the CUDA folders.
  let variant: string
  if (vulkanEnabled) {
    variant = 'vulkan'
  } else if (!gpuMode) {
    variant = 'cpu'
  } else {
    // Only CUDA major version '11' selects the 11.7 build; anything else uses 12.0.
    variant = settings.cuda?.version === '11' ? 'cuda-11-7' : 'cuda-12-0'
  }

  return {
    binaryFolder: path.join(binRoot, `${osPrefix}-${variant}`),
    // The CUDA device list is filled whenever run_mode is not 'cpu', even with Vulkan on.
    cudaVisibleDevices: gpuMode ? visibleDevices : '',
    vkVisibleDevices: vulkanEnabled ? visibleDevices : '',
  }
}

/**
 * Find which executable file to run based on the current platform.
 *
 * Binary folders, all located under `<__dirname>/../bin`:
 * - Windows: win-cpu, win-vulkan, win-cuda-11-7, win-cuda-12-0 (binary `nitro.exe`)
 * - macOS: mac-arm64 (Apple Silicon), mac-x64 (Intel) (binary `nitro`)
 * - Any other platform is treated as Linux: linux-cpu, linux-vulkan,
 *   linux-cuda-11-7, linux-cuda-12-0 (binary `nitro`)
 *
 * On macOS the settings file is not read and both device lists are empty.
 *
 * @returns The path of the executable to run plus the CUDA and Vulkan device lists.
 * @throws On Windows/Linux when the settings file cannot be read or is not valid JSON.
 */
export const executableNitroFile = (): NitroExecutableOptions => {
  const binRoot = path.join(__dirname, '..', 'bin')

  if (process.platform === 'darwin') {
    const macFolder = process.arch === 'arm64' ? 'mac-arm64' : 'mac-x64'
    return {
      executablePath: path.join(binRoot, macFolder, 'nitro'),
      cudaVisibleDevices: '',
      vkVisibleDevices: '',
    }
  }

  const isWindows = process.platform === 'win32'
  const { binaryFolder, cudaVisibleDevices, vkVisibleDevices } = resolveGpuAwareBinary(
    binRoot,
    isWindows ? 'win' : 'linux'
  )

  return {
    executablePath: path.join(binaryFolder, isWindows ? 'nitro.exe' : 'nitro'),
    cudaVisibleDevices,
    vkVisibleDevices,
  }
}