chore: deprecate tensorrt-llm extension (#4453)
This commit is contained in:
parent
58bb1b4939
commit
06ee10be1b

README.md
@@ -1,79 +0,0 @@
# TensorRT-LLM Extension

Created using the Jan extension example.

# Create a Jan Extension using TypeScript

Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀

## Create Your Own Extension

To create your own extension, you can use this repository as a template! Just follow these instructions:

1. Click the **Use this template** button at the top of the repository
2. Select **Create a new repository**
3. Select an owner and name for your new repository
4. Click **Create repository**
5. Clone your new repository

## Initial Setup

After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.

> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!

1. :hammer_and_wrench: Install the dependencies

   ```bash
   npm install
   ```

1. :building_construction: Package the TypeScript for distribution

   ```bash
   npm run bundle
   ```

1. :white_check_mark: Check your artifact

   There will now be a `.tgz` file in your extension directory.

## Update the Extension Metadata

The [`package.json`](package.json) file defines metadata about your extension, such as the extension name, main entry, description, and version.

When you copy this repository, update `package.json` with the name and description of your extension.

## Update the Extension Code

The [`src/`](./src/) directory is the heart of your extension! It contains the source code that will be run when your extension's functions are invoked. You can replace the contents of this directory with your own code.

There are a few things to keep in mind when writing your extension code:

- Most Jan Extension functions are processed asynchronously.
  In `index.ts`, you will see that the extension function returns a `Promise<any>`.

  ```typescript
  import { events, MessageEvent, MessageRequest } from '@janhq/core'

  function onStart(): Promise<any> {
    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.inference(data)
    )
  }
  ```
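
  Handlers registered this way can be removed with `events.off` when they are
  no longer needed. A minimal sketch, assuming the same `@janhq/core` event
  API used elsewhere in this extension:

  ```typescript
  import { events, MessageEvent, MessageRequest } from '@janhq/core'

  const onMessageSent = (data: MessageRequest) => {
    console.log('message sent', data)
  }

  // Subscribe when the extension starts...
  events.on(MessageEvent.OnMessageSent, onMessageSent)

  // ...and unsubscribe when it stops, so stale handlers do not leak.
  events.off(MessageEvent.OnMessageSent, onMessageSent)
  ```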

For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).

So, what are you waiting for? Go ahead and start customizing your extension!

jest.config.js
@@ -1,9 +0,0 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
  preset: 'ts-jest',
  testEnvironment: 'node',
  transform: {
    'node_modules/@janhq/core/.+\\.(j|t)s?$': 'ts-jest',
  },
  transformIgnorePatterns: ['node_modules/(?!@janhq/core/.*)'],
}

package.json
@@ -1,78 +0,0 @@
{
  "name": "@janhq/tensorrt-llm-extension",
  "productName": "TensorRT-LLM Inference Engine",
  "version": "0.0.3",
  "description": "This extension enables Nvidia's TensorRT-LLM for the fastest GPU acceleration. See the [setup guide](https://jan.ai/guides/providers/tensorrt-llm/) for next steps.",
  "main": "dist/index.js",
  "node": "dist/node/index.cjs.js",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "config": {
    "host": "127.0.0.1",
    "port": "3929"
  },
  "compatibility": {
    "platform": ["win32"],
    "app": ["0.1.0"]
  },
  "tensorrtVersion": "0.1.8",
  "provider": "nitro-tensorrt-llm",
  "scripts": {
    "test": "jest",
    "build": "rolldown -c rolldown.config.mjs",
    "build:publish": "rimraf *.tgz --glob || true && yarn build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install"
  },
  "exports": {
    ".": "./dist/index.js",
    "./main": "./dist/node/index.cjs.js"
  },
  "devDependencies": {
    "@types/decompress": "4.2.7",
    "@types/jest": "^29.5.12",
    "@types/node": "^20.11.4",
    "@types/os-utils": "^0.0.4",
    "@types/tcp-port-used": "^1.0.4",
    "cpx": "^1.5.0",
    "download-cli": "^1.1.1",
    "jest": "^29.7.0",
    "jest-junit": "^16.0.0",
    "jest-runner": "^29.7.0",
    "rimraf": "^3.0.2",
    "rolldown": "1.0.0-beta.1",
    "run-script-os": "^1.1.6",
    "ts-jest": "^29.2.5",
    "typescript": "^5.2.2"
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "decompress": "^4.2.1",
    "fetch-retry": "^5.0.6",
    "rxjs": "^7.8.1",
    "tcp-port-used": "^1.0.2",
    "terminate": "^2.6.1",
    "ulidx": "^2.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "tcp-port-used",
    "fetch-retry",
    "decompress",
    "@janhq/core",
    "terminate"
  ],
  "installConfig": {
    "hoistingLimits": "workspaces"
  },
  "packageManager": "yarn@4.5.3"
}

resources/models.json
@@ -1,156 +0,0 @@
[
  {
    "sources": [
      {
        "filename": "config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/config.json"
      },
      {
        "filename": "mistral_float16_tp1_rank0.engine",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
      },
      {
        "filename": "tokenizer.model",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
      },
      {
        "filename": "special_tokens_map.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
      },
      {
        "filename": "tokenizer.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
      },
      {
        "filename": "tokenizer_config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
      },
      {
        "filename": "model.cache",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/model.cache"
      }
    ],
    "id": "llamacorn-1.1b-chat-fp16",
    "object": "model",
    "name": "LlamaCorn 1.1B Chat FP16",
    "version": "1.0",
    "description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
    "format": "TensorRT-LLM",
    "settings": {
      "ctx_len": 2048,
      "text_model": false
    },
    "parameters": {
      "max_tokens": 4096
    },
    "metadata": {
      "author": "LLama",
      "tags": ["TensorRT-LLM", "1B", "Finetuned"],
      "size": 2151000000
    },
    "engine": "nitro-tensorrt-llm"
  },
  {
    "sources": [
      {
        "filename": "config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/config.json"
      },
      {
        "filename": "mistral_float16_tp1_rank0.engine",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
      },
      {
        "filename": "tokenizer.model",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
      },
      {
        "filename": "special_tokens_map.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
      },
      {
        "filename": "tokenizer.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
      },
      {
        "filename": "tokenizer_config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
      },
      {
        "filename": "model.cache",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/model.cache"
      }
    ],
    "id": "tinyjensen-1.1b-chat-fp16",
    "object": "model",
    "name": "TinyJensen 1.1B Chat FP16",
    "version": "1.0",
    "description": "Do you want to chat with Jensen Huang? Here you are",
    "format": "TensorRT-LLM",
    "settings": {
      "ctx_len": 2048,
      "text_model": false
    },
    "parameters": {
      "max_tokens": 4096
    },
    "metadata": {
      "author": "LLama",
      "tags": ["TensorRT-LLM", "1B", "Finetuned"],
      "size": 2151000000
    },
    "engine": "nitro-tensorrt-llm"
  },
  {
    "sources": [
      {
        "filename": "config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/config.json"
      },
      {
        "filename": "mistral_float16_tp1_rank0.engine",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/mistral_float16_tp1_rank0.engine"
      },
      {
        "filename": "tokenizer.model",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.model"
      },
      {
        "filename": "special_tokens_map.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/special_tokens_map.json"
      },
      {
        "filename": "tokenizer.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.json"
      },
      {
        "filename": "tokenizer_config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer_config.json"
      },
      {
        "filename": "model.cache",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/model.cache"
      }
    ],
    "id": "mistral-7b-instruct-int4",
    "object": "model",
    "name": "Mistral 7B Instruct v0.1 INT4",
    "version": "1.0",
    "description": "Mistral 7B Instruct v0.1 INT4",
    "format": "TensorRT-LLM",
    "settings": {
      "ctx_len": 2048,
      "text_model": false,
      "prompt_template": "[INST] {prompt} [/INST]"
    },
    "parameters": {
      "max_tokens": 4096
    },
    "metadata": {
      "author": "MistralAI",
      "tags": ["TensorRT-LLM", "7B", "Finetuned"],
      "size": 3840000000
    },
    "engine": "nitro-tensorrt-llm"
  }
]

rolldown.config.mjs
@@ -1,59 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import modelsJson from './resources/models.json' with { type: 'json' }

export default defineConfig([
  {
    input: 'src/index.ts',
    output: {
      format: 'esm',
      file: 'dist/index.js',
    },
    platform: 'browser',
    define: {
      MODELS: JSON.stringify(modelsJson),
      TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
      PROVIDER: JSON.stringify(packageJson.provider),
      DOWNLOAD_RUNNER_URL:
        process.platform === 'win32'
          ? JSON.stringify(
              'https://github.com/janhq/cortex.tensorrt-llm/releases/download/windows-v<version>-tensorrt-llm-v0.7.1/nitro-windows-v<version>-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz'
            )
          : JSON.stringify(
              'https://github.com/janhq/cortex.tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
            ),
      NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      INFERENCE_URL: JSON.stringify(
        process.env.INFERENCE_URL ||
          `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/v1/chat/completions`
      ),
      COMPATIBILITY: JSON.stringify(packageJson.compatibility),
    },
  },
  {
    input: 'src/node/index.ts',
    external: ['@janhq/core/node'],
    output: {
      format: 'cjs',
      file: 'dist/node/index.cjs.js',
      sourcemap: false,
      inlineDynamicImports: true,
    },
    replace: {
      TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
      PROVIDER: JSON.stringify(packageJson.provider),
      LOAD_MODEL_URL: JSON.stringify(
        `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
      ),
      TERMINATE_ENGINE_URL: JSON.stringify(
        `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/processmanager/destroy`
      ),
      ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'),
      ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? '3928'),
    },
    resolve: {
      extensions: ['.js', '.ts', '.json'],
    },
    platform: 'node',
  },
])
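
The `<version>` and `<gpuarch>` placeholders in `DOWNLOAD_RUNNER_URL` (like the `<gpuarch>`/`<os>` placeholders in the model source URLs above) survive into the bundle and are only resolved at install time. A minimal sketch of that substitution, mirroring the `.replace()` calls in `src/index.ts` below; the `'0.1.8'` and `'ampere'` arguments are illustrative:

```typescript
// Hedged sketch of install-time placeholder resolution.
const runnerUrlTemplate =
  'https://github.com/janhq/cortex.tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'

function resolveRunnerUrl(tensorrtVersion: string, gpuArch: string): string {
  // The same global-regex substitution that install() in src/index.ts applies.
  return runnerUrlTemplate
    .replace(/<version>/g, tensorrtVersion)
    .replace(/<gpuarch>/g, gpuArch)
}

// e.g. resolveRunnerUrl('0.1.8', 'ampere') ->
// .../download/linux-v0.1.8/nitro-linux-v0.1.8-amd64-tensorrt-llm-ampere.tar.gz
console.log(resolveRunnerUrl('0.1.8', 'ampere'))
```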

@@ -1,11 +0,0 @@
declare const NODE: string
declare const INFERENCE_URL: string
declare const LOAD_MODEL_URL: string
declare const TERMINATE_ENGINE_URL: string
declare const ENGINE_HOST: string
declare const ENGINE_PORT: string
declare const DOWNLOAD_RUNNER_URL: string
declare const TENSORRT_VERSION: string
declare const COMPATIBILITY: object
declare const PROVIDER: string
declare const MODELS: Array<any>
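
These globals have no runtime definition anywhere in the extension; they exist only so TypeScript accepts the identifiers that the `define`/`replace` options in `rolldown.config.mjs` substitute at build time. A hedged illustration of that substitution (the `'0.1.8'` literal is the `tensorrtVersion` pinned in `package.json`; the directory name is illustrative):

```typescript
// Declaration only; no value exists at runtime until the bundler rewrites it.
declare const TENSORRT_VERSION: string

// As written in source:
const engineDir = `engines/nitro-tensorrt-llm/${TENSORRT_VERSION}`

// After bundling with define: { TENSORRT_VERSION: JSON.stringify('0.1.8') },
// the emitted code is equivalent to:
//   const engineDir = `engines/nitro-tensorrt-llm/${'0.1.8'}`
console.log(engineDir)
```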

@@ -1,186 +0,0 @@
import TensorRTLLMExtension from '../src/index'
import {
  executeOnMain,
  systemInformation,
  fs,
  baseName,
  joinPath,
  downloadFile,
} from '@janhq/core'

jest.mock('@janhq/core', () => ({
  ...jest.requireActual('@janhq/core/node'),
  LocalOAIEngine: jest.fn().mockImplementation(function () {
    // @ts-ignore
    this.registerModels = () => {
      return Promise.resolve()
    }
    // @ts-ignore
    return this
  }),
  systemInformation: jest.fn(),
  fs: {
    existsSync: jest.fn(),
    mkdir: jest.fn(),
  },
  joinPath: jest.fn(),
  baseName: jest.fn(),
  downloadFile: jest.fn(),
  executeOnMain: jest.fn(),
  showToast: jest.fn(),
  events: {
    emit: jest.fn(),
    // @ts-ignore
    on: (event, func) => {
      func({ fileName: './' })
    },
    off: jest.fn(),
  },
}))

// @ts-ignore
global.COMPATIBILITY = {
  platform: ['win32'],
}
// @ts-ignore
global.PROVIDER = 'tensorrt-llm'
// @ts-ignore
global.INFERENCE_URL = 'http://localhost:5000'
// @ts-ignore
global.NODE = 'node'
// @ts-ignore
global.MODELS = []
// @ts-ignore
global.TENSORRT_VERSION = ''
// @ts-ignore
global.DOWNLOAD_RUNNER_URL = ''

describe('TensorRTLLMExtension', () => {
  let extension: TensorRTLLMExtension

  beforeEach(() => {
    // @ts-ignore
    extension = new TensorRTLLMExtension()
    jest.clearAllMocks()
  })

  describe('compatibility', () => {
    it('should return the correct compatibility', () => {
      const result = extension.compatibility()
      expect(result).toEqual({
        platform: ['win32'],
      })
    })
  })

  describe('install', () => {
    it('should install if compatible', async () => {
      const mockSystemInfo: any = {
        osInfo: { platform: 'win32' },
        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
      }
      ;(executeOnMain as jest.Mock).mockResolvedValue({})
      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
      ;(fs.existsSync as jest.Mock).mockResolvedValue(false)
      ;(fs.mkdir as jest.Mock).mockResolvedValue(undefined)
      ;(baseName as jest.Mock).mockResolvedValue('./')
      ;(joinPath as jest.Mock).mockResolvedValue('./')
      ;(downloadFile as jest.Mock).mockResolvedValue({})

      await extension.install()

      expect(executeOnMain).toHaveBeenCalled()
    })

    it('should not install if not compatible', async () => {
      const mockSystemInfo: any = {
        osInfo: { platform: 'linux' },
        gpuSetting: { gpus: [{ arch: 'pascal', name: 'NVIDIA GPU' }] },
      }
      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)

      jest.spyOn(extension, 'registerModels').mockReturnValue(Promise.resolve())
      await extension.install()

      expect(executeOnMain).not.toHaveBeenCalled()
    })
  })

  describe('installationState', () => {
    it('should return NotCompatible if not compatible', async () => {
      const mockSystemInfo: any = {
        osInfo: { platform: 'linux' },
        gpuSetting: { gpus: [{ arch: 'pascal', name: 'NVIDIA GPU' }] },
      }
      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)

      const result = await extension.installationState()

      expect(result).toBe('NotCompatible')
    })

    it('should return Installed if executable exists', async () => {
      const mockSystemInfo: any = {
        osInfo: { platform: 'win32' },
        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
      }
      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
      ;(fs.existsSync as jest.Mock).mockResolvedValue(true)

      const result = await extension.installationState()

      expect(result).toBe('Installed')
    })

    it('should return NotInstalled if executable does not exist', async () => {
      const mockSystemInfo: any = {
        osInfo: { platform: 'win32' },
        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
      }
      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
      ;(fs.existsSync as jest.Mock).mockResolvedValue(false)

      const result = await extension.installationState()

      expect(result).toBe('NotInstalled')
    })
  })

  describe('isCompatible', () => {
    it('should return true for compatible system', () => {
      const mockInfo: any = {
        osInfo: { platform: 'win32' },
        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
      }

      const result = extension.isCompatible(mockInfo)

      expect(result).toBe(true)
    })

    it('should return false for incompatible system', () => {
      const mockInfo: any = {
        osInfo: { platform: 'linux' },
        gpuSetting: { gpus: [{ arch: 'pascal', name: 'AMD GPU' }] },
      }

      const result = extension.isCompatible(mockInfo)

      expect(result).toBe(false)
    })
  })
})

describe('GitHub Release File URL Test', () => {
  const url =
    'https://github.com/janhq/cortex.tensorrt-llm/releases/download/windows-v0.1.8-tensorrt-llm-v0.7.1/nitro-windows-v0.1.8-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz'

  it('should return a status code 200 for the release file URL', async () => {
    const response = await fetch(url, { method: 'HEAD' })
    expect(response.status).toBe(200)
  })

  it('should not return a 404 status', async () => {
    const response = await fetch(url, { method: 'HEAD' })
    expect(response.status).not.toBe(404)
  })
})

src/index.ts
@@ -1,197 +0,0 @@
/**
 * @module tensorrt-llm-extension/src/index
 */

import {
  Compatibility,
  DownloadEvent,
  DownloadRequest,
  DownloadState,
  InstallationState,
  baseName,
  downloadFile,
  events,
  executeOnMain,
  joinPath,
  showToast,
  systemInformation,
  LocalOAIEngine,
  fs,
  MessageRequest,
  ModelEvent,
  getJanDataFolderPath,
  SystemInformation,
  Model,
} from '@janhq/core'

/**
 * TensorRTLLMExtension - Implementation of LocalOAIEngine
 * @extends BaseOAILocalInferenceProvider
 * Provides pre-populated models for TensorRT-LLM
 */
export default class TensorRTLLMExtension extends LocalOAIEngine {
  /**
   * Overrides the custom function names for loading and unloading the model,
   * which are implemented in the node module.
   */
  override provider = PROVIDER
  override inferenceUrl = INFERENCE_URL
  override nodeModule = NODE

  private supportedGpuArch = ['ampere', 'ada']

  override compatibility() {
    return COMPATIBILITY as unknown as Compatibility
  }

  override async onLoad(): Promise<void> {
    super.onLoad()

    if ((await this.installationState()) === 'Installed') {
      const models = MODELS as unknown as Model[]
      this.registerModels(models)
    }
  }

  override async install(): Promise<void> {
    await this.removePopulatedModels()

    const info = await systemInformation()

    if (!this.isCompatible(info)) return

    const janDataFolderPath = await getJanDataFolderPath()
    const engineVersion = TENSORRT_VERSION

    const executableFolderPath = await joinPath([
      janDataFolderPath,
      'engines',
      this.provider,
      engineVersion,
      info.gpuSetting?.gpus[0].arch,
    ])

    if (!(await fs.existsSync(executableFolderPath))) {
      await fs.mkdir(executableFolderPath)
    }

    const placeholderUrl = DOWNLOAD_RUNNER_URL
    const tensorrtVersion = TENSORRT_VERSION

    const url = placeholderUrl
      .replace(/<version>/g, tensorrtVersion)
      .replace(/<gpuarch>/g, info.gpuSetting!.gpus[0]!.arch!)

    const tarball = await baseName(url)

    const tarballFullPath = await joinPath([executableFolderPath, tarball])
    const downloadRequest: DownloadRequest = {
      url,
      localPath: tarballFullPath,
      extensionId: this.name,
      downloadType: 'extension',
    }
    downloadFile(downloadRequest)

    const onFileDownloadSuccess = async (state: DownloadState) => {
      // If this event belongs to another download, ignore it
      if (state.fileName !== tarball) return
      events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
      await executeOnMain(
        this.nodeModule,
        'decompressRunner',
        tarballFullPath,
        executableFolderPath
      )
      events.emit(DownloadEvent.onFileUnzipSuccess, state)

      // Pre-populate models as soon as the engine is ready
      const models = MODELS as unknown as Model[]
      this.registerModels(models).then(() => {
        showToast(
          'Extension installed successfully.',
          'New models are added to Model Hub.'
        )
      })
    }
    events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
  }

  private async removePopulatedModels(): Promise<void> {
    const models = MODELS as unknown as Model[]
    console.debug(`removePopulatedModels`, JSON.stringify(models))
    const janDataFolderPath = await getJanDataFolderPath()
    const modelFolderPath = await joinPath([janDataFolderPath, 'models'])

    for (const model of models) {
      const modelPath = await joinPath([modelFolderPath, model.id])

      try {
        await fs.rm(modelPath)
      } catch (err) {
        console.error(`Error removing model ${modelPath}`, err)
      }
    }
    events.emit(ModelEvent.OnModelsUpdate, {})
  }

  override async loadModel(model: Model): Promise<void> {
    if ((await this.installationState()) === 'Installed')
      return super.loadModel(model)

    throw new Error('EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension')
  }

  override async installationState(): Promise<InstallationState> {
    const info = await systemInformation()

    if (!this.isCompatible(info)) return 'NotCompatible'
    const firstGpu = info.gpuSetting?.gpus[0]
    const janDataFolderPath = await getJanDataFolderPath()
    const engineVersion = TENSORRT_VERSION

    const enginePath = await joinPath([
      janDataFolderPath,
      'engines',
      this.provider,
      engineVersion,
      firstGpu.arch,
      info.osInfo.platform === 'win32' ? 'nitro.exe' : 'nitro',
    ])

    // For now, we just check for the nitro x TensorRT executable
    return (await fs.existsSync(enginePath)) ? 'Installed' : 'NotInstalled'
  }

  override stopInference() {
    if (!this.loadedModel) return
    showToast(
      'Unable to Stop Inference',
      'The model does not support stopping inference.'
    )
    return Promise.resolve()
  }

  override async inference(data: MessageRequest) {
    if (!this.loadedModel) return
    // The TensorRT-LLM extension supports streaming only
    if (data.model && data.model.parameters) data.model.parameters.stream = true
    super.inference(data)
  }

  isCompatible(info: SystemInformation): info is Required<SystemInformation> & {
    gpuSetting: { gpus: { arch: string }[] }
  } {
    const firstGpu = info.gpuSetting?.gpus[0]
    return (
      !!info.osInfo &&
      !!info.gpuSetting &&
      !!firstGpu &&
      info.gpuSetting.gpus.length > 0 &&
      this.compatibility().platform.includes(info.osInfo.platform) &&
      !!firstGpu.arch &&
      firstGpu.name.toLowerCase().includes('nvidia') &&
      this.supportedGpuArch.includes(firstGpu.arch)
    )
  }
}

src/node/index.ts
@@ -1,325 +0,0 @@
import path from 'path'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import {
  log,
  getJanDataFolderPath,
  SystemInformation,
  PromptTemplate,
} from '@janhq/core/node'
import decompress from 'decompress'
import terminate from 'terminate'

// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch)

const supportedPlatform = (): string[] => ['win32', 'linux']
const supportedGpuArch = (): string[] => ['ampere', 'ada']
const PORT_CHECK_INTERVAL = 100

/**
 * The params object for a model load operation.
 */
interface ModelLoadParams {
  engine_path: string
  ctx_len: number
}

// The subprocess instance for the engine
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined

/**
 * Initializes an engine subprocess to load a machine learning model.
 * @param params - The model load settings.
 */
async function loadModel(
  params: any,
  systemInfo?: SystemInformation
): Promise<{ error: Error | undefined }> {
  // modelFolder is the absolute path to the running model folder
  // e.g. ~/jan/models/llama-2
  let modelFolder = params.modelFolder

  if (params.model.settings?.prompt_template) {
    const promptTemplate = params.model.settings.prompt_template
    const prompt = promptTemplateConverter(promptTemplate)
    if (prompt?.error) {
      return Promise.reject(prompt.error)
    }
    params.model.settings.system_prompt = prompt.system_prompt
    params.model.settings.user_prompt = prompt.user_prompt
    params.model.settings.ai_prompt = prompt.ai_prompt
  }

  const settings: ModelLoadParams = {
    engine_path: modelFolder,
    ctx_len: params.model.settings.ctx_len ?? 2048,
    ...params.model.settings,
  }
  if (!systemInfo) {
    throw new Error('Cannot get system info. Unable to start nitro x tensorrt.')
  }
  return runEngineAndLoadModel(settings, systemInfo)
}

/**
 * Stops the engine subprocess.
 */
function unloadModel(): Promise<void> {
  const controller = new AbortController()
  setTimeout(() => controller.abort(), 5000)
  debugLog(`Request to kill engine`)

  const killRequest = () => {
    return fetch(TERMINATE_ENGINE_URL, {
      method: 'DELETE',
      signal: controller.signal,
    })
      .then(() => {
        subprocess = undefined
      })
      .catch(() => {}) // Do nothing with this attempt
      .then(() =>
        tcpPortUsed.waitUntilFree(
          parseInt(ENGINE_PORT),
          PORT_CHECK_INTERVAL,
          5000
        )
      ) // Wait for the port to become free
      .then(() => debugLog(`Engine process is terminated`))
      .catch((err) => {
        debugLog(
          `Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
        )
        throw 'PORT_NOT_AVAILABLE'
      })
  }

  if (subprocess?.pid) {
    log(`[CORTEX]:: Killing PID ${subprocess.pid}`)
    const pid = subprocess.pid
    return new Promise((resolve, reject) => {
      terminate(pid, function (err) {
        if (err) {
          return killRequest()
        } else {
          return tcpPortUsed
            .waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
            .then(() => resolve())
            .then(() => log(`[CORTEX]:: cortex process is terminated`))
            .catch(() => {
              killRequest()
            })
        }
      })
    })
  } else {
    return killRequest()
  }
}

/**
 * 1. Spawn the engine process
 * 2. Load the model into the engine subprocess
 * @returns
 */
async function runEngineAndLoadModel(
  settings: ModelLoadParams,
  systemInfo: SystemInformation
) {
  return unloadModel()
    .then(() => runEngine(systemInfo))
    .then(() => loadModelRequest(settings))
    .catch((err) => {
      // TODO: Broadcast the error so the app can display a proper error message
      debugLog(`${err}`, 'Error')
      return { error: err }
    })
}

/**
 * Loads an LLM model into the engine subprocess by sending an HTTP POST request.
 */
async function loadModelRequest(
  settings: ModelLoadParams
): Promise<{ error: Error | undefined }> {
  debugLog(`Loading model with params ${JSON.stringify(settings)}`)
  return fetchRetry(LOAD_MODEL_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(settings),
    retries: 3,
    retryDelay: 500,
  })
    .then((res) => {
      debugLog(`Load model success with response ${JSON.stringify(res)}`)
      return Promise.resolve({ error: undefined })
    })
    .catch((err) => {
      debugLog(`Load model failed with error ${err}`, 'Error')
      return Promise.resolve({ error: err })
    })
}

/**
 * Spawns the engine subprocess.
 */
async function runEngine(systemInfo: SystemInformation): Promise<void> {
  debugLog(`Spawning engine subprocess...`)
  if (systemInfo.gpuSetting == null) {
    return Promise.reject(
      'No GPU information found. Please check your GPU setting.'
    )
  }

  if (systemInfo.gpuSetting?.gpus.length === 0) {
    return Promise.reject('No GPU found. Please check your GPU setting.')
  }

  if (systemInfo.osInfo == null) {
    return Promise.reject(
      'No OS information found. Please check your OS setting.'
    )
  }
  const platform = systemInfo.osInfo.platform
  if (platform == null || supportedPlatform().includes(platform) === false) {
    return Promise.reject(
      'Unsupported OS platform. Please check your OS setting.'
    )
  }

  const gpu = systemInfo.gpuSetting?.gpus[0]
  if (gpu.name.toLowerCase().includes('nvidia') === false) {
    return Promise.reject('No NVIDIA GPU found. Please check your GPU setting.')
  }
  const gpuArch = gpu.arch
  if (gpuArch == null || supportedGpuArch().includes(gpuArch) === false) {
    return Promise.reject(
      `Your GPU: ${gpu.name} is not supported. Only ${supportedGpuArch().join(
        ', '
      )} series are supported.`
    )
  }
  const janDataFolderPath = await getJanDataFolderPath()
  const tensorRtVersion = TENSORRT_VERSION
  const provider = PROVIDER

  return new Promise<void>((resolve, reject) => {
    // Current directory by default

    const executableFolderPath = path.join(
      janDataFolderPath,
      'engines',
      provider,
      tensorRtVersion,
      gpuArch
    )
    const nitroExecutablePath = path.join(
      executableFolderPath,
      platform === 'win32' ? 'nitro.exe' : 'nitro'
    )

    const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
    // Execute the binary
    debugLog(`Spawn nitro at path: ${nitroExecutablePath}, and args: ${args}`)
    subprocess = spawn(nitroExecutablePath, args, {
      cwd: executableFolderPath,
      env: {
        ...process.env,
      },
    })

    // Handle subprocess output
    subprocess.stdout.on('data', (data: any) => {
      debugLog(`${data}`)
    })

    subprocess.stderr.on('data', (data: any) => {
      debugLog(`${data}`)
    })

    subprocess.on('close', (code: any) => {
      debugLog(`Engine exited with code: ${code}`)
      subprocess = undefined
      reject(`child process exited with code ${code}`)
    })

    tcpPortUsed
      .waitUntilUsed(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 30000)
      .then(() => {
        debugLog(`Engine is ready`)
        resolve()
      })
  })
}

function debugLog(message: string, level: string = 'Debug') {
  log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
}

const decompressRunner = async (zipPath: string, output: string) => {
  console.debug(`Decompressing ${zipPath} to ${output}...`)
  try {
    const files = await decompress(zipPath, output)
    console.debug('Decompress finished!', files)
  } catch (err) {
    console.error(`Decompress ${zipPath} failed: ${err}`)
  }
}

/**
 * Parses a prompt template into prompt-part settings.
 * @param promptTemplate Template as string
 * @returns
 */
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
  // Split the string using the markers
  const systemMarker = '{system_message}'
  const promptMarker = '{prompt}'

  if (
    promptTemplate.includes(systemMarker) &&
    promptTemplate.includes(promptMarker)
  ) {
    // Find the indices of the markers
    const systemIndex = promptTemplate.indexOf(systemMarker)
    const promptIndex = promptTemplate.indexOf(promptMarker)

    // Extract the parts of the string
    const system_prompt = promptTemplate.substring(0, systemIndex)
    const user_prompt = promptTemplate.substring(
      systemIndex + systemMarker.length,
      promptIndex
    )
    const ai_prompt = promptTemplate.substring(
      promptIndex + promptMarker.length
    )

    // Return the split parts
    return { system_prompt, user_prompt, ai_prompt }
  } else if (promptTemplate.includes(promptMarker)) {
    // Extract the parts of the string for the case where only promptMarker is present
    const promptIndex = promptTemplate.indexOf(promptMarker)
    const user_prompt = promptTemplate.substring(0, promptIndex)
    const ai_prompt = promptTemplate.substring(
      promptIndex + promptMarker.length
    )

    // Return the split parts
    return { user_prompt, ai_prompt }
  }

  // Return an error if none of the conditions are met
  return { error: 'Cannot split prompt template' }
}
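
// Worked example (illustrative): the Mistral template in resources/models.json,
// '[INST] {prompt} [/INST]', contains only the {prompt} marker, so the second
// branch returns { user_prompt: '[INST] ', ai_prompt: ' [/INST]' }.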

export default {
  supportedPlatform,
  supportedGpuArch,
  decompressRunner,
  loadModel,
  unloadModel,
  dispose: unloadModel,
}

tsconfig.json
@@ -1,21 +0,0 @@
{
  "compilerOptions": {
    "moduleResolution": "node",
    "target": "ES2015",
    "module": "ES2020",
    "lib": ["es2015", "es2016", "es2017", "dom"],
    "strict": true,
    "sourceMap": true,
    "declaration": true,
    "allowSyntheticDefaultImports": true,
    "experimentalDecorators": true,
    "emitDecoratorMetadata": true,
    "declarationDir": "dist/types",
    "outDir": "dist",
    "importHelpers": true,
    "resolveJsonModule": true,
    "typeRoots": ["node_modules/@types"]
  },
  "include": ["src"],
  "exclude": ["**/*.test.ts"]
}