chore: deprecate tensorrt-llm extension (#4453)
parent 58bb1b4939
commit 06ee10be1b
@@ -1,79 +0,0 @@

# TensorRT-LLM Extension

Created using Jan extension example

# Create a Jan Extension using Typescript

Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀

## Create Your Own Extension

To create your own extension, you can use this repository as a template! Just follow the instructions below:

1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository

## Initial Setup

After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.

> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!

1. :hammer_and_wrench: Install the dependencies

   ```bash
   npm install
   ```

1. :building_construction: Package the TypeScript for distribution

   ```bash
   npm run bundle
   ```

1. :white_check_mark: Check your artifact

   There will be a `.tgz` file in your extension directory now

## Update the Extension Metadata

The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.

When you copy this repository, update `package.json` with the name and description of your extension.

## Update the Extension Code

The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.

There are a few things to keep in mind when writing your extension code:

- Most Jan Extension functions are processed asynchronously.
  In `index.ts`, you will see that the extension function will return a `Promise<any>`.

  ```typescript
  import { events, MessageEvent, MessageRequest } from '@janhq/core'

  function onStart(): Promise<any> {
    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.inference(data)
    )
  }
  ```
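
Results are typically reported back over the same event bus. As a minimal sketch (assuming the `events.emit` API and the `MessageEvent.OnMessageUpdate` member exported by `@janhq/core`), a handler can broadcast an updated message once inference produces output:

```typescript
import { events, MessageEvent, ThreadMessage } from '@janhq/core'

// Publish an updated message so the rest of the app can react to it.
function publishUpdate(message: ThreadMessage) {
  events.emit(MessageEvent.OnMessageUpdate, message)
}
```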

For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).

So, what are you waiting for? Go ahead and start customizing your extension!
@@ -1,9 +0,0 @@

/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
  preset: 'ts-jest',
  testEnvironment: 'node',
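  // Transform @janhq/core sources with ts-jest instead of skipping them with
  // the rest of node_modules (see transformIgnorePatterns below).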
  transform: {
    'node_modules/@janhq/core/.+\\.(j|t)s?$': 'ts-jest',
  },
  transformIgnorePatterns: ['node_modules/(?!@janhq/core/.*)'],
}
@@ -1,78 +0,0 @@

{
  "name": "@janhq/tensorrt-llm-extension",
  "productName": "TensorRT-LLM Inference Engine",
  "version": "0.0.3",
  "description": "This extension enables Nvidia's TensorRT-LLM for the fastest GPU acceleration. See the [setup guide](https://jan.ai/guides/providers/tensorrt-llm/) for next steps.",
  "main": "dist/index.js",
  "node": "dist/node/index.cjs.js",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "config": {
    "host": "127.0.0.1",
    "port": "3929"
  },
  "compatibility": {
    "platform": [
      "win32"
    ],
    "app": [
      "0.1.0"
    ]
  },
  "tensorrtVersion": "0.1.8",
  "provider": "nitro-tensorrt-llm",
  "scripts": {
    "test": "jest",
    "build": "rolldown -c rolldown.config.mjs",
    "build:publish": "rimraf *.tgz --glob || true && yarn build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install"
  },
  "exports": {
    ".": "./dist/index.js",
    "./main": "./dist/node/index.cjs.js"
  },
  "devDependencies": {
    "@types/decompress": "4.2.7",
    "@types/jest": "^29.5.12",
    "@types/node": "^20.11.4",
    "@types/os-utils": "^0.0.4",
    "@types/tcp-port-used": "^1.0.4",
    "cpx": "^1.5.0",
    "download-cli": "^1.1.1",
    "jest": "^29.7.0",
    "jest-junit": "^16.0.0",
    "jest-runner": "^29.7.0",
    "rimraf": "^3.0.2",
    "rolldown": "1.0.0-beta.1",
    "run-script-os": "^1.1.6",
    "ts-jest": "^29.2.5",
    "typescript": "^5.2.2"
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "decompress": "^4.2.1",
    "fetch-retry": "^5.0.6",
    "rxjs": "^7.8.1",
    "tcp-port-used": "^1.0.2",
    "terminate": "^2.6.1",
    "ulidx": "^2.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "tcp-port-used",
    "fetch-retry",
    "decompress",
    "@janhq/core",
    "terminate"
  ],
  "installConfig": {
    "hoistingLimits": "workspaces"
  },
  "packageManager": "yarn@4.5.3"
}
@@ -1,156 +0,0 @@

[
  {
    "sources": [
      {
        "filename": "config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/config.json"
      },
      {
        "filename": "mistral_float16_tp1_rank0.engine",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
      },
      {
        "filename": "tokenizer.model",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
      },
      {
        "filename": "special_tokens_map.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
      },
      {
        "filename": "tokenizer.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
      },
      {
        "filename": "tokenizer_config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
      },
      {
        "filename": "model.cache",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/model.cache"
      }
    ],
    "id": "llamacorn-1.1b-chat-fp16",
    "object": "model",
    "name": "LlamaCorn 1.1B Chat FP16",
    "version": "1.0",
    "description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
    "format": "TensorRT-LLM",
    "settings": {
      "ctx_len": 2048,
      "text_model": false
    },
    "parameters": {
      "max_tokens": 4096
    },
    "metadata": {
      "author": "LLama",
      "tags": ["TensorRT-LLM", "1B", "Finetuned"],
      "size": 2151000000
    },
    "engine": "nitro-tensorrt-llm"
  },
  {
    "sources": [
      {
        "filename": "config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/config.json"
      },
      {
        "filename": "mistral_float16_tp1_rank0.engine",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
      },
      {
        "filename": "tokenizer.model",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
      },
      {
        "filename": "special_tokens_map.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
      },
      {
        "filename": "tokenizer.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
      },
      {
        "filename": "tokenizer_config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
      },
      {
        "filename": "model.cache",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/model.cache"
      }
    ],
    "id": "tinyjensen-1.1b-chat-fp16",
    "object": "model",
    "name": "TinyJensen 1.1B Chat FP16",
    "version": "1.0",
    "description": "Do you want to chat with Jensen Huang? Here you are",
    "format": "TensorRT-LLM",
    "settings": {
      "ctx_len": 2048,
      "text_model": false
    },
    "parameters": {
      "max_tokens": 4096
    },
    "metadata": {
      "author": "LLama",
      "tags": ["TensorRT-LLM", "1B", "Finetuned"],
      "size": 2151000000
    },
    "engine": "nitro-tensorrt-llm"
  },
  {
    "sources": [
      {
        "filename": "config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/config.json"
      },
      {
        "filename": "mistral_float16_tp1_rank0.engine",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/mistral_float16_tp1_rank0.engine"
      },
      {
        "filename": "tokenizer.model",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.model"
      },
      {
        "filename": "special_tokens_map.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/special_tokens_map.json"
      },
      {
        "filename": "tokenizer.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.json"
      },
      {
        "filename": "tokenizer_config.json",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer_config.json"
      },
      {
        "filename": "model.cache",
        "url": "https://catalog.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/model.cache"
      }
    ],
    "id": "mistral-7b-instruct-int4",
    "object": "model",
    "name": "Mistral 7B Instruct v0.1 INT4",
    "version": "1.0",
    "description": "Mistral 7B Instruct v0.1 INT4",
    "format": "TensorRT-LLM",
    "settings": {
      "ctx_len": 2048,
      "text_model": false,
      "prompt_template": "[INST] {prompt} [/INST]"
    },
    "parameters": {
      "max_tokens": 4096
    },
    "metadata": {
      "author": "MistralAI",
      "tags": ["TensorRT-LLM", "7B", "Finetuned"],
      "size": 3840000000
    },
    "engine": "nitro-tensorrt-llm"
  }
]
@@ -1,59 +0,0 @@

import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import modelsJson from './resources/models.json' with { type: 'json' }

export default defineConfig([
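  // Two bundles: a browser/renderer entry (package.json "main") and a Node
  // entry (package.json "node") that the app runs via executeOnMain.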
  {
    input: 'src/index.ts',
    output: {
      format: 'esm',
      file: 'dist/index.js',
    },
    platform: 'browser',
    define: {
      MODELS: JSON.stringify(modelsJson),
      TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
      PROVIDER: JSON.stringify(packageJson.provider),
      DOWNLOAD_RUNNER_URL:
        process.platform === 'win32'
          ? JSON.stringify(
              'https://github.com/janhq/cortex.tensorrt-llm/releases/download/windows-v<version>-tensorrt-llm-v0.7.1/nitro-windows-v<version>-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz'
            )
          : JSON.stringify(
              'https://github.com/janhq/cortex.tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
            ),
      NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      INFERENCE_URL: JSON.stringify(
        process.env.INFERENCE_URL ||
          `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/v1/chat/completions`
      ),
      COMPATIBILITY: JSON.stringify(packageJson.compatibility),
    },
  },
  {
    input: 'src/node/index.ts',
    external: ['@janhq/core/node'],
    output: {
      format: 'cjs',
      file: 'dist/node/index.cjs.js',
      sourcemap: false,
      inlineDynamicImports: true,
    },
    replace: {
      TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
      PROVIDER: JSON.stringify(packageJson.provider),
      LOAD_MODEL_URL: JSON.stringify(
        `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
      ),
      TERMINATE_ENGINE_URL: JSON.stringify(
        `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/processmanager/destroy`
      ),
      ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'),
      ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? '3928'),
    },
    resolve: {
      extensions: ['.js', '.ts', '.json'],
    },
    platform: 'node',
  },
])
@@ -1,11 +0,0 @@

declare const NODE: string
declare const INFERENCE_URL: string
declare const LOAD_MODEL_URL: string
declare const TERMINATE_ENGINE_URL: string
declare const ENGINE_HOST: string
declare const ENGINE_PORT: string
declare const DOWNLOAD_RUNNER_URL: string
declare const TENSORRT_VERSION: string
declare const COMPATIBILITY: object
declare const PROVIDER: string
declare const MODELS: Array<any>
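
// None of these identifiers exists as a real global at runtime: rolldown
// substitutes each one with a literal at bundle time, via the `define` map
// (browser bundle) and the `replace` map (Node bundle) in rolldown.config.mjs.
// For example, TENSORRT_VERSION becomes the JSON-stringified "tensorrtVersion"
// field of package.json.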
@@ -1,186 +0,0 @@

import TensorRTLLMExtension from '../src/index'
import {
  executeOnMain,
  systemInformation,
  fs,
  baseName,
  joinPath,
  downloadFile,
} from '@janhq/core'

jest.mock('@janhq/core', () => ({
  ...jest.requireActual('@janhq/core/node'),
  LocalOAIEngine: jest.fn().mockImplementation(function () {
    // @ts-ignore
    this.registerModels = () => {
      return Promise.resolve()
    }
    // @ts-ignore
    return this
  }),
  systemInformation: jest.fn(),
  fs: {
    existsSync: jest.fn(),
    mkdir: jest.fn(),
  },
  joinPath: jest.fn(),
  baseName: jest.fn(),
  downloadFile: jest.fn(),
  executeOnMain: jest.fn(),
  showToast: jest.fn(),
  events: {
    emit: jest.fn(),
    // @ts-ignore
    on: (event, func) => {
      func({ fileName: './' })
    },
    off: jest.fn(),
  },
}))
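
// These globals stand in for the build-time constants that rolldown injects
// via its `define` map (see rolldown.config.mjs).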
// @ts-ignore
global.COMPATIBILITY = {
  platform: ['win32'],
}
// @ts-ignore
global.PROVIDER = 'tensorrt-llm'
// @ts-ignore
global.INFERENCE_URL = 'http://localhost:5000'
// @ts-ignore
global.NODE = 'node'
// @ts-ignore
global.MODELS = []
// @ts-ignore
global.TENSORRT_VERSION = ''
// @ts-ignore
global.DOWNLOAD_RUNNER_URL = ''

describe('TensorRTLLMExtension', () => {
  let extension: TensorRTLLMExtension

  beforeEach(() => {
    // @ts-ignore
    extension = new TensorRTLLMExtension()
    jest.clearAllMocks()
  })

  describe('compatibility', () => {
    it('should return the correct compatibility', () => {
      const result = extension.compatibility()
      expect(result).toEqual({
        platform: ['win32'],
      })
    })
  })

  describe('install', () => {
    it('should install if compatible', async () => {
      const mockSystemInfo: any = {
        osInfo: { platform: 'win32' },
        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
      }
      ;(executeOnMain as jest.Mock).mockResolvedValue({})
      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
      ;(fs.existsSync as jest.Mock).mockResolvedValue(false)
      ;(fs.mkdir as jest.Mock).mockResolvedValue(undefined)
      ;(baseName as jest.Mock).mockResolvedValue('./')
      ;(joinPath as jest.Mock).mockResolvedValue('./')
      ;(downloadFile as jest.Mock).mockResolvedValue({})

      await extension.install()

      expect(executeOnMain).toHaveBeenCalled()
    })

    it('should not install if not compatible', async () => {
      const mockSystemInfo: any = {
        osInfo: { platform: 'linux' },
        gpuSetting: { gpus: [{ arch: 'pascal', name: 'NVIDIA GPU' }] },
      }
      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)

      jest.spyOn(extension, 'registerModels').mockReturnValue(Promise.resolve())
      await extension.install()

      expect(executeOnMain).not.toHaveBeenCalled()
    })
  })

  describe('installationState', () => {
    it('should return NotCompatible if not compatible', async () => {
      const mockSystemInfo: any = {
        osInfo: { platform: 'linux' },
        gpuSetting: { gpus: [{ arch: 'pascal', name: 'NVIDIA GPU' }] },
      }
      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)

      const result = await extension.installationState()

      expect(result).toBe('NotCompatible')
    })

    it('should return Installed if executable exists', async () => {
      const mockSystemInfo: any = {
        osInfo: { platform: 'win32' },
        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
      }
      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
      ;(fs.existsSync as jest.Mock).mockResolvedValue(true)

      const result = await extension.installationState()

      expect(result).toBe('Installed')
    })

    it('should return NotInstalled if executable does not exist', async () => {
      const mockSystemInfo: any = {
        osInfo: { platform: 'win32' },
        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
      }
      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
      ;(fs.existsSync as jest.Mock).mockResolvedValue(false)

      const result = await extension.installationState()

      expect(result).toBe('NotInstalled')
    })
  })

  describe('isCompatible', () => {
    it('should return true for compatible system', () => {
      const mockInfo: any = {
        osInfo: { platform: 'win32' },
        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
      }

      const result = extension.isCompatible(mockInfo)

      expect(result).toBe(true)
    })

    it('should return false for incompatible system', () => {
      const mockInfo: any = {
        osInfo: { platform: 'linux' },
        gpuSetting: { gpus: [{ arch: 'pascal', name: 'AMD GPU' }] },
      }

      const result = extension.isCompatible(mockInfo)

      expect(result).toBe(false)
    })
  })
})

describe('GitHub Release File URL Test', () => {
  const url =
    'https://github.com/janhq/cortex.tensorrt-llm/releases/download/windows-v0.1.8-tensorrt-llm-v0.7.1/nitro-windows-v0.1.8-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz'

  it('should return a status code 200 for the release file URL', async () => {
    const response = await fetch(url, { method: 'HEAD' })
    expect(response.status).toBe(200)
  })

  it('should not return a 404 status', async () => {
    const response = await fetch(url, { method: 'HEAD' })
    expect(response.status).not.toBe(404)
  })
})
@@ -1,197 +0,0 @@

/**
 * @module tensorrt-llm-extension/src/index
 */

import {
  Compatibility,
  DownloadEvent,
  DownloadRequest,
  DownloadState,
  InstallationState,
  baseName,
  downloadFile,
  events,
  executeOnMain,
  joinPath,
  showToast,
  systemInformation,
  LocalOAIEngine,
  fs,
  MessageRequest,
  ModelEvent,
  getJanDataFolderPath,
  SystemInformation,
  Model,
} from '@janhq/core'

/**
 * TensorRTLLMExtension - Implementation of LocalOAIEngine
 * @extends BaseOAILocalInferenceProvider
 * Provides pre-populated models for TensorRT-LLM
 */
export default class TensorRTLLMExtension extends LocalOAIEngine {
  /**
   * Overrides the custom function names for loading and unloading the model,
   * which are implemented in the node module.
   */
  override provider = PROVIDER
  override inferenceUrl = INFERENCE_URL
  override nodeModule = NODE

  private supportedGpuArch = ['ampere', 'ada']

  override compatibility() {
    return COMPATIBILITY as unknown as Compatibility
  }

  override async onLoad(): Promise<void> {
    super.onLoad()

    if ((await this.installationState()) === 'Installed') {
      const models = MODELS as unknown as Model[]
      this.registerModels(models)
    }
  }

  override async install(): Promise<void> {
    await this.removePopulatedModels()

    const info = await systemInformation()

    if (!this.isCompatible(info)) return

    const janDataFolderPath = await getJanDataFolderPath()
    const engineVersion = TENSORRT_VERSION

    const executableFolderPath = await joinPath([
      janDataFolderPath,
      'engines',
      this.provider,
      engineVersion,
      info.gpuSetting?.gpus[0].arch,
    ])

    if (!(await fs.existsSync(executableFolderPath))) {
      await fs.mkdir(executableFolderPath)
    }

    const placeholderUrl = DOWNLOAD_RUNNER_URL
    const tensorrtVersion = TENSORRT_VERSION

    const url = placeholderUrl
      .replace(/<version>/g, tensorrtVersion)
      .replace(/<gpuarch>/g, info.gpuSetting!.gpus[0]!.arch!)

    const tarball = await baseName(url)

    const tarballFullPath = await joinPath([executableFolderPath, tarball])
    const downloadRequest: DownloadRequest = {
      url,
      localPath: tarballFullPath,
      extensionId: this.name,
      downloadType: 'extension',
    }
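    // Fire-and-forget: the download's completion is observed through the
    // DownloadEvent.onFileDownloadSuccess handler registered below.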
    downloadFile(downloadRequest)

    const onFileDownloadSuccess = async (state: DownloadState) => {
      // if other download, ignore
      if (state.fileName !== tarball) return
      events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
      await executeOnMain(
        this.nodeModule,
        'decompressRunner',
        tarballFullPath,
        executableFolderPath
      )
      events.emit(DownloadEvent.onFileUnzipSuccess, state)

      // Prepopulate models as soon as the engine is ready
      const models = MODELS as unknown as Model[]
      this.registerModels(models).then(() => {
        showToast(
          'Extension installed successfully.',
          'New models are added to Model Hub.'
        )
      })
    }
    events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
  }

  private async removePopulatedModels(): Promise<void> {
    const models = MODELS as unknown as Model[]
    console.debug(`removePopulatedModels`, JSON.stringify(models))
    const janDataFolderPath = await getJanDataFolderPath()
    const modelFolderPath = await joinPath([janDataFolderPath, 'models'])

    for (const model of models) {
      const modelPath = await joinPath([modelFolderPath, model.id])

      try {
        await fs.rm(modelPath)
      } catch (err) {
        console.error(`Error removing model ${modelPath}`, err)
      }
    }
    events.emit(ModelEvent.OnModelsUpdate, {})
  }

  override async loadModel(model: Model): Promise<void> {
    if ((await this.installationState()) === 'Installed')
      return super.loadModel(model)

    throw new Error('EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension')
  }

  override async installationState(): Promise<InstallationState> {
    const info = await systemInformation()

    if (!this.isCompatible(info)) return 'NotCompatible'
    const firstGpu = info.gpuSetting?.gpus[0]
    const janDataFolderPath = await getJanDataFolderPath()
    const engineVersion = TENSORRT_VERSION

    const enginePath = await joinPath([
      janDataFolderPath,
      'engines',
      this.provider,
      engineVersion,
      firstGpu.arch,
      info.osInfo.platform === 'win32' ? 'nitro.exe' : 'nitro',
    ])

    // For now, we just check for the nitro x TensorRT executable
    return (await fs.existsSync(enginePath)) ? 'Installed' : 'NotInstalled'
  }

  override stopInference() {
    if (!this.loadedModel) return
    showToast(
      'Unable to Stop Inference',
      'The model does not support stopping inference.'
    )
    return Promise.resolve()
  }

  override async inference(data: MessageRequest) {
    if (!this.loadedModel) return
    // The TensorRT-LLM extension supports streaming only
    if (data.model && data.model.parameters) data.model.parameters.stream = true
    super.inference(data)
  }

  isCompatible(info: SystemInformation): info is Required<SystemInformation> & {
    gpuSetting: { gpus: { arch: string }[] }
  } {
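    // Type predicate: on success, narrows `info` so callers can safely read
    // osInfo, gpuSetting and gpus[0].arch without further checks.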
    const firstGpu = info.gpuSetting?.gpus[0]
    return (
      !!info.osInfo &&
      !!info.gpuSetting &&
      !!firstGpu &&
      info.gpuSetting.gpus.length > 0 &&
      this.compatibility().platform.includes(info.osInfo.platform) &&
      !!firstGpu.arch &&
      firstGpu.name.toLowerCase().includes('nvidia') &&
      this.supportedGpuArch.includes(firstGpu.arch)
    )
  }
}
@@ -1,325 +0,0 @@

import path from 'path'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import {
  log,
  getJanDataFolderPath,
  SystemInformation,
  PromptTemplate,
} from '@janhq/core/node'
import decompress from 'decompress'
import terminate from 'terminate'

// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch)

const supportedPlatform = (): string[] => ['win32', 'linux']
const supportedGpuArch = (): string[] => ['ampere', 'ada']
const PORT_CHECK_INTERVAL = 100

/**
 * Parameters for a model-load request sent to the engine.
 */
interface ModelLoadParams {
  engine_path: string
  ctx_len: number
}

// The subprocess instance for the engine
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined

/**
 * Initializes an engine subprocess to load a machine learning model.
 * @param params - The model load settings.
 */
async function loadModel(
  params: any,
  systemInfo?: SystemInformation
): Promise<{ error: Error | undefined }> {
  // modelFolder is the absolute path to the running model folder
  // e.g. ~/jan/models/llama-2
  let modelFolder = params.modelFolder

  if (params.model.settings?.prompt_template) {
    const promptTemplate = params.model.settings.prompt_template
    const prompt = promptTemplateConverter(promptTemplate)
    if (prompt?.error) {
      return Promise.reject(prompt.error)
    }
    params.model.settings.system_prompt = prompt.system_prompt
    params.model.settings.user_prompt = prompt.user_prompt
    params.model.settings.ai_prompt = prompt.ai_prompt
  }

  const settings: ModelLoadParams = {
    engine_path: modelFolder,
    ctx_len: params.model.settings.ctx_len ?? 2048,
    ...params.model.settings,
  }
  if (!systemInfo) {
    throw new Error('Cannot get system info. Unable to start nitro x TensorRT.')
  }
  return runEngineAndLoadModel(settings, systemInfo)
}

/**
 * Stops the engine subprocess.
 */
function unloadModel(): Promise<void> {
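  // Prefer killing the spawned process by PID; otherwise fall back to the
  // engine's HTTP terminate endpoint, then wait for the port to be freed.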
  const controller = new AbortController()
  setTimeout(() => controller.abort(), 5000)
  debugLog(`Request to kill engine`)

  const killRequest = () => {
    return fetch(TERMINATE_ENGINE_URL, {
      method: 'DELETE',
      signal: controller.signal,
    })
      .then(() => {
        subprocess = undefined
      })
      .catch(() => {}) // Do nothing with this attempt
      .then(() =>
        tcpPortUsed.waitUntilFree(
          parseInt(ENGINE_PORT),
          PORT_CHECK_INTERVAL,
          5000
        )
      ) // Wait for the port to become available
      .then(() => debugLog(`Engine process is terminated`))
      .catch((err) => {
        debugLog(
          `Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
        )
        throw 'PORT_NOT_AVAILABLE'
      })
  }

  if (subprocess?.pid) {
    log(`[CORTEX]:: Killing PID ${subprocess.pid}`)
    const pid = subprocess.pid
    return new Promise((resolve, reject) => {
      terminate(pid, function (err) {
        if (err) {
          return killRequest()
        } else {
          return tcpPortUsed
            .waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
            .then(() => resolve())
            .then(() => log(`[CORTEX]:: cortex process is terminated`))
            .catch(() => {
              killRequest()
            })
        }
      })
    })
  } else {
    return killRequest()
  }
}

/**
 * 1. Spawn engine process
 * 2. Load model into engine subprocess
 * @returns
 */
async function runEngineAndLoadModel(
  settings: ModelLoadParams,
  systemInfo: SystemInformation
) {
  return unloadModel()
    .then(() => runEngine(systemInfo))
    .then(() => loadModelRequest(settings))
    .catch((err) => {
      // TODO: Broadcast error so the app can display a proper error message
      debugLog(`${err}`, 'Error')
      return { error: err }
    })
}

/**
 * Loads an LLM model into the engine subprocess by sending an HTTP POST request.
 */
async function loadModelRequest(
  settings: ModelLoadParams
): Promise<{ error: Error | undefined }> {
  debugLog(`Loading model with params ${JSON.stringify(settings)}`)
  return fetchRetry(LOAD_MODEL_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(settings),
    retries: 3,
    retryDelay: 500,
  })
    .then((res) => {
      debugLog(`Load model success with response ${JSON.stringify(res)}`)
      return Promise.resolve({ error: undefined })
    })
    .catch((err) => {
      debugLog(`Load model failed with error ${err}`, 'Error')
      return Promise.resolve({ error: err })
    })
}

/**
 * Spawns the engine subprocess.
 */
async function runEngine(systemInfo: SystemInformation): Promise<void> {
  debugLog(`Spawning engine subprocess...`)
  if (systemInfo.gpuSetting == null) {
    return Promise.reject(
      'No GPU information found. Please check your GPU setting.'
    )
  }

  if (systemInfo.gpuSetting?.gpus.length === 0) {
    return Promise.reject('No GPU found. Please check your GPU setting.')
  }

  if (systemInfo.osInfo == null) {
    return Promise.reject(
      'No OS information found. Please check your OS setting.'
    )
  }
  const platform = systemInfo.osInfo.platform
  if (platform == null || supportedPlatform().includes(platform) === false) {
    return Promise.reject(
      'Unsupported OS platform. Please check your OS setting.'
    )
  }

  const gpu = systemInfo.gpuSetting?.gpus[0]
  if (gpu.name.toLowerCase().includes('nvidia') === false) {
    return Promise.reject('No Nvidia GPU found. Please check your GPU setting.')
  }
  const gpuArch = gpu.arch
  if (gpuArch == null || supportedGpuArch().includes(gpuArch) === false) {
    return Promise.reject(
      `Your GPU: ${gpu.name} is not supported. Only ${supportedGpuArch().join(
        ', '
      )} series are supported.`
    )
  }
  const janDataFolderPath = await getJanDataFolderPath()
  const tensorRtVersion = TENSORRT_VERSION
  const provider = PROVIDER

  return new Promise<void>((resolve, reject) => {
    // Current directory by default

    const executableFolderPath = path.join(
      janDataFolderPath,
      'engines',
      provider,
      tensorRtVersion,
      gpuArch
    )
    const nitroExecutablePath = path.join(
      executableFolderPath,
      platform === 'win32' ? 'nitro.exe' : 'nitro'
    )

    const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
    // Execute the binary
    debugLog(`Spawn nitro at path: ${nitroExecutablePath}, and args: ${args}`)
    subprocess = spawn(nitroExecutablePath, args, {
      cwd: executableFolderPath,
      env: {
        ...process.env,
      },
    })

    // Handle subprocess output
    subprocess.stdout.on('data', (data: any) => {
      debugLog(`${data}`)
    })

    subprocess.stderr.on('data', (data: any) => {
      debugLog(`${data}`)
    })

    subprocess.on('close', (code: any) => {
      debugLog(`Engine exited with code: ${code}`)
      subprocess = undefined
      reject(`child process exited with code ${code}`)
    })

    tcpPortUsed
      .waitUntilUsed(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 30000)
      .then(() => {
        debugLog(`Engine is ready`)
        resolve()
      })
  })
}

function debugLog(message: string, level: string = 'Debug') {
  log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
}

const decompressRunner = async (zipPath: string, output: string) => {
  console.debug(`Decompressing ${zipPath} to ${output}...`)
  try {
    const files = await decompress(zipPath, output)
    console.debug('Decompress finished!', files)
  } catch (err) {
    console.error(`Decompress ${zipPath} failed: ${err}`)
  }
}

/**
 * Parses a prompt template into prompt settings.
 * @param promptTemplate Template as string
 * @returns
 */
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
  // Split the string using the markers
  const systemMarker = '{system_message}'
  const promptMarker = '{prompt}'

  if (
    promptTemplate.includes(systemMarker) &&
    promptTemplate.includes(promptMarker)
  ) {
    // Find the indices of the markers
    const systemIndex = promptTemplate.indexOf(systemMarker)
    const promptIndex = promptTemplate.indexOf(promptMarker)

    // Extract the parts of the string
    const system_prompt = promptTemplate.substring(0, systemIndex)
    const user_prompt = promptTemplate.substring(
      systemIndex + systemMarker.length,
      promptIndex
    )
    const ai_prompt = promptTemplate.substring(
      promptIndex + promptMarker.length
    )

    // Return the split parts
    return { system_prompt, user_prompt, ai_prompt }
  } else if (promptTemplate.includes(promptMarker)) {
    // Extract the parts of the string for the case where only promptMarker is present
    const promptIndex = promptTemplate.indexOf(promptMarker)
    const user_prompt = promptTemplate.substring(0, promptIndex)
    const ai_prompt = promptTemplate.substring(
      promptIndex + promptMarker.length
    )

    // Return the split parts
    return { user_prompt, ai_prompt }
  }

  // Return an error if none of the conditions are met
  return { error: 'Cannot split prompt template' }
}
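
// Example: for the Mistral template shipped in resources/models.json,
//   promptTemplateConverter('[INST] {prompt} [/INST]')
// returns { user_prompt: '[INST] ', ai_prompt: ' [/INST]' },
// so each incoming message is wrapped as `[INST] <text> [/INST]`.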

export default {
  supportedPlatform,
  supportedGpuArch,
  decompressRunner,
  loadModel,
  unloadModel,
  dispose: unloadModel,
}
@@ -1,21 +0,0 @@

{
  "compilerOptions": {
    "moduleResolution": "node",
    "target": "ES2015",
    "module": "ES2020",
    "lib": ["es2015", "es2016", "es2017", "dom"],
    "strict": true,
    "sourceMap": true,
    "declaration": true,
    "allowSyntheticDefaultImports": true,
    "experimentalDecorators": true,
    "emitDecoratorMetadata": true,
    "declarationDir": "dist/types",
    "outDir": "dist",
    "importHelpers": true,
    "resolveJsonModule": true,
    "typeRoots": ["node_modules/@types"]
  },
  "include": ["src"],
  "exclude": ["**/*.test.ts"]
}