diff --git a/extensions/tensorrt-llm-extension/README.md b/extensions/tensorrt-llm-extension/README.md
deleted file mode 100644
index 34a670516..000000000
--- a/extensions/tensorrt-llm-extension/README.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# TensorRT-LLM Extension
-
-Created using the Jan extension example.
-
-# Create a Jan Extension using TypeScript
-
-Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
-
-## Create Your Own Extension
-
-To create your own extension, you can use this repository as a template! Just follow the instructions below:
-
-1. Click the Use this template button at the top of the repository
-2. Select Create a new repository
-3. Select an owner and name for your new repository
-4. Click Create repository
-5. Clone your new repository
-
-## Initial Setup
-
-After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
-
-> [!NOTE]
->
-> You'll need to have a reasonably modern version of
-> [Node.js](https://nodejs.org) handy. If you are using a version manager like
-> [`nodenv`](https://github.com/nodenv/nodenv) or
-> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
-> root of your repository to install the version specified in
-> [`package.json`](./package.json). Otherwise, 20.x or later should work!
-
-1. :hammer_and_wrench: Install the dependencies
-
-   ```bash
-   npm install
-   ```
-
-1. :building_construction: Package the TypeScript for distribution
-
-   ```bash
-   npm run bundle
-   ```
-
-1. :white_check_mark: Check your artifact
-
-   There will be a `.tgz` file in your extension directory now.
-
-## Update the Extension Metadata
-
-The [`package.json`](package.json) file defines metadata about your extension, such as the
-extension name, main entry, description, and version.
-
-When you copy this repository, update `package.json` with the name and description of your extension.
-
-## Update the Extension Code
-
-The [`src/`](./src/) directory is the heart of your extension! It contains the
-source code that will be run when your extension's functions are invoked. You can replace the
-contents of this directory with your own code.
-
-There are a few things to keep in mind when writing your extension code:
-
-- Most Jan Extension functions are processed asynchronously.
-  In `index.ts`, you will see that the extension function returns a `Promise<void>`.
-
-  ```typescript
-  import { events, MessageEvent, MessageRequest } from '@janhq/core'
-
-  function onStart(): Promise<void> {
-    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
-      this.inference(data)
-    )
-  }
-  ```
-
-  For more information about the Jan Extension Core module, see the
-  [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
-
-So, what are you waiting for? Go ahead and start customizing your extension!
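Editor's note: the README's `events.on` sample subscribes but never unsubscribes. A minimal companion sketch — not from this repository; `MyExtension` and its `inference` method are hypothetical placeholders — showing the matching `events.off` in an unload hook, so a reloaded extension does not handle each message twice:

```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'

class MyExtension {
  // Keep one stable reference so on() and off() operate on the same handler
  private handler = (data: MessageRequest) => this.inference(data)

  onLoad(): void {
    events.on(MessageEvent.OnMessageSent, this.handler)
  }

  onUnload(): void {
    events.off(MessageEvent.OnMessageSent, this.handler)
  }

  private inference(data: MessageRequest): void {
    console.debug('handling message request', data)
  }
}
```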
diff --git a/extensions/tensorrt-llm-extension/jest.config.js b/extensions/tensorrt-llm-extension/jest.config.js
deleted file mode 100644
index 3e32adceb..000000000
--- a/extensions/tensorrt-llm-extension/jest.config.js
+++ /dev/null
@@ -1,9 +0,0 @@
-/** @type {import('ts-jest').JestConfigWithTsJest} */
-module.exports = {
-  preset: 'ts-jest',
-  testEnvironment: 'node',
-  transform: {
-    'node_modules/@janhq/core/.+\\.(j|t)s?$': 'ts-jest',
-  },
-  transformIgnorePatterns: ['node_modules/(?!@janhq/core/.*)'],
-}
diff --git a/extensions/tensorrt-llm-extension/package.json b/extensions/tensorrt-llm-extension/package.json
deleted file mode 100644
index 1cc098dde..000000000
--- a/extensions/tensorrt-llm-extension/package.json
+++ /dev/null
@@ -1,78 +0,0 @@
-{
-  "name": "@janhq/tensorrt-llm-extension",
-  "productName": "TensorRT-LLM Inference Engine",
-  "version": "0.0.3",
-  "description": "This extension enables Nvidia's TensorRT-LLM for the fastest GPU acceleration. See the [setup guide](https://jan.ai/guides/providers/tensorrt-llm/) for next steps.",
-  "main": "dist/index.js",
-  "node": "dist/node/index.cjs.js",
-  "author": "Jan ",
-  "license": "AGPL-3.0",
-  "config": {
-    "host": "127.0.0.1",
-    "port": "3929"
-  },
-  "compatibility": {
-    "platform": [
-      "win32"
-    ],
-    "app": [
-      "0.1.0"
-    ]
-  },
-  "tensorrtVersion": "0.1.8",
-  "provider": "nitro-tensorrt-llm",
-  "scripts": {
-    "test": "jest",
-    "build": "rolldown -c rolldown.config.mjs",
-    "build:publish": "rimraf *.tgz --glob || true && yarn build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install"
-  },
-  "exports": {
-    ".": "./dist/index.js",
-    "./main": "./dist/node/index.cjs.js"
-  },
-  "devDependencies": {
-    "@types/decompress": "4.2.7",
-    "@types/jest": "^29.5.12",
-    "@types/node": "^20.11.4",
-    "@types/os-utils": "^0.0.4",
-    "@types/tcp-port-used": "^1.0.4",
-    "cpx": "^1.5.0",
-    "download-cli": "^1.1.1",
-    "jest": "^29.7.0",
-    "jest-junit": "^16.0.0",
-    "jest-runner": "^29.7.0",
-    "rimraf": "^3.0.2",
-    "rolldown": "1.0.0-beta.1",
-    "run-script-os": "^1.1.6",
-    "ts-jest": "^29.2.5",
-    "typescript": "^5.2.2"
-  },
-  "dependencies": {
-    "@janhq/core": "../../core/package.tgz",
-    "decompress": "^4.2.1",
-    "fetch-retry": "^5.0.6",
-    "rxjs": "^7.8.1",
-    "tcp-port-used": "^1.0.2",
-    "terminate": "^2.6.1",
-    "ulidx": "^2.3.0"
-  },
-  "engines": {
-    "node": ">=18.0.0"
-  },
-  "files": [
-    "dist/*",
-    "package.json",
-    "README.md"
-  ],
-  "bundleDependencies": [
-    "tcp-port-used",
-    "fetch-retry",
-    "decompress",
-    "@janhq/core",
-    "terminate"
-  ],
-  "installConfig": {
-    "hoistingLimits": "workspaces"
-  },
-  "packageManager": "yarn@4.5.3"
-}
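Editor's note: the `config.host` and `config.port` fields above are not read at runtime; the build step (`rolldown.config.mjs`, further down in this diff) inlines them into the bundles as compile-time constants. Roughly what the `define` substitution yields in `dist/index.js` for the defaults above — illustrative only, the bundler controls the real emitted code:

```typescript
// Values derived from package.json: provider, tensorrtVersion, and the
// http://host:port default assembled in rolldown.config.mjs.
const PROVIDER = 'nitro-tensorrt-llm'
const TENSORRT_VERSION = '0.1.8'
const INFERENCE_URL = 'http://127.0.0.1:3929/v1/chat/completions'
```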
"https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json" - }, - { - "filename": "tokenizer.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.json" - }, - { - "filename": "tokenizer_config.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json" - }, - { - "filename": "model.cache", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/model.cache" - } - ], - "id": "llamacorn-1.1b-chat-fp16", - "object": "model", - "name": "LlamaCorn 1.1B Chat FP16", - "version": "1.0", - "description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM", - "format": "TensorRT-LLM", - "settings": { - "ctx_len": 2048, - "text_model": false - }, - "parameters": { - "max_tokens": 4096 - }, - "metadata": { - "author": "LLama", - "tags": ["TensorRT-LLM", "1B", "Finetuned"], - "size": 2151000000 - }, - "engine": "nitro-tensorrt-llm" - }, - { - "sources": [ - { - "filename": "config.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/config.json" - }, - { - "filename": "mistral_float16_tp1_rank0.engine", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine" - }, - { - "filename": "tokenizer.model", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.model" - }, - { - "filename": "special_tokens_map.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json" - }, - { - "filename": "tokenizer.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.json" - }, - { - "filename": "tokenizer_config.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json" - }, - { - "filename": "model.cache", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/model.cache" - } - ], - "id": "tinyjensen-1.1b-chat-fp16", - "object": "model", - "name": "TinyJensen 1.1B Chat FP16", - "version": "1.0", - "description": "Do you want to chat with Jensen Huan? 
Here you are", - "format": "TensorRT-LLM", - "settings": { - "ctx_len": 2048, - "text_model": false - }, - "parameters": { - "max_tokens": 4096 - }, - "metadata": { - "author": "LLama", - "tags": ["TensorRT-LLM", "1B", "Finetuned"], - "size": 2151000000 - }, - "engine": "nitro-tensorrt-llm" - }, - { - "sources": [ - { - "filename": "config.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/config.json" - }, - { - "filename": "mistral_float16_tp1_rank0.engine", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/mistral_float16_tp1_rank0.engine" - }, - { - "filename": "tokenizer.model", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.model" - }, - { - "filename": "special_tokens_map.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/special_tokens_map.json" - }, - { - "filename": "tokenizer.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.json" - }, - { - "filename": "tokenizer_config.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer_config.json" - }, - { - "filename": "model.cache", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/model.cache" - } - ], - "id": "mistral-7b-instruct-int4", - "object": "model", - "name": "Mistral 7B Instruct v0.1 INT4", - "version": "1.0", - "description": "Mistral 7B Instruct v0.1 INT4", - "format": "TensorRT-LLM", - "settings": { - "ctx_len": 2048, - "text_model": false, - "prompt_template": "[INST] {prompt} [/INST]" - }, - "parameters": { - "max_tokens": 4096 - }, - "metadata": { - "author": "MistralAI", - "tags": ["TensorRT-LLM", "7B", "Finetuned"], - "size": 3840000000 - }, - "engine": "nitro-tensorrt-llm" - } -] diff --git a/extensions/tensorrt-llm-extension/rolldown.config.mjs b/extensions/tensorrt-llm-extension/rolldown.config.mjs deleted file mode 100644 index bb02c785b..000000000 --- a/extensions/tensorrt-llm-extension/rolldown.config.mjs +++ /dev/null @@ -1,59 +0,0 @@ -import { defineConfig } from 'rolldown' -import packageJson from './package.json' with { type: 'json' } -import modelsJson from './resources/models.json' with { type: 'json' } - -export default defineConfig([ - { - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - platform: 'browser', - define: { - MODELS: JSON.stringify(modelsJson), - TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion), - PROVIDER: JSON.stringify(packageJson.provider), - DOWNLOAD_RUNNER_URL: - process.platform === 'win32' - ? JSON.stringify( - 'https://github.com/janhq/cortex.tensorrt-llm/releases/download/windows-v-tensorrt-llm-v0.7.1/nitro-windows-v-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz' - ) - : JSON.stringify( - 'https://github.com/janhq/cortex.tensorrt-llm/releases/download/linux-v/nitro-linux-v-amd64-tensorrt-llm-.tar.gz' - ), - NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), - INFERENCE_URL: JSON.stringify( - process.env.INFERENCE_URL || - `${packageJson.config?.protocol ?? 
diff --git a/extensions/tensorrt-llm-extension/src/@types/global.d.ts b/extensions/tensorrt-llm-extension/src/@types/global.d.ts
deleted file mode 100644
index b550080f7..000000000
--- a/extensions/tensorrt-llm-extension/src/@types/global.d.ts
+++ /dev/null
@@ -1,11 +0,0 @@
-declare const NODE: string
-declare const INFERENCE_URL: string
-declare const LOAD_MODEL_URL: string
-declare const TERMINATE_ENGINE_URL: string
-declare const ENGINE_HOST: string
-declare const ENGINE_PORT: string
-declare const DOWNLOAD_RUNNER_URL: string
-declare const TENSORRT_VERSION: string
-declare const COMPATIBILITY: object
-declare const PROVIDER: string
-declare const MODELS: Array<any>
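Editor's note: nothing binds these identifiers at runtime — the `define`/`replace` steps above substitute literal values at build time, and the test suite below assigns them on `global`. An illustrative fragment of how `src/index.ts` consumes them, assuming `Model` is imported from `@janhq/core`:

```typescript
// After bundling, MODELS is a JSON literal and INFERENCE_URL a string literal.
const models = MODELS as unknown as Model[]
console.debug(`inference endpoint: ${INFERENCE_URL}`)
```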
diff --git a/extensions/tensorrt-llm-extension/src/index.test.ts b/extensions/tensorrt-llm-extension/src/index.test.ts
deleted file mode 100644
index 48d6e71d7..000000000
--- a/extensions/tensorrt-llm-extension/src/index.test.ts
+++ /dev/null
@@ -1,186 +0,0 @@
-import TensorRTLLMExtension from '../src/index'
-import {
-  executeOnMain,
-  systemInformation,
-  fs,
-  baseName,
-  joinPath,
-  downloadFile,
-} from '@janhq/core'
-
-jest.mock('@janhq/core', () => ({
-  ...jest.requireActual('@janhq/core/node'),
-  LocalOAIEngine: jest.fn().mockImplementation(function () {
-    // @ts-ignore
-    this.registerModels = () => {
-      return Promise.resolve()
-    }
-    // @ts-ignore
-    return this
-  }),
-  systemInformation: jest.fn(),
-  fs: {
-    existsSync: jest.fn(),
-    mkdir: jest.fn(),
-  },
-  joinPath: jest.fn(),
-  baseName: jest.fn(),
-  downloadFile: jest.fn(),
-  executeOnMain: jest.fn(),
-  showToast: jest.fn(),
-  events: {
-    emit: jest.fn(),
-    // @ts-ignore
-    on: (event, func) => {
-      func({ fileName: './' })
-    },
-    off: jest.fn(),
-  },
-}))
-
-// @ts-ignore
-global.COMPATIBILITY = {
-  platform: ['win32'],
-}
-// @ts-ignore
-global.PROVIDER = 'tensorrt-llm'
-// @ts-ignore
-global.INFERENCE_URL = 'http://localhost:5000'
-// @ts-ignore
-global.NODE = 'node'
-// @ts-ignore
-global.MODELS = []
-// @ts-ignore
-global.TENSORRT_VERSION = ''
-// @ts-ignore
-global.DOWNLOAD_RUNNER_URL = ''
-
-describe('TensorRTLLMExtension', () => {
-  let extension: TensorRTLLMExtension
-
-  beforeEach(() => {
-    // @ts-ignore
-    extension = new TensorRTLLMExtension()
-    jest.clearAllMocks()
-  })
-
-  describe('compatibility', () => {
-    it('should return the correct compatibility', () => {
-      const result = extension.compatibility()
-      expect(result).toEqual({
-        platform: ['win32'],
-      })
-    })
-  })
-
-  describe('install', () => {
-    it('should install if compatible', async () => {
-      const mockSystemInfo: any = {
-        osInfo: { platform: 'win32' },
-        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
-      }
-      ;(executeOnMain as jest.Mock).mockResolvedValue({})
-      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
-      ;(fs.existsSync as jest.Mock).mockResolvedValue(false)
-      ;(fs.mkdir as jest.Mock).mockResolvedValue(undefined)
-      ;(baseName as jest.Mock).mockResolvedValue('./')
-      ;(joinPath as jest.Mock).mockResolvedValue('./')
-      ;(downloadFile as jest.Mock).mockResolvedValue({})
-
-      await extension.install()
-
-      expect(executeOnMain).toHaveBeenCalled()
-    })
-
-    it('should not install if not compatible', async () => {
-      const mockSystemInfo: any = {
-        osInfo: { platform: 'linux' },
-        gpuSetting: { gpus: [{ arch: 'pascal', name: 'NVIDIA GPU' }] },
-      }
-      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
-
-      jest.spyOn(extension, 'registerModels').mockReturnValue(Promise.resolve())
-      await extension.install()
-
-      expect(executeOnMain).not.toHaveBeenCalled()
-    })
-  })
-
-  describe('installationState', () => {
-    it('should return NotCompatible if not compatible', async () => {
-      const mockSystemInfo: any = {
-        osInfo: { platform: 'linux' },
-        gpuSetting: { gpus: [{ arch: 'pascal', name: 'NVIDIA GPU' }] },
-      }
-      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
-
-      const result = await extension.installationState()
-
-      expect(result).toBe('NotCompatible')
-    })
-
-    it('should return Installed if executable exists', async () => {
-      const mockSystemInfo: any = {
-        osInfo: { platform: 'win32' },
-        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
-      }
-      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
-      ;(fs.existsSync as jest.Mock).mockResolvedValue(true)
-
-      const result = await extension.installationState()
-
-      expect(result).toBe('Installed')
-    })
-
-    it('should return NotInstalled if executable does not exist', async () => {
-      const mockSystemInfo: any = {
-        osInfo: { platform: 'win32' },
-        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
-      }
-      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
-      ;(fs.existsSync as jest.Mock).mockResolvedValue(false)
-
-      const result = await extension.installationState()
-
-      expect(result).toBe('NotInstalled')
-    })
-  })
-
-  describe('isCompatible', () => {
-    it('should return true for compatible system', () => {
-      const mockInfo: any = {
-        osInfo: { platform: 'win32' },
-        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
-      }
-
-      const result = extension.isCompatible(mockInfo)
-
-      expect(result).toBe(true)
-    })
-
-    it('should return false for incompatible system', () => {
-      const mockInfo: any = {
-        osInfo: { platform: 'linux' },
-        gpuSetting: { gpus: [{ arch: 'pascal', name: 'AMD GPU' }] },
-      }
-
-      const result = extension.isCompatible(mockInfo)
-
-      expect(result).toBe(false)
-    })
-  })
-})
-
-describe('GitHub Release File URL Test', () => {
-  const url = 'https://github.com/janhq/cortex.tensorrt-llm/releases/download/windows-v0.1.8-tensorrt-llm-v0.7.1/nitro-windows-v0.1.8-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz';
-
-  it('should return a status code 200 for the release file URL', async () => {
-    const response = await fetch(url, { method: 'HEAD' });
-    expect(response.status).toBe(200);
-  });
-
-  it('should not return a 404 status', async () => {
-    const response = await fetch(url, { method: 'HEAD' });
-    expect(response.status).not.toBe(404);
-  });
-});
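Editor's note: the last two tests issue real HEAD requests to GitHub, so they fail offline or when the release is moved. One way to keep them deterministic in CI — a sketch, not part of the original suite — is to stub the global `fetch` (available in Node 18+):

```typescript
// Stub fetch for the release-URL checks so they pass without network access.
beforeAll(() => {
  jest
    .spyOn(globalThis, 'fetch')
    .mockResolvedValue(new Response(null, { status: 200 }))
})

afterAll(() => {
  jest.restoreAllMocks()
})
```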
diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts
deleted file mode 100644
index d9c89242f..000000000
--- a/extensions/tensorrt-llm-extension/src/index.ts
+++ /dev/null
@@ -1,197 +0,0 @@
-/**
- * @module tensorrt-llm-extension/src/index
- */
-
-import {
-  Compatibility,
-  DownloadEvent,
-  DownloadRequest,
-  DownloadState,
-  InstallationState,
-  baseName,
-  downloadFile,
-  events,
-  executeOnMain,
-  joinPath,
-  showToast,
-  systemInformation,
-  LocalOAIEngine,
-  fs,
-  MessageRequest,
-  ModelEvent,
-  getJanDataFolderPath,
-  SystemInformation,
-  Model,
-} from '@janhq/core'
-
-/**
- * TensorRTLLMExtension - Implementation of LocalOAIEngine
- * @extends LocalOAIEngine
- * Provides pre-populated models for TensorRT-LLM
- */
-export default class TensorRTLLMExtension extends LocalOAIEngine {
-  /**
-   * Overrides the custom function names for loading and unloading the model,
-   * which are implemented in the node module
-   */
-  override provider = PROVIDER
-  override inferenceUrl = INFERENCE_URL
-  override nodeModule = NODE
-
-  private supportedGpuArch = ['ampere', 'ada']
-
-  override compatibility() {
-    return COMPATIBILITY as unknown as Compatibility
-  }
-
-  override async onLoad(): Promise<void> {
-    super.onLoad()
-
-    if ((await this.installationState()) === 'Installed') {
-      const models = MODELS as unknown as Model[]
-      this.registerModels(models)
-    }
-  }
-
-  override async install(): Promise<void> {
-    await this.removePopulatedModels()
-
-    const info = await systemInformation()
-
-    if (!this.isCompatible(info)) return
-
-    const janDataFolderPath = await getJanDataFolderPath()
-    const engineVersion = TENSORRT_VERSION
-
-    const executableFolderPath = await joinPath([
-      janDataFolderPath,
-      'engines',
-      this.provider,
-      engineVersion,
-      info.gpuSetting?.gpus[0].arch,
-    ])
-
-    if (!(await fs.existsSync(executableFolderPath))) {
-      await fs.mkdir(executableFolderPath)
-    }
-
-    const placeholderUrl = DOWNLOAD_RUNNER_URL
-    const tensorrtVersion = TENSORRT_VERSION
-
-    const url = placeholderUrl
-      .replace(/<version>/g, tensorrtVersion)
-      .replace(/<gpuarch>/g, info.gpuSetting!.gpus[0]!.arch!)
-
-    const tarball = await baseName(url)
-
-    const tarballFullPath = await joinPath([executableFolderPath, tarball])
-    const downloadRequest: DownloadRequest = {
-      url,
-      localPath: tarballFullPath,
-      extensionId: this.name,
-      downloadType: 'extension',
-    }
-    downloadFile(downloadRequest)
-
-    const onFileDownloadSuccess = async (state: DownloadState) => {
-      // If this event belongs to another download, ignore it
-      if (state.fileName !== tarball) return
-      events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
-      await executeOnMain(
-        this.nodeModule,
-        'decompressRunner',
-        tarballFullPath,
-        executableFolderPath
-      )
-      events.emit(DownloadEvent.onFileUnzipSuccess, state)
-
-      // Pre-populate models as soon as the engine is ready
-      const models = MODELS as unknown as Model[]
-      this.registerModels(models).then(() => {
-        showToast(
-          'Extension installed successfully.',
-          'New models are added to Model Hub.'
-        )
-      })
-    }
-    events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
-  }
-
-  private async removePopulatedModels(): Promise<void> {
-    const models = MODELS as unknown as Model[]
-    console.debug(`removePopulatedModels`, JSON.stringify(models))
-    const janDataFolderPath = await getJanDataFolderPath()
-    const modelFolderPath = await joinPath([janDataFolderPath, 'models'])
-
-    for (const model of models) {
-      const modelPath = await joinPath([modelFolderPath, model.id])
-
-      try {
-        await fs.rm(modelPath)
-      } catch (err) {
-        console.error(`Error removing model ${modelPath}`, err)
-      }
-    }
-    events.emit(ModelEvent.OnModelsUpdate, {})
-  }
-
-  override async loadModel(model: Model): Promise<void> {
-    if ((await this.installationState()) === 'Installed')
-      return super.loadModel(model)
-
-    throw new Error('EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension')
-  }
-
-  override async installationState(): Promise<InstallationState> {
-    const info = await systemInformation()
-
-    if (!this.isCompatible(info)) return 'NotCompatible'
-    const firstGpu = info.gpuSetting?.gpus[0]
-    const janDataFolderPath = await getJanDataFolderPath()
-    const engineVersion = TENSORRT_VERSION
-
-    const enginePath = await joinPath([
-      janDataFolderPath,
-      'engines',
-      this.provider,
-      engineVersion,
-      firstGpu.arch,
-      info.osInfo.platform === 'win32' ? 'nitro.exe' : 'nitro',
-    ])
-
-    // For now, we just check for the nitro x TensorRT executable
-    return (await fs.existsSync(enginePath)) ? 'Installed' : 'NotInstalled'
-  }
-
-  override stopInference() {
-    if (!this.loadedModel) return
-    showToast(
-      'Unable to Stop Inference',
-      'The model does not support stopping inference.'
-    )
-    return Promise.resolve()
-  }
-
-  override async inference(data: MessageRequest) {
-    if (!this.loadedModel) return
-    // The TensorRT-LLM extension supports streaming only
-    if (data.model && data.model.parameters) data.model.parameters.stream = true
-    super.inference(data)
-  }
-
-  isCompatible(info: SystemInformation): info is Required<SystemInformation> & {
-    gpuSetting: { gpus: { arch: string }[] }
-  } {
-    const firstGpu = info.gpuSetting?.gpus[0]
-    return (
-      !!info.osInfo &&
-      !!info.gpuSetting &&
-      !!firstGpu &&
-      info.gpuSetting.gpus.length > 0 &&
-      this.compatibility().platform.includes(info.osInfo.platform) &&
-      !!firstGpu.arch &&
-      firstGpu.name.toLowerCase().includes('nvidia') &&
-      this.supportedGpuArch.includes(firstGpu.arch)
-    )
-  }
-}
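Editor's note: for concreteness, here is how `install()`'s placeholder substitution resolves on Windows with `tensorrtVersion` 0.1.8 — the same URL the release-file test above pins. A worked example; the `ampere` value is an assumed GPU architecture:

```typescript
const placeholderUrl =
  'https://github.com/janhq/cortex.tensorrt-llm/releases/download/windows-v<version>-tensorrt-llm-v0.7.1/nitro-windows-v<version>-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz'

const url = placeholderUrl
  .replace(/<version>/g, '0.1.8')
  .replace(/<gpuarch>/g, 'ampere') // no-op here: the Windows tarball ships all arches

// => .../windows-v0.1.8-tensorrt-llm-v0.7.1/
//      nitro-windows-v0.1.8-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz
```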
diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts
deleted file mode 100644
index d02427170..000000000
--- a/extensions/tensorrt-llm-extension/src/node/index.ts
+++ /dev/null
@@ -1,325 +0,0 @@
-import path from 'path'
-import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
-import tcpPortUsed from 'tcp-port-used'
-import fetchRT from 'fetch-retry'
-import {
-  log,
-  getJanDataFolderPath,
-  SystemInformation,
-  PromptTemplate,
-} from '@janhq/core/node'
-import decompress from 'decompress'
-import terminate from 'terminate'
-
-// Polyfill fetch with retry
-const fetchRetry = fetchRT(fetch)
-
-const supportedPlatform = (): string[] => ['win32', 'linux']
-const supportedGpuArch = (): string[] => ['ampere', 'ada']
-const PORT_CHECK_INTERVAL = 100
-
-/**
- * The settings object for the model load operation.
- */
-interface ModelLoadParams {
-  engine_path: string
-  ctx_len: number
-}
-
-// The subprocess instance for the engine
-let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
-
-/**
- * Initializes an engine subprocess to load a machine learning model.
- * @param params - The model load settings.
- */
-async function loadModel(
-  params: any,
-  systemInfo?: SystemInformation
-): Promise<{ error: Error | undefined }> {
-  // modelFolder is the absolute path to the running model folder
-  // e.g. ~/jan/models/llama-2
-  let modelFolder = params.modelFolder
-
-  if (params.model.settings?.prompt_template) {
-    const promptTemplate = params.model.settings.prompt_template
-    const prompt = promptTemplateConverter(promptTemplate)
-    if (prompt?.error) {
-      return Promise.reject(prompt.error)
-    }
-    params.model.settings.system_prompt = prompt.system_prompt
-    params.model.settings.user_prompt = prompt.user_prompt
-    params.model.settings.ai_prompt = prompt.ai_prompt
-  }
-
-  const settings: ModelLoadParams = {
-    engine_path: modelFolder,
-    ctx_len: params.model.settings.ctx_len ?? 2048,
-    ...params.model.settings,
-  }
-  if (!systemInfo) {
-    throw new Error('Cannot get system info. Unable to start nitro x tensorrt.')
-  }
-  return runEngineAndLoadModel(settings, systemInfo)
-}
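Editor's note: after template conversion, `loadModel` forwards a `ModelLoadParams`-shaped payload to the engine's load endpoint. An illustrative instance — the path is a hypothetical example, not captured traffic:

```typescript
// Example payload for a model with a 2048-token context window.
const exampleSettings: ModelLoadParams = {
  engine_path: '/home/user/jan/models/mistral-7b-instruct-int4',
  ctx_len: 2048,
}
```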
-/**
- * Stops an engine subprocess.
- */
-function unloadModel(): Promise<void> {
-  const controller = new AbortController()
-  setTimeout(() => controller.abort(), 5000)
-  debugLog(`Request to kill engine`)
-
-  const killRequest = () => {
-    return fetch(TERMINATE_ENGINE_URL, {
-      method: 'DELETE',
-      signal: controller.signal,
-    })
-      .then(() => {
-        subprocess = undefined
-      })
-      .catch(() => {}) // Do nothing with this attempt
-      .then(() =>
-        tcpPortUsed.waitUntilFree(
-          parseInt(ENGINE_PORT),
-          PORT_CHECK_INTERVAL,
-          5000
-        )
-      ) // Wait for the port to become available
-      .then(() => debugLog(`Engine process is terminated`))
-      .catch((err) => {
-        debugLog(
-          `Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
-        )
-        throw 'PORT_NOT_AVAILABLE'
-      })
-  }
-
-  if (subprocess?.pid) {
-    log(`[CORTEX]:: Killing PID ${subprocess.pid}`)
-    const pid = subprocess.pid
-    return new Promise<void>((resolve, reject) => {
-      terminate(pid, function (err) {
-        if (err) {
-          return killRequest()
-        } else {
-          return tcpPortUsed
-            .waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
-            .then(() => resolve())
-            .then(() => log(`[CORTEX]:: cortex process is terminated`))
-            .catch(() => {
-              killRequest()
-            })
-        }
-      })
-    })
-  } else {
-    return killRequest()
-  }
-}
-
-/**
- * 1. Spawn the engine process
- * 2. Load the model into the engine subprocess
- * @returns
- */
-async function runEngineAndLoadModel(
-  settings: ModelLoadParams,
-  systemInfo: SystemInformation
-) {
-  return unloadModel()
-    .then(() => runEngine(systemInfo))
-    .then(() => loadModelRequest(settings))
-    .catch((err) => {
-      // TODO: Broadcast the error so the app can display a proper error message
-      debugLog(`${err}`, 'Error')
-      return { error: err }
-    })
-}
-
-/**
- * Loads an LLM model into the engine subprocess by sending an HTTP POST request.
- */
-async function loadModelRequest(
-  settings: ModelLoadParams
-): Promise<{ error: Error | undefined }> {
-  debugLog(`Loading model with params ${JSON.stringify(settings)}`)
-  return fetchRetry(LOAD_MODEL_URL, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-    },
-    body: JSON.stringify(settings),
-    retries: 3,
-    retryDelay: 500,
-  })
-    .then((res) => {
-      debugLog(`Load model success with response ${JSON.stringify(res)}`)
-      return Promise.resolve({ error: undefined })
-    })
-    .catch((err) => {
-      debugLog(`Load model failed with error ${err}`, 'Error')
-      return Promise.resolve({ error: err })
-    })
-}
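Editor's note: none of these node-side functions are called directly from the renderer bundle; `src/index.ts` reaches them through `executeOnMain`, as the decompress step in `install()` shows. A sketch of the same pattern for model loading — the call site is assumed, the function name comes from this file:

```typescript
import { executeOnMain, systemInformation } from '@janhq/core'

// NODE resolves to '@janhq/tensorrt-llm-extension/dist/node/index.cjs.js'.
async function loadOnMain(params: object) {
  const info = await systemInformation()
  return executeOnMain(NODE, 'loadModel', params, info)
}
```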
-/**
- * Spawns the engine subprocess.
- */
-async function runEngine(systemInfo: SystemInformation): Promise<void> {
-  debugLog(`Spawning engine subprocess...`)
-  if (systemInfo.gpuSetting == null) {
-    return Promise.reject(
-      'No GPU information found. Please check your GPU setting.'
-    )
-  }
-
-  if (systemInfo.gpuSetting?.gpus.length === 0) {
-    return Promise.reject('No GPU found. Please check your GPU setting.')
-  }
-
-  if (systemInfo.osInfo == null) {
-    return Promise.reject(
-      'No OS information found. Please check your OS setting.'
-    )
-  }
-  const platform = systemInfo.osInfo.platform
-  if (platform == null || supportedPlatform().includes(platform) === false) {
-    return Promise.reject(
-      'Your OS platform is not supported. Please check your OS setting.'
-    )
-  }
-
-  const gpu = systemInfo.gpuSetting?.gpus[0]
-  if (gpu.name.toLowerCase().includes('nvidia') === false) {
-    return Promise.reject('No Nvidia GPU found. Please check your GPU setting.')
-  }
-  const gpuArch = gpu.arch
-  if (gpuArch == null || supportedGpuArch().includes(gpuArch) === false) {
-    return Promise.reject(
-      `Your GPU: ${gpu.name} is not supported. Only ${supportedGpuArch().join(
-        ', '
-      )} series are supported.`
-    )
-  }
-  const janDataFolderPath = await getJanDataFolderPath()
-  const tensorRtVersion = TENSORRT_VERSION
-  const provider = PROVIDER
-
-  return new Promise<void>((resolve, reject) => {
-    // Current directory by default
-
-    const executableFolderPath = path.join(
-      janDataFolderPath,
-      'engines',
-      provider,
-      tensorRtVersion,
-      gpuArch
-    )
-    const nitroExecutablePath = path.join(
-      executableFolderPath,
-      platform === 'win32' ? 'nitro.exe' : 'nitro'
-    )
-
-    const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
-    // Execute the binary
-    debugLog(`Spawn nitro at path: ${nitroExecutablePath}, and args: ${args}`)
-    subprocess = spawn(nitroExecutablePath, args, {
-      cwd: executableFolderPath,
-      env: {
-        ...process.env,
-      },
-    })
-
-    // Handle subprocess output
-    subprocess.stdout.on('data', (data: any) => {
-      debugLog(`${data}`)
-    })
-
-    subprocess.stderr.on('data', (data: any) => {
-      debugLog(`${data}`)
-    })
-
-    subprocess.on('close', (code: any) => {
-      debugLog(`Engine exited with code: ${code}`)
-      subprocess = undefined
-      reject(`child process exited with code ${code}`)
-    })
-
-    tcpPortUsed
-      .waitUntilUsed(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 30000)
-      .then(() => {
-        debugLog(`Engine is ready`)
-        resolve()
-      })
-  })
-}
-
-function debugLog(message: string, level: string = 'Debug') {
-  log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
-}
-
-const decompressRunner = async (zipPath: string, output: string) => {
-  console.debug(`Decompressing ${zipPath} to ${output}...`)
-  try {
-    const files = await decompress(zipPath, output)
-    console.debug('Decompress finished!', files)
-  } catch (err) {
-    console.error(`Decompress ${zipPath} failed: ${err}`)
-  }
-}
-
-/**
- * Parses a prompt template into args settings
- * @param promptTemplate Template as string
- * @returns
- */
-function promptTemplateConverter(promptTemplate: string): PromptTemplate {
-  // Split the string using the markers
-  const systemMarker = '{system_message}'
-  const promptMarker = '{prompt}'
-
-  if (
-    promptTemplate.includes(systemMarker) &&
-    promptTemplate.includes(promptMarker)
-  ) {
-    // Find the indices of the markers
-    const systemIndex = promptTemplate.indexOf(systemMarker)
-    const promptIndex = promptTemplate.indexOf(promptMarker)
-
-    // Extract the parts of the string
-    const system_prompt = promptTemplate.substring(0, systemIndex)
-    const user_prompt = promptTemplate.substring(
-      systemIndex + systemMarker.length,
-      promptIndex
-    )
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    )
-
-    // Return the split parts
-    return { system_prompt, user_prompt, ai_prompt }
-  } else if (promptTemplate.includes(promptMarker)) {
-    // Extract the parts of the string for the case where only promptMarker is present
-    const promptIndex = promptTemplate.indexOf(promptMarker)
-    const user_prompt = promptTemplate.substring(0, promptIndex)
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    )
-
-    // Return the split parts
-    return { user_prompt, ai_prompt }
-  }
-
-  // Return an error if none of the conditions are met
-  return { error: 'Cannot split prompt template' }
-}
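Editor's note: applied to the Mistral template shipped in `resources/models.json`, the converter takes the prompt-only branch, since that template has no `{system_message}` marker. A worked example:

```typescript
const parts = promptTemplateConverter('[INST] {prompt} [/INST]')
// promptIndex = 7 (start of '{prompt}'), marker length = 8, so:
// => { user_prompt: '[INST] ', ai_prompt: ' [/INST]' }
```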
-
-export default {
-  supportedPlatform,
-  supportedGpuArch,
-  decompressRunner,
-  loadModel,
-  unloadModel,
-  dispose: unloadModel,
-}
diff --git a/extensions/tensorrt-llm-extension/tsconfig.json b/extensions/tensorrt-llm-extension/tsconfig.json
deleted file mode 100644
index 94465ebb6..000000000
--- a/extensions/tensorrt-llm-extension/tsconfig.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "compilerOptions": {
-    "moduleResolution": "node",
-    "target": "ES2015",
-    "module": "ES2020",
-    "lib": ["es2015", "es2016", "es2017", "dom"],
-    "strict": true,
-    "sourceMap": true,
-    "declaration": true,
-    "allowSyntheticDefaultImports": true,
-    "experimentalDecorators": true,
-    "emitDecoratorMetadata": true,
-    "declarationDir": "dist/types",
-    "outDir": "dist",
-    "importHelpers": true,
-    "resolveJsonModule": true,
-    "typeRoots": ["node_modules/@types"]
-  },
-  "include": ["src"],
-  "exclude": ["**/*.test.ts"]
-}