diff --git a/extensions/tensorrt-llm-extension/models.json b/extensions/tensorrt-llm-extension/models.json
index 31bb11a9e..30f345f47 100644
--- a/extensions/tensorrt-llm-extension/models.json
+++ b/extensions/tensorrt-llm-extension/models.json
@@ -45,5 +45,52 @@
       "size": 2151000000
     },
     "engine": "nitro-tensorrt-llm"
+  },
+  {
+    "sources": [
+      {
+        "filename": "config.json",
+        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/config.json"
+      },
+      {
+        "filename": "rank0.engine",
+        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/rank0.engine"
+      },
+      {
+        "filename": "tokenizer.model",
+        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
+      },
+      {
+        "filename": "special_tokens_map.json",
+        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
+      },
+      {
+        "filename": "tokenizer.json",
+        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
+      },
+      {
+        "filename": "tokenizer_config.json",
+        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
+      }
+    ],
+    "id": "tinyjensen-1.1b-chat-fp16",
+    "object": "model",
+    "name": "TinyJensen 1.1B Chat FP16",
+    "version": "1.0",
+    "description": "Do you want to chat with Jensen Huang? Here you are.",
+    "format": "TensorRT-LLM",
+    "settings": {
+      "ctx_len": 2048,
+      "text_model": false
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "LLama",
+      "tags": ["TensorRT-LLM", "1B", "Finetuned"],
+      "size": 2151000000
+    },
+    "engine": "nitro-tensorrt-llm"
   }
 ]
diff --git a/extensions/tensorrt-llm-extension/package.json b/extensions/tensorrt-llm-extension/package.json
index 01ff3e2c6..96ede4a56 100644
--- a/extensions/tensorrt-llm-extension/package.json
+++ b/extensions/tensorrt-llm-extension/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@janhq/tensorrt-llm-extension",
-  "version": "0.0.2",
+  "version": "0.0.3",
   "description": "Enables accelerated inference leveraging Nvidia's TensorRT-LLM for optimal GPU hardware optimizations. Compatible with models in TensorRT-LLM format. Requires Nvidia GPU driver and CUDA Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
@@ -8,7 +8,7 @@
   "license": "AGPL-3.0",
   "config": {
     "host": "127.0.0.1",
-    "port": "3928"
+    "port": "3929"
   },
   "compatibility": {
     "platform": [
diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts
index e3014b447..cd85601dd 100644
--- a/extensions/tensorrt-llm-extension/src/index.ts
+++ b/extensions/tensorrt-llm-extension/src/index.ts
@@ -20,6 +20,7 @@ import {
   LocalOAIEngine,
   fs,
   MessageRequest,
+  ModelEvent,
 } from '@janhq/core'

 import models from '../models.json'
@@ -127,6 +128,20 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
     events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
   }

+  async onModelInit(model: Model): Promise<void> {
+    if ((await this.installationState()) === 'Installed')
+      return super.onModelInit(model)
+    else {
+      events.emit(ModelEvent.OnModelFail, {
+        ...model,
+        error: {
+          message: 'EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension',
+        },
+      })
+      return
+    }
+  }
+
   override async installationState(): Promise<InstallationState> {
     // For now, we just check the executable of nitro x tensor rt
     const isNitroExecutableAvailable = await executeOnMain(
diff --git a/web/screens/Chat/ErrorMessage/index.tsx b/web/screens/Chat/ErrorMessage/index.tsx
index 25cec1cb9..5be87a59d 100644
--- a/web/screens/Chat/ErrorMessage/index.tsx
+++ b/web/screens/Chat/ErrorMessage/index.tsx
@@ -7,11 +7,14 @@ import ModalTroubleShooting, {
   modalTroubleShootingAtom,
 } from '@/containers/ModalTroubleShoot'

+import { MainViewState } from '@/constants/screens'
+
 import { loadModelErrorAtom } from '@/hooks/useActiveModel'
 import useSendChatMessage from '@/hooks/useSendChatMessage'

 import { getErrorTitle } from '@/utils/errorMessage'

+import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'

 const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
@@ -19,6 +22,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
   const { resendChatMessage } = useSendChatMessage()
   const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
   const loadModelError = useAtomValue(loadModelErrorAtom)
+  const setMainState = useSetAtom(mainViewStateAtom)
   const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'

   const regenerateMessage = async () => {
@@ -70,6 +74,23 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
             </p>
+          ) : loadModelError?.includes('EXTENSION_IS_NOT_INSTALLED') ? (
+            <p>
+              Model is currently unavailable. Please switch to a different
+              model or install the{' '}
+              <span
+                className="cursor-pointer font-medium text-primary"
+                onClick={() => {
+                  setMainState(MainViewState.Settings)
+                }}
+              >
+                {loadModelError.split('::')[1] ?? ''}
+              </span>{' '}
+              to continue using it.
+            </p>
           ) : (
 = ({ item }) => {
-          {compatibility ? (
+          {compatibility &&
+            !compatibility['platform']?.includes(PLATFORM) ? (
               Only available on{' '}
               {compatibility?.platform
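---

Review note (not part of the patch): the extension and the chat UI coordinate through a CODE::detail convention in the load-model error string. The new onModelInit emits 'EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension' via ModelEvent.OnModelFail, and ErrorMessage branches on the code to render an install prompt that links into Settings. A minimal TypeScript sketch of that contract; parseLoadModelError is a hypothetical helper for illustration, not part of the codebase:

    // Hypothetical helper illustrating the CODE::detail error convention
    // used between the TensorRT-LLM extension and the ErrorMessage screen.
    type LoadModelError = { code: string; detail?: string }

    function parseLoadModelError(message: string): LoadModelError {
      // 'EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension'
      //   -> { code: 'EXTENSION_IS_NOT_INSTALLED', detail: 'TensorRT-LLM extension' }
      const [code, detail] = message.split('::')
      return { code, detail }
    }

    // Usage, mirroring the branch added in ErrorMessage/index.tsx:
    const err = parseLoadModelError('EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension')
    if (err.code === 'EXTENSION_IS_NOT_INSTALLED') {
      // The UI renders the detail as a clickable link that opens Settings.
      console.log(`Install the ${err.detail} to continue using the model.`)
    }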