fix: incompatible GPU error message (#2357)

* fix: incompatible GPU error message * fix: change port
2024-03-14 22:11:55 +07:00 · 2024-03-14 22:11:55 +07:00 · 758afdbeb4
commit 758afdbeb4
parent 441af9dc05
5 changed files with 87 additions and 3 deletions
--- a/extensions/tensorrt-llm-extension/models.json
+++ b/extensions/tensorrt-llm-extension/models.json
@ -45,5 +45,52 @@
      "size": 2151000000
    },
    "engine": "nitro-tensorrt-llm"
  },
  {
    "sources": [
      {
        "filename": "config.json",
        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/config.json"
      },
      {
        "filename": "rank0.engine",
        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/rank0.engine"
      },
      {
        "filename": "tokenizer.model",
        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
      },
      {
        "filename": "special_tokens_map.json",
        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
      },
      {
        "filename": "tokenizer.json",
        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
      },
      {
        "filename": "tokenizer_config.json",
        "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
      }
    ],
    "id": "tinyjensen-1.1b-chat-fp16",
    "object": "model",
    "name": "TinyJensen 1.1B Chat FP16",
    "version": "1.0",
    "description": "Do you want to chat with Jensen Huang? Here you are",
    "format": "TensorRT-LLM",
    "settings": {
      "ctx_len": 2048,
      "text_model": false
    },
    "parameters": {
      "max_tokens": 4096
    },
    "metadata": {
      "author": "LLama",
      "tags": ["TensorRT-LLM", "1B", "Finetuned"],
      "size": 2151000000
    },
    "engine": "nitro-tensorrt-llm"
  }
 ]
--- a/extensions/tensorrt-llm-extension/package.json
+++ b/extensions/tensorrt-llm-extension/package.json
@ -1,6 +1,6 @@
 {
  "name": "@janhq/tensorrt-llm-extension",
-  "version": "0.0.2",
+  "version": "0.0.3",
  "description": "Enables accelerated inference leveraging Nvidia's TensorRT-LLM for optimal GPU hardware optimizations. Compatible with models in TensorRT-LLM format. Requires Nvidia GPU driver and CUDA Toolkit installation.",
  "main": "dist/index.js",
  "node": "dist/node/index.cjs.js",
@ -8,7 +8,7 @@
  "license": "AGPL-3.0",
  "config": {
    "host": "127.0.0.1",
-    "port": "3928"
+    "port": "3929"
  },
  "compatibility": {
    "platform": [
--- a/extensions/tensorrt-llm-extension/src/index.ts
+++ b/extensions/tensorrt-llm-extension/src/index.ts
@ -20,6 +20,7 @@ import {
  LocalOAIEngine,
  fs,
  MessageRequest,
  ModelEvent,
 } from '@janhq/core'
 import models from '../models.json'
@ -127,6 +128,20 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
    events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
  }
  /**
   * Initializes a model only when the TensorRT-LLM extension is installed.
   *
   * If `installationState()` resolves to `'Installed'`, the call is delegated
   * to the base class implementation. Otherwise `ModelEvent.OnModelFail` is
   * emitted with the model's fields plus an `error.message` of the form
   * `EXTENSION_IS_NOT_INSTALLED::<extension label>`, and the method resolves
   * without loading anything.
   *
   * @param model - The model that was requested to be loaded.
   * @returns A promise that settles once the base init has finished or the
   *   failure event has been emitted.
   */
  override async onModelInit(model: Model): Promise<void> {
    if ((await this.installationState()) === 'Installed') {
      return super.onModelInit(model)
    }

    // Not installed: report the failure through the event bus instead of
    // throwing. The text after `::` is the human-readable extension label
    // carried alongside the error code.
    events.emit(ModelEvent.OnModelFail, {
      ...model,
      error: {
        message: 'EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension',
      },
    })
  }
  override async installationState(): Promise<InstallationState> {
    // For now, we just check the executable of nitro x tensor rt
    const isNitroExecutableAvailable = await executeOnMain(
--- a/web/screens/Chat/ErrorMessage/index.tsx
+++ b/web/screens/Chat/ErrorMessage/index.tsx
@ -7,11 +7,14 @@ import ModalTroubleShooting, {
  modalTroubleShootingAtom,
 } from '@/containers/ModalTroubleShoot'
 import { MainViewState } from '@/constants/screens'
 import { loadModelErrorAtom } from '@/hooks/useActiveModel'
 import useSendChatMessage from '@/hooks/useSendChatMessage'
 import { getErrorTitle } from '@/utils/errorMessage'
 import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
 const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
@ -19,6 +22,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
  const { resendChatMessage } = useSendChatMessage()
  const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
  const loadModelError = useAtomValue(loadModelErrorAtom)
  const setMainState = useSetAtom(mainViewStateAtom)
  const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'
  const regenerateMessage = async () => {
@ -70,6 +74,23 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
              </p>
              <ModalTroubleShooting />
            </div>
          ) : loadModelError?.includes('EXTENSION_IS_NOT_INSTALLED') ? (
            <div
              key={message.id}
              className="flex w-full flex-col items-center text-center text-sm font-medium text-gray-500"
            >
              <p className="w-[90%]">
                Model is currently unavailable. Please switch to a different
                model or install the{' '}
                <button
                  className="font-medium text-blue-500"
                  onClick={() => setMainState(MainViewState.Settings)}
                >
                  {loadModelError.split('::')[1] ?? ''}
                </button>{' '}
                to continue using it.
              </p>
            </div>
          ) : (
            <div
              key={message.id}
--- a/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx
+++ b/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx
@ -144,7 +144,8 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
              </TooltipTrigger>
              <TooltipPortal>
                <TooltipContent side="top">
-                  {compatibility ? (
+                  {compatibility &&
                  !compatibility['platform']?.includes(PLATFORM) ? (
                    <span>
                      Only available on{' '}
                      {compatibility?.platform