From aab8ee84afd02f6e3da3ead0d9d5c27bcd1fe50f Mon Sep 17 00:00:00 2001
From: Louis
Date: Thu, 14 Mar 2024 19:04:31 +0700
Subject: [PATCH] fix: disable rag & stream settings from tensorrt model.json
 (#2351)

---
 extensions/tensorrt-llm-extension/models.json  | 4 ++--
 extensions/tensorrt-llm-extension/src/index.ts | 7 +++++++
 web/screens/Chat/ChatInput/index.tsx           | 7 ++-----
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/extensions/tensorrt-llm-extension/models.json b/extensions/tensorrt-llm-extension/models.json
index bc6a78256..31bb11a9e 100644
--- a/extensions/tensorrt-llm-extension/models.json
+++ b/extensions/tensorrt-llm-extension/models.json
@@ -33,10 +33,10 @@
     "description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
     "format": "TensorRT-LLM",
     "settings": {
-      "ctx_len": 2048
+      "ctx_len": 2048,
+      "text_model": false
     },
     "parameters": {
-      "stream": true,
       "max_tokens": 4096
     },
     "metadata": {
diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts
index 076951c3f..e3014b447 100644
--- a/extensions/tensorrt-llm-extension/src/index.ts
+++ b/extensions/tensorrt-llm-extension/src/index.ts
@@ -19,6 +19,7 @@ import {
   systemInformations,
   LocalOAIEngine,
   fs,
+  MessageRequest,
 } from '@janhq/core'
 import models from '../models.json'
 
@@ -144,4 +145,10 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
     )
     return Promise.resolve()
   }
+
+  inference(data: MessageRequest): void {
+    // TensorRT LLM Extension supports streaming only
+    if (data.model) data.model.parameters.stream = true
+    super.inference(data)
+  }
 }
diff --git a/web/screens/Chat/ChatInput/index.tsx b/web/screens/Chat/ChatInput/index.tsx
index c90a12cd2..8707e8bcd 100644
--- a/web/screens/Chat/ChatInput/index.tsx
+++ b/web/screens/Chat/ChatInput/index.tsx
@@ -244,16 +244,13 @@ const ChatInput: React.FC = () => {
                   if (
-                    !activeThread?.assistants[0].model.settings
-                      .vision_model ||
                     activeThread?.assistants[0].model.settings
                       .text_model !== false
                   ) {
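
Note: for readers outside the codebase, below is a minimal sketch of the
override pattern the src/index.ts hunk uses — a subclass forces
stream: true on every request before delegating to its parent, so callers
cannot accidentally request a non-streaming response. The BaseEngine and
StreamingOnlyEngine names and the local type stubs are hypothetical
stand-ins; in the real code the parent is LocalOAIEngine and
MessageRequest comes from @janhq/core.

    // Stubbed request type; the real MessageRequest lives in @janhq/core.
    interface MessageRequest {
      model?: {
        parameters: { stream?: boolean }
      }
    }

    // Stand-in for the real parent engine (LocalOAIEngine).
    class BaseEngine {
      inference(data: MessageRequest): void {
        console.log('streaming:', data.model?.parameters.stream ?? false)
      }
    }

    class StreamingOnlyEngine extends BaseEngine {
      // Force streaming regardless of what the caller requested,
      // mirroring the TensorRT-LLM extension's inference() override.
      override inference(data: MessageRequest): void {
        if (data.model) data.model.parameters.stream = true
        super.inference(data)
      }
    }

    // Usage: a request that asked for stream: false is upgraded.
    new StreamingOnlyEngine().inference({
      model: { parameters: { stream: false } },
    })
    // -> streaming: true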