fix: disable rag & stream settings from tensorrt model.json (#2351)
parent 8120ad2a03
commit aab8ee84af
@@ -33,10 +33,10 @@
     "description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
     "format": "TensorRT-LLM",
     "settings": {
-      "ctx_len": 2048
+      "ctx_len": 2048,
+      "text_model": false
     },
     "parameters": {
-      "stream": true,
       "max_tokens": 4096
     },
     "metadata": {
@@ -19,6 +19,7 @@ import {
   systemInformations,
   LocalOAIEngine,
   fs,
+  MessageRequest,
 } from '@janhq/core'
 import models from '../models.json'
@@ -144,4 +145,10 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
     )
     return Promise.resolve()
   }
+
+  inference(data: MessageRequest): void {
+    // TensorRT LLM Extension supports streaming only
+    if (data.model) data.model.parameters.stream = true
+    super.inference(data)
+  }
 }
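For context, a minimal sketch (not part of this commit) of what the override above amounts to: streaming is forced on before the request reaches the base engine, which is why the stream flag can be dropped from models.json. The types below are simplified stand-ins, not the real MessageRequest/model shapes from @janhq/core.

// Illustrative, simplified shapes -- the real types live in @janhq/core and may differ.
interface SketchModel {
  parameters: { stream?: boolean; max_tokens?: number }
}
interface SketchRequest {
  model?: SketchModel
}

// Mirrors the override: force streaming on before handing the request to the engine.
function forceStreaming(data: SketchRequest): SketchRequest {
  if (data.model) data.model.parameters.stream = true
  return data
}

// Even a request that asks for stream: false goes out with stream: true.
const req: SketchRequest = {
  model: { parameters: { stream: false, max_tokens: 4096 } },
}
console.log(forceStreaming(req).model?.parameters.stream) // true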
@@ -244,16 +244,13 @@ const ChatInput: React.FC = () => {
              <li
                className={twMerge(
                  'flex w-full cursor-pointer items-center space-x-2 px-4 py-2 text-muted-foreground hover:bg-secondary',
-                  activeThread?.assistants[0].model.settings.vision_model &&
-                    activeThread?.assistants[0].model.settings
-                      .text_model === false
+                  activeThread?.assistants[0].model.settings.text_model ===
+                    false
                    ? 'cursor-not-allowed opacity-50'
                    : 'cursor-pointer'
                )}
                onClick={() => {
-                  if (
-                    !activeThread?.assistants[0].model.settings
-                      .vision_model ||
-                    activeThread?.assistants[0].model.settings
-                      .text_model !== false
-                  ) {
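The UI side now keys only off text_model. A hedged sketch of the gating rule implied by the hunk above (the helper name is illustrative, not code from the component): the entry is greyed out only when the active model's settings explicitly set text_model to false, as the TensorRT-LLM entries patched earlier in this commit now do.

// Illustrative helper, not part of this commit: disable the entry only when
// the model's settings explicitly carry text_model: false.
function isTextDisabled(settings?: { text_model?: boolean }): boolean {
  return settings?.text_model === false
}

isTextDisabled({ text_model: false }) // true  -> option disabled
isTextDisabled({})                    // false -> flag absent, option stays enabled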