diff --git a/README.md b/README.md
index b25a917d7..6f6044b30 100644
--- a/README.md
+++ b/README.md
@@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your computer
[The README download-table hunk did not survive extraction; only the artifact names remain: jan.exe, macOS Intel and M1/M2 builds, jan.deb, jan.AppImage.]
diff --git a/extensions/monitoring-extension/src/node/index.ts b/extensions/monitoring-extension/src/node/index.ts
index 1d65704de..25f151112 100644
--- a/extensions/monitoring-extension/src/node/index.ts
+++ b/extensions/monitoring-extension/src/node/index.ts
@@ -2,17 +2,17 @@ import { GpuSetting, GpuSettingInfo, ResourceInfo } from '@janhq/core'
import { getJanDataFolderPath, log } from '@janhq/core/node'
import { mem, cpu } from 'node-os-utils'
import { exec } from 'child_process'
-import { writeFileSync, existsSync, readFileSync } from 'fs'
+import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
import path from 'path'
+/**
+ * Path to the settings directory
+ **/
+export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
/**
* Path to the settings file
**/
-export const GPU_INFO_FILE = path.join(
- getJanDataFolderPath(),
- 'settings',
- 'settings.json'
-)
+export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
/**
* Default GPU settings
@@ -136,6 +136,11 @@ export const updateNvidiaInfo = async () => {
try {
JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
+ if (!existsSync(SETTINGS_DIR)) {
+ mkdirSync(SETTINGS_DIR, {
+ recursive: true,
+ })
+ }
writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
}
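The hunk above fixes a first-run failure: `writeFileSync` does not create missing parent directories, so writing `settings.json` threw `ENOENT` whenever `settings/` did not exist yet. A minimal standalone sketch of the pattern, with illustrative paths and a placeholder default shape standing in for `getJanDataFolderPath()` and the real `DEFAULT_SETTINGS`:

```typescript
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs'
import path from 'path'

// Illustrative stand-ins; the extension derives these from getJanDataFolderPath().
const SETTINGS_DIR = path.join('/tmp/jan-data', 'settings')
const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
const DEFAULT_SETTINGS = { run_mode: 'cpu', gpus: [] } // placeholder shape

function ensureDefaultSettings(): void {
  try {
    // If the file exists and parses, keep it as-is.
    JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
  } catch {
    // Create the parent directory first: writeFileSync throws ENOENT
    // when any path component is missing.
    if (!existsSync(SETTINGS_DIR)) {
      mkdirSync(SETTINGS_DIR, { recursive: true })
    }
    writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
  }
}
```

Calling `ensureDefaultSettings()` before any read makes the subsequent `JSON.parse` safe on a fresh install.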
diff --git a/extensions/tensorrt-llm-extension/models.json b/extensions/tensorrt-llm-extension/models.json
index bc6a78256..31bb11a9e 100644
--- a/extensions/tensorrt-llm-extension/models.json
+++ b/extensions/tensorrt-llm-extension/models.json
@@ -33,10 +33,10 @@
"description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
"format": "TensorRT-LLM",
"settings": {
- "ctx_len": 2048
+ "ctx_len": 2048,
+ "text_model": false
},
"parameters": {
- "stream": true,
"max_tokens": 4096
},
"metadata": {
diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts
index 076951c3f..e3014b447 100644
--- a/extensions/tensorrt-llm-extension/src/index.ts
+++ b/extensions/tensorrt-llm-extension/src/index.ts
@@ -19,6 +19,7 @@ import {
systemInformations,
LocalOAIEngine,
fs,
+ MessageRequest,
} from '@janhq/core'
import models from '../models.json'
@@ -144,4 +145,10 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
)
return Promise.resolve()
}
+
+ inference(data: MessageRequest): void {
+ // TensorRT LLM Extension supports streaming only
+ if (data.model) data.model.parameters.stream = true
+ super.inference(data)
+ }
}
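The override follows the usual subclass pattern for engines with protocol constraints: normalize the request, then delegate to the base class. A self-contained sketch under assumed types, since the real `MessageRequest` and `LocalOAIEngine` live in `@janhq/core`:

```typescript
// Stand-in types; the real definitions come from @janhq/core.
type ModelParams = { stream?: boolean; max_tokens?: number }
type Model = { id: string; parameters: ModelParams }
type MessageRequest = { model?: Model }

class BaseEngine {
  inference(data: MessageRequest): void {
    console.log('stream =', data.model?.parameters.stream)
  }
}

class TensorRTLLMEngine extends BaseEngine {
  inference(data: MessageRequest): void {
    // TensorRT-LLM only returns streamed responses, so force the flag
    // no matter what the caller requested.
    if (data.model) data.model.parameters.stream = true
    super.inference(data)
  }
}

// Usage: even with stream explicitly false, the engine streams.
new TensorRTLLMEngine().inference({
  model: { id: 'llamacorn-1.1b', parameters: { stream: false } },
})
```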
diff --git a/web/screens/Chat/ChatInput/index.tsx b/web/screens/Chat/ChatInput/index.tsx
index c90a12cd2..8707e8bcd 100644
--- a/web/screens/Chat/ChatInput/index.tsx
+++ b/web/screens/Chat/ChatInput/index.tsx
@@ -244,16 +244,13 @@ const ChatInput: React.FC = () => {