diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts
index 3af0404e3..bcd182cb5 100644
--- a/core/src/node/api/restful/helper/startStopModel.ts
+++ b/core/src/node/api/restful/helper/startStopModel.ts
@@ -63,11 +63,11 @@ const runModel = async (modelId: string, settingParams?: ModelSettingParams): Pr
 
   const nitroResourceProbe = await getSystemResourceInfo()
   const nitroModelSettings: NitroModelSettings = {
+    // This is critical and requires real CPU physical core count (or performance core)
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...modelMetadata.settings,
     ...settingParams,
     llama_model_path: modelBinaryPath,
-    // This is critical and requires real CPU physical core count (or performance core)
-    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...(modelMetadata.settings.mmproj && {
       mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
     }),
diff --git a/extensions/assistant-extension/src/node/engine.ts b/extensions/assistant-extension/src/node/engine.ts
index 17094ffbc..05a380340 100644
--- a/extensions/assistant-extension/src/node/engine.ts
+++ b/extensions/assistant-extension/src/node/engine.ts
@@ -15,6 +15,8 @@ export const readEmbeddingEngine = (engineName: string) => {
   const settingDirectoryPath = path.join(
     getJanDataFolderPath(),
     'settings',
+    '@janhq',
+    // TODO: James - To be removed
     engineName === 'openai'
       ? 'inference-openai-extension'
       : 'inference-groq-extension',
diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index dabda9aec..3cfdd3338 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-nitro-extension",
   "productName": "Nitro Inference Engine",
-  "version": "1.0.2",
+  "version": "1.0.4",
   "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
index a6827b391..2f4b5e0dc 100644
--- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
@@ -8,7 +8,7 @@
   "id": "command-r-34b",
   "object": "model",
   "name": "Command-R v01 34B Q4",
-  "version": "1.2",
+  "version": "1.3",
   "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
   "format": "gguf",
   "settings": {
@@ -27,7 +27,7 @@
   },
   "metadata": {
     "author": "CohereAI",
-    "tags": ["34B", "Finetuned"],
+    "tags": ["34B", "Finetuned", "Featured"],
     "size": 21500000000
   },
   "engine": "nitro"
diff --git a/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json b/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json
index 09c3c8f4e..e478ff4cd 100644
--- a/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json
@@ -8,7 +8,7 @@
   "id": "hermes-pro-7b",
   "object": "model",
   "name": "Hermes Pro 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Hermes Pro is superior in Roleplaying, Reasoning and Explaining problem.",
   "format": "gguf",
   "settings": {
@@ -27,7 +27,7 @@
   },
   "metadata": {
     "author": "NousResearch",
-    "tags": ["7B", "Finetuned", "Featured"],
+    "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
   "engine": "nitro"
diff --git a/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json b/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json
index a13a0f2b8..dbbc9e0ec 100644
--- a/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json
@@ -8,7 +8,7 @@
   "id": "openhermes-neural-7b",
   "object": "model",
   "name": "OpenHermes Neural 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
   "format": "gguf",
   "settings": {
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Intel, Jan",
-    "tags": ["7B", "Merged", "Featured"],
+    "tags": ["7B", "Merged"],
     "size": 4370000000,
     "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/openhermes-neural-7b/cover.png"
   },
diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
index 235cbbb88..93fa6b610 100644
--- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
@@ -8,7 +8,7 @@
   "id": "stealth-v1.2-7b",
   "object": "model",
   "name": "Stealth 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
   "format": "gguf",
   "settings": {
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Jan",
-    "tags": ["7B", "Finetuned", "Featured"],
+    "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
   "engine": "nitro"
diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
index 947629642..14444fbd4 100644
--- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
@@ -8,7 +8,7 @@
   "id": "trinity-v1.2-7b",
   "object": "model",
   "name": "Trinity-v1.2 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
   "format": "gguf",
   "settings": {
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Jan",
-    "tags": ["7B", "Merged", "Featured"],
+    "tags": ["7B", "Merged"],
     "size": 4370000000,
     "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
   },
diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
index 7d20ee8c7..fbfdb8761 100644
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@@ -131,10 +131,11 @@ async function loadModel(
   if (!llama_model_path) return Promise.reject('No GGUF model file found')
 
   currentSettings = {
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
+    // model.settings can override the default settings
     ...params.model.settings,
     llama_model_path,
     // This is critical and requires real CPU physical core count (or performance core)
-    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...(params.model.settings.mmproj && {
       mmproj: path.isAbsolute(params.model.settings.mmproj)
         ? params.model.settings.mmproj
diff --git a/extensions/monitoring-extension/src/node/logger.ts b/extensions/monitoring-extension/src/node/logger.ts
index 29a391313..ca64ea2d9 100644
--- a/extensions/monitoring-extension/src/node/logger.ts
+++ b/extensions/monitoring-extension/src/node/logger.ts
@@ -67,54 +67,54 @@ export class FileLogger extends Logger {
     const size = maxFileSizeBytes ?? 1 * 1024 * 1024 // 1 MB
     const days = daysToKeep ?? 7 // 7 days
     const logDirectory = path.join(getJanDataFolderPath(), 'logs')
-
     // Perform log cleaning
     const currentDate = new Date()
-    fs.readdir(logDirectory, (err, files) => {
-      if (err) {
-        console.error('Error reading log directory:', err)
-        return
-      }
+    if (fs.existsSync(logDirectory))
+      fs.readdir(logDirectory, (err, files) => {
+        if (err) {
+          console.error('Error reading log directory:', err)
+          return
+        }
 
-      files.forEach((file) => {
-        const filePath = path.join(logDirectory, file)
-        fs.stat(filePath, (err, stats) => {
-          if (err) {
-            console.error('Error getting file stats:', err)
-            return
-          }
+        files.forEach((file) => {
+          const filePath = path.join(logDirectory, file)
+          fs.stat(filePath, (err, stats) => {
+            if (err) {
+              console.error('Error getting file stats:', err)
+              return
+            }
 
-          // Check size
-          if (stats.size > size) {
-            fs.unlink(filePath, (err) => {
-              if (err) {
-                console.error('Error deleting log file:', err)
-                return
-              }
-              console.debug(
-                `Deleted log file due to exceeding size limit: ${filePath}`
-              )
-            })
-          } else {
-            // Check age
-            const creationDate = new Date(stats.ctime)
-            const daysDifference = Math.floor(
-              (currentDate.getTime() - creationDate.getTime()) /
-                (1000 * 3600 * 24)
-            )
-            if (daysDifference > days) {
+            // Check size
+            if (stats.size > size) {
               fs.unlink(filePath, (err) => {
                 if (err) {
                   console.error('Error deleting log file:', err)
                   return
                 }
-                console.debug(`Deleted old log file: ${filePath}`)
+                console.debug(
+                  `Deleted log file due to exceeding size limit: ${filePath}`
+                )
               })
+            } else {
+              // Check age
+              const creationDate = new Date(stats.ctime)
+              const daysDifference = Math.floor(
+                (currentDate.getTime() - creationDate.getTime()) /
+                  (1000 * 3600 * 24)
+              )
+              if (daysDifference > days) {
+                fs.unlink(filePath, (err) => {
+                  if (err) {
+                    console.error('Error deleting log file:', err)
+                    return
+                  }
+                  console.debug(`Deleted old log file: ${filePath}`)
+                })
+              }
             }
-          }
+          })
         })
       })
-    })
 
     // Schedule the next execution with doubled delays
     this.timeout = setTimeout(
diff --git a/web/containers/DropdownListSidebar/index.tsx b/web/containers/DropdownListSidebar/index.tsx
index 5bb3d29cb..d8e878cca 100644
--- a/web/containers/DropdownListSidebar/index.tsx
+++ b/web/containers/DropdownListSidebar/index.tsx
@@ -134,10 +134,19 @@ const DropdownListSidebar = ({
     }
 
     if (activeThread) {
+      // Default setting ctx_len for the model for a better onboarding experience
+      // TODO: When Cortex support hardware instructions, we should remove this
+      const overriddenSettings =
+        model?.settings.ctx_len && model.settings.ctx_len > 2048
+          ? { ctx_len: 2048 }
+          : {}
+
       const modelParams = {
         ...model?.parameters,
         ...model?.settings,
+        ...overriddenSettings,
       }
+
       // Update model parameter to the thread state
       setThreadModelParams(activeThread.id, modelParams)
 
diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx
index f772dd6cb..e4c96aeb7 100644
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@@ -20,6 +20,8 @@ import { ulid } from 'ulidx'
 import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
 
+import { toRuntimeParams } from '@/utils/modelParam'
+
 import { extensionManager } from '@/extension'
 
 import {
   getCurrentChatMessagesAtom,
@@ -32,6 +34,7 @@ import {
   threadsAtom,
   isGeneratingResponseAtom,
   updateThreadAtom,
+  getActiveThreadModelParamsAtom,
 } from '@/helpers/atoms/Thread.atom'
 
 const maxWordForThreadTitle = 10
@@ -54,6 +57,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const updateThread = useSetAtom(updateThreadAtom)
   const messagesRef = useRef(messages)
   const activeModelRef = useRef(activeModel)
+  const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
+  const activeModelParamsRef = useRef(activeModelParams)
 
   useEffect(() => {
     threadsRef.current = threads
@@ -71,6 +76,10 @@ export default function EventHandler({ children }: { children: ReactNode }) {
     activeModelRef.current = activeModel
   }, [activeModel])
 
+  useEffect(() => {
+    activeModelParamsRef.current = activeModelParams
+  }, [activeModelParams])
+
   const onNewMessageResponse = useCallback(
     (message: ThreadMessage) => {
       if (message.type === MessageRequestType.Thread) {
@@ -247,6 +256,8 @@
         },
       ]
 
+      const runtimeParams = toRuntimeParams(activeModelParamsRef.current)
+
       const messageRequest: MessageRequest = {
         id: msgId,
         threadId: message.thread_id,
@@ -255,6 +266,7 @@
         model: {
           ...activeModelRef.current,
           parameters: {
+            ...runtimeParams,
             stream: false,
           },
         },
diff --git a/web/containers/ServerLogs/index.tsx b/web/containers/ServerLogs/index.tsx
index f423a0873..f03088ae8 100644
--- a/web/containers/ServerLogs/index.tsx
+++ b/web/containers/ServerLogs/index.tsx
@@ -97,7 +97,7 @@ const ServerLogs = (props: ServerLogsProps) => {
{logs.slice(-limit).map((log, i) => {
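
Both `cpu_threads` moves above (in `startStopModel.ts` and in the nitro extension's `loadModel`) rely on object-spread ordering: later properties overwrite earlier ones, so listing `cpu_threads` before `...settings` demotes it from a hard override to an overridable default. A minimal standalone sketch of that precedence (the values are hypothetical):

```ts
// Later spread entries win, so position decides default vs. override.
const probed = { cpu_threads: 8 } // hypothetical physical-core probe result
const settings = { cpu_threads: 4 } // user/model-provided settings

// Old ordering: the probed value, listed last, always clobbered user settings.
const before = { ...settings, cpu_threads: probed.cpu_threads }
console.log(before.cpu_threads) // 8 - the user's 4 is lost

// New ordering: the probed value is only a fallback default.
const after = { cpu_threads: probed.cpu_threads, ...settings }
console.log(after.cpu_threads) // 4 - user settings take precedence
```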
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 1e648f60e..ce182483e 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -25,7 +25,7 @@ export const stateModelAtom = atom({
model: undefined,
})
-export let loadModelController: AbortController | undefined
+const pendingModelLoadAtom = atom(false)
export function useActiveModel() {
const [activeModel, setActiveModel] = useAtom(activeModelAtom)
@@ -33,6 +33,7 @@ export function useActiveModel() {
const [stateModel, setStateModel] = useAtom(stateModelAtom)
const downloadedModels = useAtomValue(downloadedModelsAtom)
const setLoadModelError = useSetAtom(loadModelErrorAtom)
+ const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom)
const downloadedModelsRef = useRef<Model[]>([])
@@ -40,7 +41,7 @@ export function useActiveModel() {
downloadedModelsRef.current = downloadedModels
}, [downloadedModels])
- const startModel = async (modelId: string) => {
+ const startModel = async (modelId: string, abortable: boolean = true) => {
if (
(activeModel && activeModel.id === modelId) ||
(stateModel.model?.id === modelId && stateModel.loading)
@@ -48,7 +49,7 @@ export function useActiveModel() {
console.debug(`Model ${modelId} is already initialized. Ignore..`)
return Promise.resolve()
}
- loadModelController = new AbortController()
+ setPendingModelLoad(true)
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
@@ -107,15 +108,16 @@ export function useActiveModel() {
})
})
.catch((error) => {
- if (loadModelController?.signal.aborted)
- return Promise.reject(new Error('aborted'))
-
setStateModel(() => ({
state: 'start',
loading: false,
model,
}))
+ if (!pendingModelLoad && abortable) {
+ return Promise.reject(new Error('aborted'))
+ }
+
toaster({
title: 'Failed!',
description: `Model ${model.id} failed to start.`,
@@ -139,9 +141,15 @@ export function useActiveModel() {
.then(() => {
setActiveModel(undefined)
setStateModel({ state: 'start', loading: false, model: undefined })
- loadModelController?.abort()
+ setPendingModelLoad(false)
})
- }, [activeModel, setActiveModel, setStateModel, stateModel])
+ }, [
+ activeModel,
+ setActiveModel,
+ setStateModel,
+ setPendingModelLoad,
+ stateModel,
+ ])
const stopInference = useCallback(async () => {
// Loading model
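
The hunks above drop the module-level `loadModelController` in favour of a `pendingModelLoadAtom` shared through Jotai, so an explicit `stopModel()` is detected as an abort inside the `.catch` instead of via an `AbortController` signal. A rough sketch of the pattern outside React (a simplification assuming Jotai v2's `createStore`; `load` stands in for the real extension call):

```ts
import { atom, createStore } from 'jotai'

// True while a model load is in flight; only stopModel() clears it,
// mirroring pendingModelLoadAtom in the hook above.
const pendingModelLoadAtom = atom(false)
const store = createStore()

async function startModel(load: () => Promise<void>) {
  store.set(pendingModelLoadAtom, true)
  try {
    await load()
  } catch (err) {
    // If the flag was cleared, a stop happened mid-load: report an abort,
    // not a load failure (mirrors the `!pendingModelLoad && abortable` check).
    if (!store.get(pendingModelLoadAtom)) throw new Error('aborted')
    throw err
  }
}

function stopModel() {
  store.set(pendingModelLoadAtom, false)
}
```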
diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts
index ef57bc982..e42bc1d4c 100644
--- a/web/hooks/useCreateNewThread.ts
+++ b/web/hooks/useCreateNewThread.ts
@@ -94,6 +94,11 @@ export const useCreateNewThread = () => {
settings: assistant.tools && assistant.tools[0].settings,
}
+ const overriddenSettings =
+ defaultModel?.settings.ctx_len && defaultModel.settings.ctx_len > 2048
+ ? { ctx_len: 2048 }
+ : {}
+
const createdAt = Date.now()
const assistantInfo: ThreadAssistantInfo = {
assistant_id: assistant.id,
@@ -101,7 +106,7 @@ export const useCreateNewThread = () => {
tools: experimentalEnabled ? [assistantTools] : assistant.tools,
model: {
id: defaultModel?.id ?? '*',
- settings: defaultModel?.settings ?? {},
+ settings: { ...defaultModel?.settings, ...overriddenSettings } ?? {},
parameters: defaultModel?.parameters ?? {},
engine: defaultModel?.engine,
},
@@ -126,6 +131,7 @@ export const useCreateNewThread = () => {
setThreadModelParams(thread.id, {
...defaultModel?.settings,
...defaultModel?.parameters,
+ ...overriddenSettings,
})
// Delete the file upload state
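
This is the same `ctx_len` clamp added to `DropdownListSidebar` above; both call sites inline it, and per the TODO it is a temporary onboarding guard. Factored out, the shared logic would look roughly like this (a hypothetical helper, not part of the patch):

```ts
// Hypothetical helper mirroring both inline overrides: cap a model's
// declared context length at 2048 until hardware can be probed.
const ONBOARDING_CTX_LEN = 2048

const ctxLenOverride = (settings?: { ctx_len?: number }) =>
  settings?.ctx_len && settings.ctx_len > ONBOARDING_CTX_LEN
    ? { ctx_len: ONBOARDING_CTX_LEN }
    : {}

// Usage: spread last so the cap wins over the model's own settings.
// const params = { ...model?.settings, ...ctxLenOverride(model?.settings) }
```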
diff --git a/web/screens/Chat/ModelSetting/predefinedComponent.ts b/web/screens/Chat/ModelSetting/predefinedComponent.ts
index a52214e38..652389d4a 100644
--- a/web/screens/Chat/ModelSetting/predefinedComponent.ts
+++ b/web/screens/Chat/ModelSetting/predefinedComponent.ts
@@ -165,6 +165,21 @@ export const presetConfiguration: Record<string, SettingComponentProps> = {
requireModelReload: true,
configType: 'setting',
},
+ cpu_threads: {
+ key: 'cpu_threads',
+ title: 'CPU Threads',
+ description:
+ 'Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)',
+ controllerType: 'slider',
+ controllerProps: {
+ min: 0,
+ max: 128,
+ step: 1,
+ value: 1,
+ },
+ requireModelReload: true,
+ configType: 'setting',
+ },
// assistant
chunk_size: {
key: 'chunk_size',
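
The preset above only declares UI bounds; the actual thread count still comes from the engine-side probe in the earlier hunks. A consumer would presumably clamp a probed core count into the declared slider range, along these lines (the wiring outside `presetConfiguration` is hypothetical):

```ts
// Hypothetical wiring: seed the cpu_threads slider from a hardware probe,
// clamped into the bounds the preset declares.
const preset = presetConfiguration['cpu_threads']
const { min, max } = preset.controllerProps as { min: number; max: number }
const probedPhysicalCores = 8 // stand-in for getSystemResourceInfo()
const initialCpuThreads = Math.min(Math.max(probedPhysicalCores, min), max)
```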
diff --git a/web/screens/LocalServer/index.tsx b/web/screens/LocalServer/index.tsx
index db7baec5a..aa7dbd57c 100644
--- a/web/screens/LocalServer/index.tsx
+++ b/web/screens/LocalServer/index.tsx
@@ -155,12 +155,12 @@ const LocalServerScreen = () => {
isCorsEnabled,
isVerboseEnabled,
})
- await startModel(selectedModel.id)
if (isStarted) setServerEnabled(true)
if (firstTimeVisitAPIServer) {
localStorage.setItem(FIRST_TIME_VISIT_API_SERVER, 'false')
setFirstTimeVisitAPIServer(false)
}
+ startModel(selectedModel.id, false).catch((e) => console.error(e))
} catch (e) {
console.error(e)
toaster({
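
Note the intent of the reorder above: the server is marked enabled as soon as it starts, and the model load is kicked off without `await` (and with `abortable` disabled), so a slow or failing load no longer blocks the Local API Server flow; the trailing `.catch` keeps the rejection from surfacing as an unhandled promise. The same fire-and-forget shape in isolation (names are illustrative):

```ts
// Fire-and-forget model start: the caller proceeds immediately,
// and load failures are only logged, never thrown into its flow.
async function enableServer(
  startServer: () => Promise<boolean>,
  startModel: () => Promise<void>
) {
  const isStarted = await startServer()
  // Intentionally not awaited.
  startModel().catch((e) => console.error(e))
  return isStarted
}
```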