From da161cd1596742f77e096a8a6fbd0a1a7824608a Mon Sep 17 00:00:00 2001
From: Louis
Date: Tue, 23 Apr 2024 15:09:48 +0700
Subject: [PATCH 01/16] fix: override cpu_threads setting from model.json (#2789)

---
 .../node/api/restful/helper/startStopModel.ts |  4 +-
 .../assistant-extension/src/node/engine.ts    |  2 +
 .../src/node/index.ts                         |  3 +-
 .../monitoring-extension/src/node/logger.ts   | 70 +++++++++----------
 web/containers/ServerLogs/index.tsx           |  2 +-
 .../Chat/ModelSetting/predefinedComponent.ts  | 15 ++++
 6 files changed, 57 insertions(+), 39 deletions(-)

diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts
index 3af0404e3..bcd182cb5 100644
--- a/core/src/node/api/restful/helper/startStopModel.ts
+++ b/core/src/node/api/restful/helper/startStopModel.ts
@@ -63,11 +63,11 @@ const runModel = async (modelId: string, settingParams?: ModelSettingParams): Pr
   const nitroResourceProbe = await getSystemResourceInfo()
 
   const nitroModelSettings: NitroModelSettings = {
+    // This is critical and requires real CPU physical core count (or performance core)
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...modelMetadata.settings,
     ...settingParams,
     llama_model_path: modelBinaryPath,
-    // This is critical and requires real CPU physical core count (or performance core)
-    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...(modelMetadata.settings.mmproj && {
       mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
     }),

diff --git a/extensions/assistant-extension/src/node/engine.ts b/extensions/assistant-extension/src/node/engine.ts
index 17094ffbc..05a380340 100644
--- a/extensions/assistant-extension/src/node/engine.ts
+++ b/extensions/assistant-extension/src/node/engine.ts
@@ -15,6 +15,8 @@ export const readEmbeddingEngine = (engineName: string) => {
   const settingDirectoryPath = path.join(
     getJanDataFolderPath(),
     'settings',
+    '@janhq',
+    // TODO: James - To be removed
     engineName === 'openai'
       ? 'inference-openai-extension'
       : 'inference-groq-extension',

diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
index 7d20ee8c7..fbfdb8761 100644
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@@ -131,10 +131,11 @@ async function loadModel(
   if (!llama_model_path) return Promise.reject('No GGUF model file found')
 
   currentSettings = {
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
+    // model.settings can override the default settings
    ...params.model.settings,
     llama_model_path,
     // This is critical and requires real CPU physical core count (or performance core)
-    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...(params.model.settings.mmproj && {
       mmproj: path.isAbsolute(params.model.settings.mmproj)
         ? params.model.settings.mmproj

diff --git a/extensions/monitoring-extension/src/node/logger.ts b/extensions/monitoring-extension/src/node/logger.ts
index 29a391313..ca64ea2d9 100644
--- a/extensions/monitoring-extension/src/node/logger.ts
+++ b/extensions/monitoring-extension/src/node/logger.ts
@@ -67,54 +67,54 @@ export class FileLogger extends Logger {
     const size = maxFileSizeBytes ?? 1 * 1024 * 1024 // 1 MB
     const days = daysToKeep ?? 7 // 7 days
     const logDirectory = path.join(getJanDataFolderPath(), 'logs')
-    // Perform log cleaning
     const currentDate = new Date()
-    fs.readdir(logDirectory, (err, files) => {
-      if (err) {
-        console.error('Error reading log directory:', err)
-        return
-      }
+    if (fs.existsSync(logDirectory))
+      fs.readdir(logDirectory, (err, files) => {
+        if (err) {
+          console.error('Error reading log directory:', err)
+          return
+        }
 
-      files.forEach((file) => {
-        const filePath = path.join(logDirectory, file)
-        fs.stat(filePath, (err, stats) => {
-          if (err) {
-            console.error('Error getting file stats:', err)
-            return
-          }
+        files.forEach((file) => {
+          const filePath = path.join(logDirectory, file)
+          fs.stat(filePath, (err, stats) => {
+            if (err) {
+              console.error('Error getting file stats:', err)
+              return
+            }
 
-          // Check size
-          if (stats.size > size) {
-            fs.unlink(filePath, (err) => {
-              if (err) {
-                console.error('Error deleting log file:', err)
-                return
-              }
-              console.debug(
-                `Deleted log file due to exceeding size limit: ${filePath}`
-              )
-            })
-          } else {
-            // Check age
-            const creationDate = new Date(stats.ctime)
-            const daysDifference = Math.floor(
-              (currentDate.getTime() - creationDate.getTime()) /
-                (1000 * 3600 * 24)
-            )
-            if (daysDifference > days) {
+            // Check size
+            if (stats.size > size) {
               fs.unlink(filePath, (err) => {
                 if (err) {
                   console.error('Error deleting log file:', err)
                   return
                 }
-                console.debug(`Deleted old log file: ${filePath}`)
+                console.debug(
+                  `Deleted log file due to exceeding size limit: ${filePath}`
+                )
               })
+            } else {
+              // Check age
+              const creationDate = new Date(stats.ctime)
+              const daysDifference = Math.floor(
+                (currentDate.getTime() - creationDate.getTime()) /
+                  (1000 * 3600 * 24)
+              )
+              if (daysDifference > days) {
+                fs.unlink(filePath, (err) => {
+                  if (err) {
+                    console.error('Error deleting log file:', err)
+                    return
+                  }
+                  console.debug(`Deleted old log file: ${filePath}`)
+                })
+              }
            }
-          }
+          })
         })
       })
-    })
 
     // Schedule the next execution with doubled delays
     this.timeout = setTimeout(

diff --git a/web/containers/ServerLogs/index.tsx b/web/containers/ServerLogs/index.tsx
index f423a0873..f03088ae8 100644
--- a/web/containers/ServerLogs/index.tsx
+++ b/web/containers/ServerLogs/index.tsx
@@ -97,7 +97,7 @@ const ServerLogs = (props: ServerLogsProps) => {
-          {logs.length > 1 ? (
+          {logs.length > 0 ? (
           {logs.slice(-limit).map((log, i) => {

diff --git a/web/screens/Chat/ModelSetting/predefinedComponent.ts b/web/screens/Chat/ModelSetting/predefinedComponent.ts
index a52214e38..652389d4a 100644
--- a/web/screens/Chat/ModelSetting/predefinedComponent.ts
+++ b/web/screens/Chat/ModelSetting/predefinedComponent.ts
@@ -165,6 +165,21 @@ export const presetConfiguration: Record<string, SettingComponentProps> = {
     requireModelReload: true,
     configType: 'setting',
   },
+  cpu_threads: {
+    key: 'cpu_threads',
+    title: 'CPU Threads',
+    description:
+      'Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)',
+    controllerType: 'slider',
+    controllerProps: {
+      min: 0,
+      max: 128,
+      step: 1,
+      value: 1,
+    },
+    requireModelReload: true,
+    configType: 'setting',
+  },
   // assistant
   chunk_size: {
     key: 'chunk_size',

From 48f351aefd39bcc52933f602bacb1bc143010e25 Mon Sep 17 00:00:00 2001
From: Louis
Date: Wed, 24 Apr 2024 14:14:04 +0700
Subject: [PATCH 02/16] fix: new thread with overridden settings (#2798)

---
 web/containers/DropdownListSidebar/index.tsx | 9 +++++++++
 web/hooks/useCreateNewThread.ts              | 8 +++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/web/containers/DropdownListSidebar/index.tsx b/web/containers/DropdownListSidebar/index.tsx
index 5bb3d29cb..d8e878cca 100644
--- a/web/containers/DropdownListSidebar/index.tsx
+++ b/web/containers/DropdownListSidebar/index.tsx
@@ -134,10 +134,19 @@ const DropdownListSidebar = ({
     }
 
     if (activeThread) {
+      // Default setting ctx_len for the model for a better onboarding experience
+      // TODO: When Cortex support hardware instructions, we should remove this
+      const overriddenSettings =
+        model?.settings.ctx_len && model.settings.ctx_len > 2048
+          ? { ctx_len: 2048 }
+          : {}
+
       const modelParams = {
         ...model?.parameters,
         ...model?.settings,
+        ...overriddenSettings,
       }
+
       // Update model parameter to the thread state
       setThreadModelParams(activeThread.id, modelParams)

diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts
index ef57bc982..e42bc1d4c 100644
--- a/web/hooks/useCreateNewThread.ts
+++ b/web/hooks/useCreateNewThread.ts
@@ -94,6 +94,11 @@ export const useCreateNewThread = () => {
       settings: assistant.tools && assistant.tools[0].settings,
     }
 
+    const overriddenSettings =
+      defaultModel?.settings.ctx_len && defaultModel.settings.ctx_len > 2048
+        ? { ctx_len: 2048 }
+        : {}
+
     const createdAt = Date.now()
     const assistantInfo: ThreadAssistantInfo = {
       assistant_id: assistant.id,
       tools: experimentalEnabled ? [assistantTools] : assistant.tools,
       model: {
         id: defaultModel?.id ?? '*',
-        settings: defaultModel?.settings ?? {},
+        settings: { ...defaultModel?.settings, ...overriddenSettings } ?? {},
         parameters: defaultModel?.parameters ?? {},
         engine: defaultModel?.engine,
       },
@@ -126,6 +131,7 @@
     setThreadModelParams(thread.id, {
       ...defaultModel?.settings,
       ...defaultModel?.parameters,
+      ...overriddenSettings,
     })
 
     // Delete the file upload state

From d14c3af99b082ee3dd436aa08624d65f33e13a3d Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 16:35:05 +0700
Subject: [PATCH 03/16] add: featured

---
 .../resources/models/command-r-34b/model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
index a6827b391..eb4986e69 100644
--- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
@@ -27,7 +27,7 @@
   },
   "metadata": {
     "author": "CohereAI",
-    "tags": ["34B", "Finetuned"],
+    "tags": ["34B", "Finetuned", "Featured"],
     "size": 21500000000
   },
   "engine": "nitro"

From 3810b1a009fc9270226f6b7553e5a486e7968327 Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 16:37:48 +0700
Subject: [PATCH 04/16] fix: remove featured

---
 .../resources/models/hermes-pro-7b/model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json b/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json
index 09c3c8f4e..5d35c3c12 100644
--- a/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json
@@ -27,7 +27,7 @@
   },
   "metadata": {
     "author": "NousResearch",
-    "tags": ["7B", "Finetuned", "Featured"],
+    "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
   "engine": "nitro"

From 6bf12e42a8f28568a4815c50350b88bdc2ddd952 Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 16:38:09 +0700
Subject: [PATCH 05/16] fix: remove featured

---
 .../resources/models/openhermes-neural-7b/model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json b/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json
index a13a0f2b8..ae20c8af0 100644
--- a/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Intel, Jan",
-    "tags": ["7B", "Merged", "Featured"],
+    "tags": ["7B", "Merged"],
     "size": 4370000000,
     "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/openhermes-neural-7b/cover.png"
   },

From f5c4324f79fcce9385bf16b8742c7cb02b1e19ac Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 16:38:30 +0700
Subject: [PATCH 06/16] fix: remove featured

---
 .../resources/models/stealth-v1.2-7b/model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
index 235cbbb88..35ac9f5bb 100644
--- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Jan",
-    "tags": ["7B", "Finetuned", "Featured"],
+    "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
   "engine": "nitro"

From e076c5ba4e28876397ff45f8089acd5b3f2ac1fb Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 16:38:47 +0700
Subject: [PATCH 07/16] fix: remove featured

---
 .../resources/models/trinity-v1.2-7b/model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
index 947629642..b2ea4b6cb 100644
--- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Jan",
-    "tags": ["7B", "Merged", "Featured"],
+    "tags": ["7B", "Merged"],
     "size": 4370000000,
     "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
   },

From 68b0018d55714b8c3ba809209d8a0aa20c76cc0c Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 16:40:27 +0700
Subject: [PATCH 08/16] fix: version bump

---
 extensions/inference-nitro-extension/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index dabda9aec..9a98eed8c 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-nitro-extension",
   "productName": "Nitro Inference Engine",
-  "version": "1.0.2",
+  "version": "1.0.3",
   "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",

From 7d4cc67aa19852180c105e70587696072b811187 Mon Sep 17 00:00:00 2001
From: NamH
Date: Wed, 24 Apr 2024 20:20:15 +0700
Subject: [PATCH 09/16] fix: stop server if failed to load model (#2807)

fix: load model error start server state not update

Co-authored-by: James
---
 web/hooks/useActiveModel.ts       | 24 ++++++++++++++++--------
 web/screens/LocalServer/index.tsx |  2 +-
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 1e648f60e..ce182483e 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -25,7 +25,7 @@ export const stateModelAtom = atom({
   model: undefined,
 })
 
-export let loadModelController: AbortController | undefined
+const pendingModelLoadAtom = atom(false)
 
 export function useActiveModel() {
   const [activeModel, setActiveModel] = useAtom(activeModelAtom)
   const [stateModel, setStateModel] = useAtom(stateModelAtom)
   const downloadedModels = useAtomValue(downloadedModelsAtom)
   const setLoadModelError = useSetAtom(loadModelErrorAtom)
+  const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom)
 
   const downloadedModelsRef = useRef([])
 
     downloadedModelsRef.current = downloadedModels
   }, [downloadedModels])
 
-  const startModel = async (modelId: string) => {
+  const startModel = async (modelId: string, abortable: boolean = true) => {
     if (
       (activeModel && activeModel.id === modelId) ||
       (stateModel.model?.id === modelId && stateModel.loading)
     ) {
       console.debug(`Model ${modelId} is already initialized. Ignore..`)
       return Promise.resolve()
     }
-    loadModelController = new AbortController()
+    setPendingModelLoad(true)
 
     let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
 
       .catch((error) => {
-        if (loadModelController?.signal.aborted)
-          return Promise.reject(new Error('aborted'))
-
         setStateModel(() => ({
           state: 'start',
           loading: false,
           model,
         }))
 
+        if (!pendingModelLoad && abortable) {
+          return Promise.reject(new Error('aborted'))
+        }
+
         toaster({
           title: 'Failed!',
           description: `Model ${model.id} failed to start.`,
 
       .then(() => {
         setActiveModel(undefined)
         setStateModel({ state: 'start', loading: false, model: undefined })
-        loadModelController?.abort()
+        setPendingModelLoad(false)
       })
-  }, [activeModel, setActiveModel, setStateModel, stateModel])
+  }, [
+    activeModel,
+    setActiveModel,
+    setStateModel,
+    setPendingModelLoad,
+    stateModel,
+  ])
 
   const stopInference = useCallback(async () => {
     // Loading model

diff --git a/web/screens/LocalServer/index.tsx b/web/screens/LocalServer/index.tsx
index db7baec5a..aa7dbd57c 100644
--- a/web/screens/LocalServer/index.tsx
+++ b/web/screens/LocalServer/index.tsx
@@ -155,12 +155,12 @@ const LocalServerScreen = () => {
         isCorsEnabled,
         isVerboseEnabled,
       })
-      await startModel(selectedModel.id)
       if (isStarted) setServerEnabled(true)
       if (firstTimeVisitAPIServer) {
         localStorage.setItem(FIRST_TIME_VISIT_API_SERVER, 'false')
         setFirstTimeVisitAPIServer(false)
       }
+      startModel(selectedModel.id, false).catch((e) => console.error(e))
     } catch (e) {
       console.error(e)
       toaster({

From b4b8c862c030c99e6e49b6ef135ea72cafd5f2db Mon Sep 17 00:00:00 2001
From: Louis
Date: Wed, 24 Apr 2024 21:17:19 +0700
Subject: [PATCH 10/16] fix: infinite talking in auto thread titles generators (#2810)

---
 web/containers/Providers/EventHandler.tsx | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx
index f772dd6cb..e4c96aeb7 100644
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@@ -20,6 +20,8 @@ import { ulid } from 'ulidx'
 
 import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
 
+import { toRuntimeParams } from '@/utils/modelParam'
+
 import { extensionManager } from '@/extension'
 import {
   getCurrentChatMessagesAtom,
@@ -32,6 +34,7 @@
   threadsAtom,
   isGeneratingResponseAtom,
   updateThreadAtom,
+  getActiveThreadModelParamsAtom,
 } from '@/helpers/atoms/Thread.atom'
 
 const maxWordForThreadTitle = 10
@@ -54,6 +57,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const updateThread = useSetAtom(updateThreadAtom)
   const messagesRef = useRef(messages)
   const activeModelRef = useRef(activeModel)
+  const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
+  const activeModelParamsRef = useRef(activeModelParams)
 
   useEffect(() => {
     threadsRef.current = threads
 
   useEffect(() => {
     activeModelRef.current = activeModel
   }, [activeModel])
 
+  useEffect(() => {
+    activeModelParamsRef.current = activeModelParams
+  }, [activeModelParams])
+
   const onNewMessageResponse = useCallback(
     (message: ThreadMessage) => {
       if (message.type === MessageRequestType.Thread) {
 
         },
       ]
 
+      const runtimeParams = toRuntimeParams(activeModelParamsRef.current)
+
       const messageRequest: MessageRequest = {
         id: msgId,
         threadId: message.thread_id,
         model: {
           ...activeModelRef.current,
           parameters: {
+            ...runtimeParams,
             stream: false,
           },
         },

From 984838a7bc902866a0ef55e508b5adc3732026e0 Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 22:19:11 +0700
Subject: [PATCH 11/16] fix: bump version

---
 .../resources/models/command-r-34b/model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
index eb4986e69..2f4b5e0dc 100644
--- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
@@ -8,7 +8,7 @@
   "id": "command-r-34b",
   "object": "model",
   "name": "Command-R v01 34B Q4",
-  "version": "1.2",
+  "version": "1.3",
   "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
   "format": "gguf",
   "settings": {

From 4d80f5c3c16953f01a0e28cc71a911378ac314f3 Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 22:22:14 +0700
Subject: [PATCH 12/16] fix: bump version

---
 .../resources/models/hermes-pro-7b/model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json b/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json
index 5d35c3c12..e478ff4cd 100644
--- a/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json
@@ -8,7 +8,7 @@
   "id": "hermes-pro-7b",
   "object": "model",
   "name": "Hermes Pro 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Hermes Pro is superior in Roleplaying, Reasoning and Explaining problem.",
   "format": "gguf",
   "settings": {

From ec589b1f224e44d042a3be674839a6d41fa9e7f4 Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 22:23:40 +0700
Subject: [PATCH 13/16] fix: bump version

---
 .../resources/models/openhermes-neural-7b/model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json b/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json
index ae20c8af0..dbbc9e0ec 100644
--- a/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json
@@ -8,7 +8,7 @@
   "id": "openhermes-neural-7b",
   "object": "model",
   "name": "OpenHermes Neural 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
   "format": "gguf",
   "settings": {

From 785b84d9ec9d0638fda4681a8d675a8cf3270393 Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 22:24:00 +0700
Subject: [PATCH 14/16] fix: bump version

---
 .../resources/models/stealth-v1.2-7b/model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
index 35ac9f5bb..93fa6b610 100644
--- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
@@ -8,7 +8,7 @@
   "id": "stealth-v1.2-7b",
   "object": "model",
   "name": "Stealth 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
   "format": "gguf",
   "settings": {

From eb3593e96a5d6009d80549e19b18880821b43f92 Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 22:24:22 +0700
Subject: [PATCH 15/16] fix: bump version

---
 .../resources/models/trinity-v1.2-7b/model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
index b2ea4b6cb..14444fbd4 100644
--- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
@@ -8,7 +8,7 @@
   "id": "trinity-v1.2-7b",
   "object": "model",
   "name": "Trinity-v1.2 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
   "format": "gguf",
   "settings": {

From f9a8e06a4f916a7b159912d996157f8a85918ca6 Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Wed, 24 Apr 2024 22:46:56 +0700
Subject: [PATCH 16/16] fix: version bump

---
 extensions/inference-nitro-extension/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index 9a98eed8c..3cfdd3338 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-nitro-extension",
   "productName": "Nitro Inference Engine",
-  "version": "1.0.3",
+  "version": "1.0.4",
   "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
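The fix in PATCH 01 (and the ctx_len override in PATCH 02) rests on JavaScript object-spread precedence: when spreads collide on a key, the last one wins, so the hardware-derived cpu_threads default has to be listed before ...modelMetadata.settings for a model.json value to override it. A minimal standalone TypeScript sketch of that ordering (the Settings type and buildSettings function are illustrative names, not exports of the Jan codebase):

  type Settings = { cpu_threads?: number; ctx_len?: number }

  const buildSettings = (
    physicalCores: number,
    modelJsonSettings: Settings,
    requestParams: Settings = {}
  ): Settings => ({
    // The hardware-derived default goes first...
    cpu_threads: Math.max(1, physicalCores),
    // ...so the later spreads (model.json, then per-request params)
    // win whenever keys collide.
    ...modelJsonSettings,
    ...requestParams,
  })

  // Prints { cpu_threads: 4, ctx_len: 2048 }: the model.json value (4)
  // overrides the default derived from 8 physical cores.
  console.log(buildSettings(8, { cpu_threads: 4, ctx_len: 2048 }))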