diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts
index aaafbf7e5..55cde56b4 100644
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@@ -91,8 +91,12 @@ export function requestInference(
           const toParse = cachedLines + line
           if (!line.includes('data: [DONE]')) {
             const data = JSON.parse(toParse.replace('data: ', ''))
-            if ('error' in data) {
-              subscriber.error(data.error)
+            if (
+              'error' in data ||
+              'message' in data ||
+              'detail' in data
+            ) {
+              subscriber.error(data.error ?? data)
               subscriber.complete()
               return
             }
diff --git a/extensions/engine-management-extension/models/anthropic.json b/extensions/engine-management-extension/models/anthropic.json
index d35ba4c22..46b5893d1 100644
--- a/extensions/engine-management-extension/models/anthropic.json
+++ b/extensions/engine-management-extension/models/anthropic.json
@@ -8,7 +8,7 @@
     "inference_params": {
       "max_tokens": 4096,
       "temperature": 0.7,
-      "stream": false
+      "stream": true
     },
     "engine": "anthropic"
   },
@@ -21,7 +21,7 @@
     "inference_params": {
       "max_tokens": 8192,
       "temperature": 0.7,
-      "stream": false
+      "stream": true
     },
     "engine": "anthropic"
   },
diff --git a/extensions/engine-management-extension/models/martian.json b/extensions/engine-management-extension/models/martian.json
index b935587cc..9ce7b69ba 100644
--- a/extensions/engine-management-extension/models/martian.json
+++ b/extensions/engine-management-extension/models/martian.json
@@ -5,7 +5,7 @@
     "name": "Martian Model Router",
     "version": "1.0",
     "description": "Martian Model Router dynamically routes requests to the best LLM in real-time",
-    "parameters": {
+    "inference_params": {
       "max_tokens": 4096,
       "temperature": 0.7,
       "top_p": 0.95,
diff --git a/extensions/engine-management-extension/models/mistral.json b/extensions/engine-management-extension/models/mistral.json
index 47833a31c..12fcf938d 100644
--- a/extensions/engine-management-extension/models/mistral.json
+++ b/extensions/engine-management-extension/models/mistral.json
@@ -5,7 +5,7 @@
     "name": "Mistral Small",
     "version": "1.1",
     "description": "Mistral Small is the ideal choice for simple tasks (Classification, Customer Support, or Text Generation) at an affordable price.",
-    "parameters": {
+    "inference_params": {
       "max_tokens": 32000,
       "temperature": 0.7,
       "top_p": 0.95,
@@ -19,7 +19,7 @@
     "name": "Mistral Large",
     "version": "1.1",
     "description": "Mistral Large is ideal for complex tasks (Synthetic Text Generation, Code Generation, RAG, or Agents).",
-    "parameters": {
+    "inference_params": {
       "max_tokens": 32000,
       "temperature": 0.7,
       "top_p": 0.95,
@@ -33,7 +33,7 @@
     "name": "Mixtral 8x22B",
     "version": "1.1",
     "description": "Mixtral 8x22B is a high-performance, cost-effective model designed for complex tasks.",
-    "parameters": {
+    "inference_params": {
       "max_tokens": 32000,
       "temperature": 0.7,
       "top_p": 0.95,
diff --git a/extensions/engine-management-extension/models/nvidia.json b/extensions/engine-management-extension/models/nvidia.json
index f2adac779..dfce9f8bc 100644
--- a/extensions/engine-management-extension/models/nvidia.json
+++ b/extensions/engine-management-extension/models/nvidia.json
@@ -5,7 +5,7 @@
     "name": "Mistral 7B",
     "version": "1.1",
     "description": "Mistral 7B with NVIDIA",
-    "parameters": {
+    "inference_params": {
       "max_tokens": 1024,
       "temperature": 0.3,
       "top_p": 1,
diff --git a/extensions/engine-management-extension/models/openrouter.json b/extensions/engine-management-extension/models/openrouter.json
index 5ac189a81..b9714bb57 100644
--- a/extensions/engine-management-extension/models/openrouter.json
+++ b/extensions/engine-management-extension/models/openrouter.json
@@ -5,7 +5,7 @@
     "name": "OpenRouter",
     "version": "1.0",
     "description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
-    "parameters": {
+    "inference_params": {
       "max_tokens": 128000,
       "temperature": 0.7,
       "top_p": 0.95,
diff --git a/extensions/engine-management-extension/resources/anthropic.json b/extensions/engine-management-extension/resources/anthropic.json
index 81d6c99c0..12a3f08b8 100644
--- a/extensions/engine-management-extension/resources/anthropic.json
+++ b/extensions/engine-management-extension/resources/anthropic.json
@@ -15,7 +15,7 @@
   },
   "transform_resp": {
     "chat_completions": {
-      "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": \"{% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% endif %}\", \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
+      "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
     }
   }
 }
diff --git a/extensions/engine-management-extension/resources/mistral.json b/extensions/engine-management-extension/resources/mistral.json
index 40a77cbf1..3f447dc4c 100644
--- a/extensions/engine-management-extension/resources/mistral.json
+++ b/extensions/engine-management-extension/resources/mistral.json
@@ -15,7 +15,7 @@
   },
   "transform_resp": {
     "chat_completions": {
-      "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+      "template": "{{tojson(input_request)}}"
     }
   }
 }
diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index fad0ae17f..d3b342f1a 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.9-rc3
+1.0.9-rc4
diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx
index 06eebea92..7a7dd1e24 100644
--- a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx
+++ b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx
@@ -1,19 +1,20 @@
 import { Tooltip, Button, Badge } from '@janhq/joi'
-import { useAtom, useAtomValue } from 'jotai'
+import { useAtom } from 'jotai'

 import { useActiveModel } from '@/hooks/useActiveModel'
+import { useGetEngines } from '@/hooks/useEngineManagement'
+
 import { toGibibytes } from '@/utils/converter'
 import { isLocalEngine } from '@/utils/modelEngine'

-import { installedEnginesAtom } from '@/helpers/atoms/Engines.atom'
 import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'

 const TableActiveModel = () => {
   const { activeModel, stateModel, stopModel } = useActiveModel()
-  const engines = useAtomValue(installedEnginesAtom)
+  const { engines } = useGetEngines()

   const [serverEnabled, setServerEnabled] = useAtom(serverEnabledAtom)

@@ -21,9 +22,7 @@ const TableActiveModel = () => {
| model.id === modelId)
- setSelectedModel(model)
+ if (model) {
+ if (
+ engines?.[model.engine]?.[0]?.type === 'local' ||
+ (engines?.[model.engine]?.[0]?.api_key?.length ?? 0) > 0
+ )
+ setSelectedModel(model)
+ } else {
+ setSelectedModel(undefined)
+ }
}, [
recommendedModel,
activeThread,
downloadedModels,
setSelectedModel,
activeAssistant?.model?.id,
+ engines,
])
const isLocalEngine = useCallback(
diff --git a/web/containers/Providers/DataLoader.tsx b/web/containers/Providers/DataLoader.tsx
index 01093e4b2..832e47d1a 100644
--- a/web/containers/Providers/DataLoader.tsx
+++ b/web/containers/Providers/DataLoader.tsx
@@ -2,11 +2,18 @@
import { Fragment, useEffect } from 'react'
-import { AppConfiguration, getUserHomePath } from '@janhq/core'
+import {
+ AppConfiguration,
+ EngineEvent,
+ events,
+ getUserHomePath,
+} from '@janhq/core'
import { useSetAtom } from 'jotai'
+import { useDebouncedCallback } from 'use-debounce'
+
import useAssistants from '@/hooks/useAssistants'
-import useEngines from '@/hooks/useEngines'
+import { useGetEngines } from '@/hooks/useEngineManagement'
import useGetSystemResources from '@/hooks/useGetSystemResources'
import useModels from '@/hooks/useModels'
import useThreads from '@/hooks/useThreads'
@@ -26,7 +33,7 @@ const DataLoader: React.FC = () => {
const setJanDefaultDataFolder = useSetAtom(defaultJanDataFolderAtom)
const setJanSettingScreen = useSetAtom(janSettingScreenAtom)
const { getData: loadModels } = useModels()
- const { getData: loadEngines } = useEngines()
+ const { mutate } = useGetEngines()
useThreads()
useAssistants()
@@ -35,9 +42,19 @@ const DataLoader: React.FC = () => {
useEffect(() => {
// Load data once
loadModels()
- loadEngines()
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
+ const reloadData = useDebouncedCallback(() => {
+ mutate()
+ }, 300)
+
+ useEffect(() => {
+ events.on(EngineEvent.OnEngineUpdate, reloadData)
+ return () => {
+ // Remove listener on unmount
+ events.off(EngineEvent.OnEngineUpdate, reloadData)
+ }
+ }, [reloadData])
useEffect(() => {
window.core?.api
diff --git a/web/containers/Providers/ModelHandler.tsx b/web/containers/Providers/ModelHandler.tsx
index 32182804c..cc7a0da80 100644
--- a/web/containers/Providers/ModelHandler.tsx
+++ b/web/containers/Providers/ModelHandler.tsx
@@ -23,6 +23,8 @@ import { ulid } from 'ulidx'
import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
+import { useGetEngines } from '@/hooks/useEngineManagement'
+
import { isLocalEngine } from '@/utils/modelEngine'
import { extensionManager } from '@/extension'
@@ -34,7 +36,6 @@ import {
deleteMessageAtom,
subscribedGeneratingMessageAtom,
} from '@/helpers/atoms/ChatMessage.atom'
-import { installedEnginesAtom } from '@/helpers/atoms/Engines.atom'
import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
import {
updateThreadWaitingForResponseAtom,
@@ -75,7 +76,7 @@ export default function ModelHandler() {
const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
const activeModelParamsRef = useRef(activeModelParams)
const setTokenSpeed = useSetAtom(tokenSpeedAtom)
- const engines = useAtomValue(installedEnginesAtom)
+ const { engines } = useGetEngines()
useEffect(() => {
activeThreadRef.current = activeThread
@@ -336,7 +337,8 @@ export default function ModelHandler() {
// Check model engine; we don't want to generate a title when it's not a local engine. remote model using first promp
if (
- !isLocalEngine(engines, activeModelRef.current?.engine as InferenceEngine)
+ activeModelRef.current?.engine !== InferenceEngine.cortex &&
+ activeModelRef.current?.engine !== InferenceEngine.cortex_llamacpp
) {
const updatedThread: Thread = {
...thread,
@@ -396,9 +398,7 @@ export default function ModelHandler() {
// 2. Update the title with the result of the inference
setTimeout(() => {
- const engine = EngineManager.instance().get(
- messageRequest.model?.engine ?? activeModelRef.current?.engine ?? ''
- )
+ const engine = EngineManager.instance().get(InferenceEngine.cortex)
engine?.inference(messageRequest)
}, 1000)
}
diff --git a/web/containers/Providers/SWRConfigProvider.tsx b/web/containers/Providers/SWRConfigProvider.tsx
new file mode 100644
index 000000000..346385343
--- /dev/null
+++ b/web/containers/Providers/SWRConfigProvider.tsx
@@ -0,0 +1,30 @@
+'use client'
+
+import * as React from 'react'
+
+import { SWRConfig } from 'swr'
+
+function SWRConfigProvider({ children }: { children: React.ReactNode }) {
+ // https://swr.vercel.app/docs/advanced/cache#localstorage-based-persistent-cache
+ // When initializing, we restore the data from `localStorage` into a map.
+
+ const map = React.useMemo(() => new Map
diff --git a/web/screens/LocalServer/LocalServerLeftPanel/index.tsx b/web/screens/LocalServer/LocalServerLeftPanel/index.tsx
index a5250b013..99c2d7488 100644
--- a/web/screens/LocalServer/LocalServerLeftPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerLeftPanel/index.tsx
@@ -1,6 +1,11 @@
import { Fragment, useCallback, useState } from 'react'
-import { EngineManager, Model, ModelSettingParams } from '@janhq/core'
+import {
+ EngineManager,
+ InferenceEngine,
+ Model,
+ ModelSettingParams,
+} from '@janhq/core'
import { Button, Tooltip, Select, Input, Checkbox } from '@janhq/joi'
import { useAtom, useAtomValue, useSetAtom } from 'jotai'
@@ -94,7 +99,7 @@ const LocalServerLeftPanel = () => {
localStorage.setItem(FIRST_TIME_VISIT_API_SERVER, 'false')
setFirstTimeVisitAPIServer(false)
}
- const engine = EngineManager.instance().get((model as Model).engine)
+ const engine = EngineManager.instance().get(InferenceEngine.cortex)
engine?.loadModel(model as Model)
// startModel(selectedModel.id, false).catch((e) => console.error(e))
setIsLoading(false)
diff --git a/web/screens/Settings/Engines/RemoteEngineSettings.tsx b/web/screens/Settings/Engines/RemoteEngineSettings.tsx
index 5396b86e0..cb05f7f2b 100644
--- a/web/screens/Settings/Engines/RemoteEngineSettings.tsx
+++ b/web/screens/Settings/Engines/RemoteEngineSettings.tsx
@@ -1,11 +1,14 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
/* eslint-disable react/no-unescaped-entities */
+/* eslint-disable @typescript-eslint/no-unused-vars */
import React, { useCallback, useRef, useState, useEffect } from 'react'
import {
EngineConfig as OriginalEngineConfig,
InferenceEngine,
+ events,
+ EngineEvent,
} from '@janhq/core'
interface EngineConfig extends OriginalEngineConfig {
@@ -64,6 +67,7 @@ const RemoteEngineSettings = ({
set(updatedEngine, field, value)
await updateEngine(name, updatedEngine)
mutate()
+ events.emit(EngineEvent.OnEngineUpdate, {})
}, 300)
},
[engine, name, mutate]
@@ -115,6 +119,8 @@ const RemoteEngineSettings = ({
}
}, [engine])
+ if (!engine) return null
+
return (
diff --git a/web/screens/Settings/Engines/index.tsx b/web/screens/Settings/Engines/index.tsx
index a79ba5be6..4ad155939 100644
--- a/web/screens/Settings/Engines/index.tsx
+++ b/web/screens/Settings/Engines/index.tsx
@@ -4,16 +4,16 @@ import { InferenceEngine } from '@janhq/core'
import { ScrollArea } from '@janhq/joi'
import { useAtomValue } from 'jotai'
+import { useGetEngines } from '@/hooks/useEngineManagement'
+
import { isLocalEngine } from '@/utils/modelEngine'
import LocalEngineItems from './LocalEngineItem'
import ModalAddRemoteEngine from './ModalAddRemoteEngine'
import RemoteEngineItems from './RemoteEngineItem'
-import { installedEnginesAtom } from '@/helpers/atoms/Engines.atom'
-
const Engines = () => {
- const engines = useAtomValue(installedEnginesAtom)
+ const { engines } = useGetEngines()
return (
diff --git a/web/screens/Settings/SettingDetail/index.tsx b/web/screens/Settings/SettingDetail/index.tsx
index 1e4b79282..d4a2c4d82 100644
--- a/web/screens/Settings/SettingDetail/index.tsx
+++ b/web/screens/Settings/SettingDetail/index.tsx
@@ -1,6 +1,8 @@
import { InferenceEngine } from '@janhq/core'
import { useAtomValue } from 'jotai'
+import { useGetEngines } from '@/hooks/useEngineManagement'
+
import Advanced from '@/screens/Settings/Advanced'
import AppearanceOptions from '@/screens/Settings/Appearance'
import ExtensionCatalog from '@/screens/Settings/CoreExtensions'
@@ -14,12 +16,11 @@ import Privacy from '@/screens/Settings/Privacy'
import { isLocalEngine } from '@/utils/modelEngine'
-import { installedEnginesAtom } from '@/helpers/atoms/Engines.atom'
import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
const SettingDetail = () => {
const selectedSetting = useAtomValue(selectedSettingAtom)
- const engines = useAtomValue(installedEnginesAtom)
+ const { engines } = useGetEngines()
switch (selectedSetting) {
case 'Engines':
diff --git a/web/screens/Settings/SettingLeftPanel/index.tsx b/web/screens/Settings/SettingLeftPanel/index.tsx
index 564ffc6d2..db4c9108c 100644
--- a/web/screens/Settings/SettingLeftPanel/index.tsx
+++ b/web/screens/Settings/SettingLeftPanel/index.tsx
@@ -6,12 +6,13 @@ import { useAtomValue } from 'jotai'
import LeftPanelContainer from '@/containers/LeftPanelContainer'
+import { useGetEngines } from '@/hooks/useEngineManagement'
+
import { getTitleByEngine, isLocalEngine } from '@/utils/modelEngine'
import SettingItem from './SettingItem'
import { extensionManager } from '@/extension'
-import { installedEnginesAtom } from '@/helpers/atoms/Engines.atom'
import {
showSettingActiveLocalEngineAtom,
@@ -20,7 +21,7 @@ import {
import { janSettingScreenAtom } from '@/helpers/atoms/Setting.atom'
const SettingLeftPanel = () => {
- const engines = useAtomValue(installedEnginesAtom)
+ const { engines } = useGetEngines()
const settingScreens = useAtomValue(janSettingScreenAtom)
const showSettingActiveLocalEngine = useAtomValue(
diff --git a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx
index d04f9b233..1ecb6f7fd 100644
--- a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx
@@ -88,6 +88,7 @@ const AssistantSetting: React.FC
)
})}
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
index e70047d05..457758749 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
@@ -22,6 +22,7 @@ import { currentPromptAtom, fileUploadAtom } from '@/containers/Providers/Jotai'
import { useActiveModel } from '@/hooks/useActiveModel'
+import { useGetEngines } from '@/hooks/useEngineManagement'
import useSendChatMessage from '@/hooks/useSendChatMessage'
import { uploader } from '@/utils/file'
@@ -35,7 +36,6 @@ import RichTextEditor from './RichTextEditor'
import { showRightPanelAtom } from '@/helpers/atoms/App.atom'
import { experimentalFeatureEnabledAtom } from '@/helpers/atoms/AppConfig.atom'
import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
-import { installedEnginesAtom } from '@/helpers/atoms/Engines.atom'
import { selectedModelAtom } from '@/helpers/atoms/Model.atom'
import { spellCheckAtom } from '@/helpers/atoms/Setting.atom'
import {
@@ -64,7 +64,7 @@ const ChatInput = () => {
const textareaRef = useRef<HTMLTextAreaElement>(null)
- onClick={() => {
- setMainViewState(MainViewState.Settings)
- setSelectedSetting(
- remoteEngine as InferenceEngine
- )
- }}
- >
- {engineLogo && (
-
+ )
+ })}
- {getTitleByEngine(
- remoteEngine as InferenceEngine
+ return (
+ onClick={() => {
+ setMainViewState(MainViewState.Settings)
+ setSelectedSetting(
+ remoteEngine as InferenceEngine
+ )
+ }}
+ >
+ {engineLogo && (
+
- )
- })}
+
+
+ {getTitleByEngine(
+ remoteEngine as InferenceEngine
+ )}
+