diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index a49b1a852..626d53696 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -62,6 +62,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   cache_type: string = 'q8'
   cpu_threads?: number
   auto_unload_models: boolean = true
+  reasoning_budget = -1 // Default reasoning budget; -1 means unlimited
   /**
    * The URL for making inference requests.
    */
@@ -230,8 +231,6 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     const loadedModels = await this.activeModels()
 
-    console.log('Loaded models:', loadedModels)
-
     // This is to avoid loading the same model multiple times
     if (loadedModels.some((e: { id: string }) => e.id === model.id)) {
       console.log(`Model ${model.id} already loaded`)
@@ -269,6 +268,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
         ...(this.cont_batching && this.n_parallel && this.n_parallel > 1
           ? { cont_batching: this.cont_batching }
           : {}),
+        ...(model.id.toLowerCase().includes('jan-nano')
+          ? { reasoning_budget: 0 }
+          : { reasoning_budget: this.reasoning_budget }),
+        ...{ 'no-context-shift': true },
       },
       timeout: false,
       signal,
diff --git a/src-tauri/src/core/mcp.rs b/src-tauri/src/core/mcp.rs
index 642505486..f9509c8e5 100644
--- a/src-tauri/src/core/mcp.rs
+++ b/src-tauri/src/core/mcp.rs
@@ -377,7 +377,12 @@ pub async fn call_tool(
     });
 
     return match timeout(MCP_TOOL_CALL_TIMEOUT, tool_call).await {
-        Ok(result) => result.map_err(|e| e.to_string()),
+        Ok(result) => {
+            match result {
+                Ok(ok_result) => Ok(ok_result),
+                Err(e) => Err(e.to_string()),
+            }
+        }
         Err(_) => Err(format!(
            "Tool call '{}' timed out after {} seconds",
            tool_name,
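Note on the `loadModel` hunk above: the conditional spreads decide the llama.cpp flags per model. A minimal sketch of how the pieces compose — the function and parameter names here are illustrative, not from the extension:

```ts
// Sketch of the flag composition in the loadModel settings above.
// `engineDefaults` stands in for the extension's instance fields.
function buildLoadFlags(
  modelId: string,
  engineDefaults: {
    reasoning_budget: number
    cont_batching: boolean
    n_parallel?: number
  }
) {
  return {
    // Jan-nano variants get a zero thinking budget; everything else
    // inherits the engine default (-1, i.e. unlimited).
    ...(modelId.toLowerCase().includes('jan-nano')
      ? { reasoning_budget: 0 }
      : { reasoning_budget: engineDefaults.reasoning_budget }),
    // Context shift is disabled, presumably so an overflowing prompt fails
    // fast with an error the new out-of-context dialog can react to.
    ...{ 'no-context-shift': true },
    ...(engineDefaults.cont_batching &&
    engineDefaults.n_parallel &&
    engineDefaults.n_parallel > 1
      ? { cont_batching: engineDefaults.cont_batching }
      : {}),
  }
}

// buildLoadFlags('menlo:jan-nano:jan-nano-4b-Q4_K_M.gguf', { reasoning_budget: -1, cont_batching: false })
// -> { reasoning_budget: 0, 'no-context-shift': true }
```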
diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx
index af5da384c..a83adc59e 100644
--- a/web-app/src/containers/ChatInput.tsx
+++ b/web-app/src/containers/ChatInput.tsx
@@ -36,6 +36,7 @@ import { ModelLoader } from '@/containers/loaders/ModelLoader'
 import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable'
 import { getConnectedServers } from '@/services/mcp'
 import { stopAllModels } from '@/services/models'
+import { useOutOfContextPromiseModal } from './dialogs/OutOfContextDialog'
 
 type ChatInputProps = {
   className?: string
@@ -60,6 +61,8 @@ const ChatInput = ({
   const { t } = useTranslation()
   const { spellCheckChatInput } = useGeneralSetting()
   const { tokenSpeed } = useAppState()
+  const { showModal, PromiseModal: OutOfContextModal } =
+    useOutOfContextPromiseModal()
   const maxRows = 10
 
   const { selectedModel } = useModelProvider()
@@ -110,7 +113,7 @@ const ChatInput = ({
       return
     }
     setMessage('')
-    sendMessage(prompt)
+    sendMessage(prompt, showModal)
   }
 
   useEffect(() => {
@@ -611,6 +614,7 @@ const ChatInput = ({
         )}
       </div>
+      <OutOfContextModal />
     </div>
   )
 }
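`sendMessage` now takes an optional modal trigger plus a `troubleshooting` flag (see `useChat.ts` below). Roughly, the recovery loop looks like this — a condensed sketch, not the PR's literal code:

```ts
// Condensed sketch of the out-of-context recovery that useChat.ts wires up.
async function sendWithContextRecovery(
  message: string,
  send: (msg: string, isRetry: boolean) => Promise<void>,
  showModal?: () => Promise<unknown>
): Promise<void> {
  try {
    await send(message, false)
  } catch (error) {
    const text = error instanceof Error ? error.message : String(error)
    if (text.includes('exceeds the available context size') && showModal) {
      // Ask the user; the dialog resolves truthy on confirm.
      const confirmed = await showModal()
      if (confirmed) {
        // The real code doubles ctx_len and restarts the model first, then
        // retries exactly once (isRetry avoids duplicating the user message).
        await send(message, true)
      }
      return
    }
    throw error
  }
}
```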
diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx
index f067a4431..833846db1 100644
--- a/web-app/src/containers/ThreadContent.tsx
+++ b/web-app/src/containers/ThreadContent.tsx
@@ -1,6 +1,6 @@
 import { ThreadMessage } from '@janhq/core'
 import { RenderMarkdown } from './RenderMarkdown'
-import { Fragment, memo, useCallback, useMemo, useState } from 'react'
+import React, { Fragment, memo, useCallback, useMemo, useState } from 'react'
 import {
   IconCopy,
   IconCopyCheck,
@@ -79,6 +79,8 @@ export const ThreadContent = memo(
       showAssistant?: boolean
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
       streamTools?: any
+      contextOverflowModal?: React.ReactNode | null
+      showContextOverflowModal?: () => Promise<unknown>
     }
   ) => {
     const [message, setMessage] = useState(item.content?.[0]?.text?.value || '')
@@ -129,7 +131,10 @@ export const ThreadContent = memo(
       }
       if (toSendMessage) {
         deleteMessage(toSendMessage.thread_id, toSendMessage.id ?? '')
-        sendMessage(toSendMessage.content?.[0]?.text?.value || '')
+        sendMessage(
+          toSendMessage.content?.[0]?.text?.value || '',
+          item.showContextOverflowModal
+        )
       }
     }, [deleteMessage, getMessages, item, sendMessage])
@@ -162,15 +167,25 @@ export const ThreadContent = memo(
     const editMessage = useCallback(
       (messageId: string) => {
         const threadMessages = getMessages(item.thread_id)
+
         const index = threadMessages.findIndex((msg) => msg.id === messageId)
         if (index === -1) return
+
         // Delete all messages after the edited message
         for (let i = threadMessages.length - 1; i >= index; i--) {
           deleteMessage(threadMessages[i].thread_id, threadMessages[i].id)
         }
-        sendMessage(message)
+
+        sendMessage(message, item.showContextOverflowModal)
       },
-      [deleteMessage, getMessages, item.thread_id, message, sendMessage]
+      [
+        deleteMessage,
+        getMessages,
+        item.thread_id,
+        message,
+        sendMessage,
+        item.showContextOverflowModal,
+      ]
     )
 
     const isToolCalls =
@@ -445,6 +460,7 @@ export const ThreadContent = memo(
             {image.detail &&
              <div>
                {image.detail}
              </div>
            }
          )}
+        {item.contextOverflowModal && item.contextOverflowModal}
       </div>
     )
   }
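The new `OutOfContextDialog` below is built around a small trick: the hook stores a Promise `resolve` function in component state so callers can simply `await showModal()`. The pattern in isolation — a sketch, the PR's hook differs in details:

```ts
import { useCallback, useState } from 'react'

// Minimal promise-backed confirm hook: showModal() resolves with the user's choice.
export function usePromiseConfirm() {
  const [resolveRef, setResolveRef] = useState<((ok: boolean) => void) | null>(
    null
  )

  const showModal = useCallback(
    () =>
      new Promise<boolean>((resolve) => {
        // Store the resolver; the dialog's buttons call it later.
        setResolveRef(() => resolve)
      }),
    []
  )

  const settle = useCallback(
    (ok: boolean) => {
      resolveRef?.(ok)
      setResolveRef(null)
    },
    [resolveRef]
  )

  return { isOpen: resolveRef !== null, showModal, settle }
}
```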
diff --git a/web-app/src/containers/dialogs/OutOfContextDialog.tsx b/web-app/src/containers/dialogs/OutOfContextDialog.tsx
new file mode 100644
index 000000000..fb01d7907
--- /dev/null
+++ b/web-app/src/containers/dialogs/OutOfContextDialog.tsx
@@ -0,0 +1,104 @@
+import { t } from 'i18next'
+import {
+  Dialog,
+  DialogContent,
+  DialogDescription,
+  DialogFooter,
+  DialogHeader,
+  DialogTitle,
+} from '@/components/ui/dialog'
+
+import { ReactNode, useCallback, useState } from 'react'
+import { Button } from '@/components/ui/button'
+
+export function useOutOfContextPromiseModal() {
+  const [isOpen, setIsOpen] = useState(false)
+  const [modalProps, setModalProps] = useState<{
+    resolveRef: ((value: unknown) => void) | null
+  }>({
+    resolveRef: null,
+  })
+  // Function to open the modal and return a Promise
+  const showModal = useCallback(() => {
+    return new Promise((resolve) => {
+      setModalProps({
+        resolveRef: resolve,
+      })
+      setIsOpen(true)
+    })
+  }, [])
+
+  const PromiseModal = useCallback((): ReactNode => {
+    if (!isOpen) {
+      return null
+    }
+
+    const handleConfirm = () => {
+      setIsOpen(false)
+      if (modalProps.resolveRef) {
+        modalProps.resolveRef(true)
+      }
+    }
+
+    const handleCancel = () => {
+      setIsOpen(false)
+      if (modalProps.resolveRef) {
+        modalProps.resolveRef(false)
+      }
+    }
+
+    return (
+      <Dialog
+        open={isOpen}
+        onOpenChange={(open) => {
+          setIsOpen(open)
+          if (!open) handleCancel()
+        }}
+      >
+        <DialogContent>
+          <DialogHeader>
+            <DialogTitle>
+              {t('outOfContextError.title', 'Out of context error')}
+            </DialogTitle>
+            <DialogDescription>
+              {t(
+                'outOfContextError.description',
+                'This chat is reaching the AI’s memory limit, like a whiteboard filling up. We can expand the memory window (called context size) so it remembers more, but it may use more of your computer’s memory.'
+              )}
+              <br />
+              <br />
+              {t(
+                'outOfContextError.increaseContextSizeDescription',
+                'Do you want to increase the context size?'
+              )}
+            </DialogDescription>
+          </DialogHeader>
+          <DialogFooter>
+            <Button variant="link" onClick={handleCancel}>
+              {t('cancel', 'Cancel')}
+            </Button>
+            <Button onClick={handleConfirm} autoFocus>
+              {t('increase', 'Increase')}
+            </Button>
+          </DialogFooter>
+        </DialogContent>
+      </Dialog>
+    )
+  }, [isOpen, modalProps])
+  return { showModal, PromiseModal }
+}
diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts
index ace57d8d2..dc29f7f8a 100644
--- a/web-app/src/hooks/useAppState.ts
+++ b/web-app/src/hooks/useAppState.ts
@@ -12,6 +12,7 @@ type AppState = {
   abortControllers: Record<string, AbortController>
   tokenSpeed?: TokenSpeed
   currentToolCall?: ChatCompletionMessageToolCall
+  showOutOfContextDialog?: boolean
   setServerStatus: (value: 'running' | 'stopped' | 'pending') => void
   updateStreamingContent: (content: ThreadMessage | undefined) => void
   updateCurrentToolCall: (
@@ -22,6 +23,7 @@ type AppState = {
   setAbortController: (threadId: string, controller: AbortController) => void
   updateTokenSpeed: (message: ThreadMessage) => void
   resetTokenSpeed: () => void
+  setOutOfContextDialog: (show: boolean) => void
 }
 
 export const useAppState = create<AppState>()((set) => ({
@@ -99,4 +101,9 @@ export const useAppState = create<AppState>()((set) => ({
     set({
       tokenSpeed: undefined,
     }),
+  setOutOfContextDialog: (show) => {
+    set(() => ({
+      showOutOfContextDialog: show,
+    }))
+  },
 }))
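In `useChat.ts` below, `increaseModelContextSize` doubles the model's `ctx_len`, first clamping the starting point to at least 8192. The arithmetic as a pure function (`nextCtxSize` is my name, not the PR's; like the hook, it does not guard against non-numeric strings):

```ts
// Pure version of the ctx_len bump in increaseModelContextSize (sketch).
function nextCtxSize(current?: number | string): number {
  const parsed =
    typeof current === 'string' ? parseInt(current, 10) : current ?? 8192
  // Treat missing/low values as 8192 so one bump reaches a useful size.
  return Math.max(parsed, 8192) * 2
}

// nextCtxSize(undefined) -> 16384
// nextCtxSize(4096)      -> 16384
// nextCtxSize('32768')   -> 65536
```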
diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts
index 3073ececf..164555563 100644
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@@ -24,10 +24,11 @@ import { getTools } from '@/services/mcp'
 import { MCPTool } from '@/types/completion'
 import { listen } from '@tauri-apps/api/event'
 import { SystemEvent } from '@/types/events'
-import { stopModel, startModel } from '@/services/models'
+import { stopModel, startModel, stopAllModels } from '@/services/models'
 
 import { useToolApproval } from '@/hooks/useToolApproval'
 import { useToolAvailable } from '@/hooks/useToolAvailable'
+import { OUT_OF_CONTEXT_SIZE } from '@/utils/error'
 
 export const useChat = () => {
   const { prompt, setPrompt } = usePrompt()
@@ -41,6 +42,7 @@ export const useChat = () => {
     setAbortController,
   } = useAppState()
   const { currentAssistant } = useAssistant()
+  const { updateProvider } = useModelProvider()
 
   const { approvedTools, showApprovalModal, allowAllMCPPermissions } =
     useToolApproval()
@@ -108,8 +110,60 @@ export const useChat = () => {
     currentAssistant,
   ])
 
+  const increaseModelContextSize = useCallback(
+    (model: Model, provider: ProviderObject) => {
+      /**
+       * Increase the model's context size by 2x.
+       * If the context size is not set or too low, it defaults to 8192.
+       */
+      const ctxSize = Math.max(
+        model.settings?.ctx_len?.controller_props.value
+          ? typeof model.settings.ctx_len.controller_props.value === 'string'
+            ? parseInt(model.settings.ctx_len.controller_props.value as string)
+            : (model.settings.ctx_len.controller_props.value as number)
+          : 8192,
+        8192
+      )
+      const updatedModel = {
+        ...model,
+        settings: {
+          ...model.settings,
+          ctx_len: {
+            ...(model.settings?.ctx_len != null ? model.settings?.ctx_len : {}),
+            controller_props: {
+              ...(model.settings?.ctx_len?.controller_props ?? {}),
+              value: ctxSize * 2,
+            },
+          },
+        },
+      }
+
+      // Find the model index in the provider's models array
+      const modelIndex = provider.models.findIndex((m) => m.id === model.id)
+
+      if (modelIndex !== -1) {
+        // Create a copy of the provider's models array
+        const updatedModels = [...provider.models]
+
+        // Update the specific model in the array
+        updatedModels[modelIndex] = updatedModel as Model
+
+        // Update the provider with the new models array
+        updateProvider(provider.provider, {
+          models: updatedModels,
+        })
+      }
+      // Unload so the next request reloads the model with the larger context
+      stopAllModels()
+    },
+    [updateProvider]
+  )
+
   const sendMessage = useCallback(
-    async (message: string) => {
+    async (
+      message: string,
+      showModal?: () => Promise<unknown>,
+      troubleshooting = true
+    ) => {
       const activeThread = await getCurrentThread()
 
       resetTokenSpeed()
@@ -121,7 +175,9 @@ export const useChat = () => {
       const abortController = new AbortController()
       setAbortController(activeThread.id, abortController)
       updateStreamingContent(emptyThreadContent)
-      addMessage(newUserThreadContent(activeThread.id, message))
+      // Do not add a new user message on retry
+      if (troubleshooting)
+        addMessage(newUserThreadContent(activeThread.id, message))
       updateThreadTimestamp(activeThread.id)
       setPrompt('')
       try {
@@ -180,6 +236,14 @@ export const useChat = () => {
         }
       } else {
         for await (const part of completion) {
+          // Error payloads stream back without a choices field; surface them
+          if (!part.choices) {
+            throw new Error(
+              'message' in part
+                ? (part.message as string)
+                : (JSON.stringify(part) ?? '')
+            )
+          }
           const delta = part.choices[0]?.delta?.content || ''
 
           if (part.choices[0]?.delta?.tool_calls) {
@@ -252,9 +316,26 @@ export const useChat = () => {
           if (!followUpWithToolUse) availableTools = []
         }
       } catch (error) {
-        toast.error(
-          `Error sending message: ${error && typeof error === 'object' && 'message' in error ? error.message : error}`
-        )
+        const errorMessage =
+          error && typeof error === 'object' && 'message' in error
+            ? error.message
+            : error
+        if (
+          typeof errorMessage === 'string' &&
+          errorMessage.includes(OUT_OF_CONTEXT_SIZE) &&
+          selectedModel &&
+          troubleshooting
+        ) {
+          showModal?.().then((confirmed) => {
+            if (confirmed) {
+              increaseModelContextSize(selectedModel, activeProvider)
+              setTimeout(() => {
+                sendMessage(message, showModal, false) // Retry once without re-running troubleshooting
+              }, 1000)
+            }
+          })
+        }
+        toast.error(`Error sending message: ${errorMessage}`)
         console.error('Error sending message:', error)
       } finally {
         updateLoadingModel(false)
@@ -282,6 +363,7 @@ export const useChat = () => {
       allowAllMCPPermissions,
       showApprovalModal,
       updateTokenSpeed,
+      increaseModelContextSize,
     ]
   )
diff --git a/web-app/src/lib/completion.ts b/web-app/src/lib/completion.ts
index 1e8a5ec55..24daec3cd 100644
--- a/web-app/src/lib/completion.ts
+++ b/web-app/src/lib/completion.ts
@@ -134,7 +134,8 @@ export const sendCompletion = async (
     thread.model.id &&
     !(thread.model.id in Object.values(models).flat()) &&
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    !tokenJS.extendedModelExist(providerName as any, thread.model?.id)
+    !tokenJS.extendedModelExist(providerName as any, thread.model?.id) &&
+    provider.provider !== 'llama.cpp'
   ) {
     try {
       tokenJS.extendModelList(
@@ -323,7 +324,7 @@ export const postMessageProcessing = async (
             ? await showModal(toolCall.function.name, message.thread_id)
             : true)
 
-        const result = approved
+        let result = approved
           ? await callTool({
               toolName: toolCall.function.name,
               arguments: toolCall.function.arguments.length
@@ -335,7 +336,7 @@ export const postMessageProcessing = async (
               content: [
                 {
                   type: 'text',
-                  text: `Error calling tool ${toolCall.function.name}: ${e.message}`,
+                  text: `Error calling tool ${toolCall.function.name}: ${e.message ?? e}`,
                 },
               ],
               error: true,
@@ -350,7 +351,16 @@ export const postMessageProcessing = async (
           ],
         }
 
-        if ('error' in result && result.error) break
+        if (typeof result === 'string') {
+          result = {
+            content: [
+              {
+                type: 'text',
+                text: result,
+              },
+            ],
+          }
+        }
 
         message.metadata = {
           ...(message.metadata ?? {}),
diff --git a/web-app/src/locales/en/common.json b/web-app/src/locales/en/common.json
index 296c0162c..9abd447ad 100644
--- a/web-app/src/locales/en/common.json
+++ b/web-app/src/locales/en/common.json
@@ -23,6 +23,7 @@
   "reset": "Reset",
   "search": "Search",
   "name": "Name",
+  "cancel": "Cancel",
 
   "placeholder": {
     "chatInput": "Ask me anything..."
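The `postMessageProcessing` change above (`const` → `let` plus the `typeof result === 'string'` branch) exists because the Tauri side can now hand back a bare error string instead of a structured tool result. In isolation, the normalization is roughly (types abbreviated):

```ts
// Abbreviated result shape used by postMessageProcessing (sketch).
type ToolContent = { type: 'text'; text: string }
type ToolResult = { content: ToolContent[]; error?: boolean }

// Wrap a bare string payload in the structured shape the rest of the loop expects.
function normalizeToolResult(result: ToolResult | string): ToolResult {
  return typeof result === 'string'
    ? { content: [{ type: 'text', text: result }] }
    : result
}
```

Note that this replaces the old `if ('error' in result && result.error) break`, so a failed tool call no longer stops the tool-call loop.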
diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx
index 8c299bd5f..f58723ac4 100644
--- a/web-app/src/routes/hub.tsx
+++ b/web-app/src/routes/hub.tsx
@@ -77,6 +77,7 @@ function Hub() {
   const addModelSourceTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(
     null
   )
+  const downloadButtonRef = useRef<HTMLButtonElement>(null)
   const { getProviderByName } = useModelProvider()
   const llamaProvider = getProviderByName('llama.cpp')
 
@@ -233,18 +234,14 @@ function Hub() {
                   isRecommended && 'hub-download-button-step'
                 )}
               >
-                <div className="flex items-center gap-2">
-                  <Progress value={downloadProgress * 100} />
-                  <span>{Math.round(downloadProgress * 100)}%</span>
-                </div>
+                {isDownloading && !isDownloaded && (
+                  <div className="flex items-center gap-2">
+                    <Progress value={downloadProgress * 100} />
+                    <span>{Math.round(downloadProgress * 100)}%</span>
+                  </div>
+                )}
                 {isDownloaded ? (
@@ -266,6 +264,7 @@ function Hub() {
     llamaProvider?.models,
     handleUseModel,
     isRecommendedModel,
+    downloadButtonRef,
   ])
 
   const { step } = useSearch({ from: Route.id })
@@ -291,8 +290,9 @@ function Hub() {
         isRecommendedModel(model.metadata?.id)
       )
       if (recommendedModel && recommendedModel.models[0]?.id) {
-        downloadModel(recommendedModel.models[0].id)
-
+        if (downloadButtonRef.current) {
+          downloadButtonRef.current.click()
+        }
         return
       }
     }
@@ -413,7 +413,7 @@ function Hub() {
-
+
           {loading ? (
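On the hub change above: onboarding used to call `downloadModel(...)` directly, presumably bypassing the download button's own `onClick` and the UI state it drives. Clicking the rendered button through a ref keeps a single code path — a sketch of the shape, assuming the ref is attached to the recommended model's download button:

```ts
import { useRef } from 'react'

// Sketch: drive the real download button instead of a parallel code path,
// so its onClick owns every side effect (progress state, styling).
function useRecommendedDownloadTrigger() {
  const downloadButtonRef = useRef<HTMLButtonElement>(null)
  const triggerDownload = () => downloadButtonRef.current?.click()
  return { downloadButtonRef, triggerDownload }
}
```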
diff --git a/web-app/src/routes/threads/$threadId.tsx b/web-app/src/routes/threads/$threadId.tsx
index 1190dfc1c..2f18f6094 100644
--- a/web-app/src/routes/threads/$threadId.tsx
+++ b/web-app/src/routes/threads/$threadId.tsx
@@ -18,6 +18,7 @@
 import { useAppState } from '@/hooks/useAppState'
 import DropdownAssistant from '@/containers/DropdownAssistant'
 import { useAssistant } from '@/hooks/useAssistant'
 import { useAppearance } from '@/hooks/useAppearance'
+import { useOutOfContextPromiseModal } from '@/containers/dialogs/OutOfContextDialog'
 
 // as route.threadsDetail
 export const Route = createFileRoute('/threads/$threadId')({
@@ -47,6 +48,8 @@ function ThreadDetail() {
   const scrollContainerRef = useRef<HTMLDivElement>(null)
   const isFirstRender = useRef(true)
   const messagesCount = useMemo(() => messages?.length ?? 0, [messages])
+  const { showModal, PromiseModal: OutOfContextModal } =
+    useOutOfContextPromiseModal()
 
   // Function to check scroll position and scrollbar presence
   const checkScrollState = () => {
@@ -193,6 +196,8 @@ function ThreadDetail() {
 
   if (!messages || !threadModel) return null
 
+  const contextOverflowModalComponent = <OutOfContextModal />
+
   return (
@@ -233,6 +238,8 @@ function ThreadDetail() {
               ))
             }
             index={index}
+            showContextOverflowModal={showModal}
+            contextOverflowModal={contextOverflowModalComponent}
           />
 
   )
 }
diff --git a/web-app/src/utils/error.ts b/web-app/src/utils/error.ts
new file mode 100644
index 000000000..1019d2853
--- /dev/null
+++ b/web-app/src/utils/error.ts
@@ -0,0 +1,2 @@
+export const OUT_OF_CONTEXT_SIZE =
+  'the request exceeds the available context size.'
diff --git a/web-app/src/utils/models.ts b/web-app/src/utils/models.ts
index c21b4a3a9..7f8756f67 100644
--- a/web-app/src/utils/models.ts
+++ b/web-app/src/utils/models.ts
@@ -1,6 +1,6 @@
 export const hardcodedModel = {
-  author: 'Menlo',
-  id: 'Menlo/Jan-nano',
+  author: 'menlo',
+  id: 'menlo/jan-nano',
   metadata: {
     '_id': '68492cd9cada68b1d11ca1bd',
     'author': 'Menlo',
@@ -12,7 +12,7 @@ export const hardcodedModel = {
     'description': '---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)',
     'disabled': false,
-    'downloads': 939,
+    'downloads': 1434,
     'gated': false,
     'gguf': {
       architecture: 'qwen3',
@@ -25,17 +25,17 @@ export const hardcodedModel = {
       total: 4022468096,
     },
     'id': 'Menlo/Jan-nano',
-    'lastModified': '2025-06-13T05:53:33.000Z',
-    'likes': 2,
+    'lastModified': '2025-06-13T16:57:55.000Z',
+    'likes': 3,
     'model-index': null,
     'modelId': 'Menlo/Jan-nano',
     'pipeline_tag': 'text-generation',
     'private': false,
-    'sha': '782985633ac4080dfdaa52e62d61dcf637e9ff0d',
+    'sha': 'a04aab0878648d8f284c63a52664a482ead16f06',
     'siblings': [
       {
         rfilename: '.gitattributes',
-        size: 1742,
+        size: 3460,
       },
       {
         rfilename: 'README.md',
@@ -45,6 +45,58 @@ export const hardcodedModel = {
         rfilename: 'jan-nano-0.4-iQ4_XS.gguf',
         size: 2270750400,
       },
+      {
+        rfilename: 'jan-nano-4b-Q3_K_L.gguf',
+        size: 2239784384,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q3_K_M.gguf',
+        size: 2075616704,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q3_K_S.gguf',
+        size: 1886995904,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q4_0.gguf',
+        size: 2369545664,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q4_1.gguf',
+        size: 2596627904,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q4_K_M.gguf',
+        size: 2497279424,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q4_K_S.gguf',
+        size: 2383308224,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q5_0.gguf',
+        size: 2823710144,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q5_1.gguf',
+        size: 3050792384,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q5_K_M.gguf',
+        size: 2889512384,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q5_K_S.gguf',
+        size: 2823710144,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q6_K.gguf',
+        size: 3306259904,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q8_0.gguf',
+        size: 4280403904,
+      },
     ],
     'spaces': [],
     'tags': [
@@ -56,7 +108,7 @@ export const hardcodedModel = {
       'imatrix',
      'conversational',
     ],
-    'usedStorage': 20820673088,
+    'usedStorage': 93538518464,
     'widgetData': [
       {
         text: 'Hi, what can you help me with?',
@@ -74,8 +126,60 @@ export const hardcodedModel = {
   },
   models: [
     {
-      id: 'Menlo:Jan-nano:jan-nano-0.4-iQ4_XS.gguf',
+      id: 'menlo:jan-nano:jan-nano-0.4-iQ4_XS.gguf',
       size: 2270750400,
     },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q3_K_L.gguf',
+      size: 2239784384,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q3_K_M.gguf',
+      size: 2075616704,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q3_K_S.gguf',
+      size: 1886995904,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q4_0.gguf',
+      size: 2369545664,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q4_1.gguf',
+      size: 2596627904,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q4_K_M.gguf',
+      size: 2497279424,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q4_K_S.gguf',
+      size: 2383308224,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q5_0.gguf',
+      size: 2823710144,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q5_1.gguf',
+      size: 3050792384,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q5_K_M.gguf',
+      size: 2889512384,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q5_K_S.gguf',
+      size: 2823710144,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q6_K.gguf',
+      size: 3306259904,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q8_0.gguf',
+      size: 4280403904,
+    },
   ],
 }
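One caveat worth flagging: the whole recovery path keys off the literal llama.cpp error text in `utils/error.ts`, so an upstream wording change would silently disable it. If that ever becomes a problem, detection could be centralized — a hedged sketch (`isOutOfContextError` is not part of this PR):

```ts
import { OUT_OF_CONTEXT_SIZE } from '@/utils/error'

// Sketch: one place to recognize the overflow error, mirroring the
// errorMessage extraction in useChat.ts.
export function isOutOfContextError(error: unknown): boolean {
  const text =
    error && typeof error === 'object' && 'message' in error
      ? String((error as { message: unknown }).message)
      : String(error)
  return text.includes(OUT_OF_CONTEXT_SIZE)
}
```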