enhancement: model run improvement (#5268)

* fix: mcp tool error handling

* fix: error message

* fix: trigger download from recommend model

* fix: can't scroll hub

* fix: show progress

* enhancement: prompt users to increase context size

* enhancement: rearrange action buttons for a better UX

* 🔧 chore: clean up logic

---------

Co-authored-by: Faisal Amir <urmauur@gmail.com>
Committed by Louis on 2025-06-14 16:32:15 +07:00 via GitHub
parent 23240f958e
commit 1e17cc6ec7
13 changed files with 387 additions and 42 deletions

---

@@ -62,6 +62,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   cache_type: string = 'q8'
   cpu_threads?: number
   auto_unload_models: boolean = true
+  reasoning_budget = -1 // Default reasoning budget (-1 = unlimited)
   /**
    * The URL for making inference requests.
    */
@@ -230,8 +231,6 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     const loadedModels = await this.activeModels()
-    console.log('Loaded models:', loadedModels)
     // This is to avoid loading the same model multiple times
     if (loadedModels.some((e: { id: string }) => e.id === model.id)) {
       console.log(`Model ${model.id} already loaded`)
@@ -269,6 +268,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
         ...(this.cont_batching && this.n_parallel && this.n_parallel > 1
           ? { cont_batching: this.cont_batching }
           : {}),
+        ...(model.id.toLowerCase().includes('jan-nano')
+          ? { reasoning_budget: 0 }
+          : { reasoning_budget: this.reasoning_budget }),
+        ...{ 'no-context-shift': true },
       },
       timeout: false,
       signal,
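The two new spreads are the core of this hunk. A minimal sketch of how they resolve at load time (standalone TypeScript; `buildLoadOptions` is an illustrative helper, not the extension's actual API):

```ts
// Sketch: how the conditional spreads above compose the llama.cpp load
// options. buildLoadOptions is a hypothetical stand-in.
function buildLoadOptions(modelId: string, reasoningBudget = -1) {
  return {
    // Jan Nano is loaded with reasoning disabled (budget 0); every
    // other model keeps the extension-wide default (-1 = unlimited).
    ...(modelId.toLowerCase().includes('jan-nano')
      ? { reasoning_budget: 0 }
      : { reasoning_budget: reasoningBudget }),
    // With context shift off, an overflowing prompt fails with an
    // explicit error instead of silently dropping old tokens, which is
    // what lets the UI offer to raise the context size (see below).
    ...{ 'no-context-shift': true },
  }
}

console.log(buildLoadOptions('menlo:jan-nano:jan-nano-4b-Q4_K_M.gguf'))
// { reasoning_budget: 0, 'no-context-shift': true }
```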

---

@@ -377,7 +377,12 @@ pub async fn call_tool(
     });
     return match timeout(MCP_TOOL_CALL_TIMEOUT, tool_call).await {
-        Ok(result) => result.map_err(|e| e.to_string()),
+        Ok(result) => {
+            match result {
+                Ok(ok_result) => Ok(ok_result),
+                Err(e) => Err(e.to_string()),
+            }
+        }
         Err(_) => Err(format!(
             "Tool call '{}' timed out after {} seconds",
             tool_name,
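For readers following along in the web app's TypeScript, a sketch of the same timeout-and-stringify pattern (the shipped implementation is the Rust above; the helper and the 30-second budget here are hypothetical):

```ts
// Sketch of the timeout-plus-error-normalization pattern in TypeScript.
// callToolWithTimeout and the 30s default are hypothetical stand-ins.
async function callToolWithTimeout<T>(
  toolName: string,
  call: () => Promise<T>,
  timeoutMs = 30_000
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined
  const deadline = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      reject(
        new Error(
          `Tool call '${toolName}' timed out after ${timeoutMs / 1000} seconds`
        )
      )
    }, timeoutMs)
  })
  try {
    // Whichever settles first wins: the tool call or the deadline.
    return await Promise.race([call(), deadline])
  } catch (e) {
    // Normalize any failure into an Error, mirroring `e.to_string()`.
    throw e instanceof Error ? e : new Error(String(e))
  } finally {
    if (timer !== undefined) clearTimeout(timer)
  }
}
```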

---

@@ -36,6 +36,7 @@ import { ModelLoader } from '@/containers/loaders/ModelLoader'
 import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable'
 import { getConnectedServers } from '@/services/mcp'
 import { stopAllModels } from '@/services/models'
+import { useOutOfContextPromiseModal } from './dialogs/OutOfContextDialog'
 
 type ChatInputProps = {
   className?: string
@@ -60,6 +61,8 @@ const ChatInput = ({
   const { t } = useTranslation()
   const { spellCheckChatInput } = useGeneralSetting()
   const { tokenSpeed } = useAppState()
+  const { showModal, PromiseModal: OutOfContextModal } =
+    useOutOfContextPromiseModal()
 
   const maxRows = 10
   const { selectedModel } = useModelProvider()
@@ -110,7 +113,7 @@
       return
     }
     setMessage('')
-    sendMessage(prompt)
+    sendMessage(prompt, showModal)
   }
 
   useEffect(() => {
@@ -611,6 +614,7 @@
           </div>
         </div>
       )}
+      <OutOfContextModal />
     </div>
   )
 }

---

@@ -1,6 +1,6 @@
 import { ThreadMessage } from '@janhq/core'
 import { RenderMarkdown } from './RenderMarkdown'
-import { Fragment, memo, useCallback, useMemo, useState } from 'react'
+import React, { Fragment, memo, useCallback, useMemo, useState } from 'react'
 import {
   IconCopy,
   IconCopyCheck,
@@ -79,6 +79,8 @@ export const ThreadContent = memo(
     showAssistant?: boolean
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
     streamTools?: any
+    contextOverflowModal?: React.ReactNode | null
+    showContextOverflowModal?: () => Promise<unknown>
   }
 ) => {
   const [message, setMessage] = useState(item.content?.[0]?.text?.value || '')
@@ -129,7 +131,10 @@
     }
     if (toSendMessage) {
       deleteMessage(toSendMessage.thread_id, toSendMessage.id ?? '')
-      sendMessage(toSendMessage.content?.[0]?.text?.value || '')
+      sendMessage(
+        toSendMessage.content?.[0]?.text?.value || '',
+        item.showContextOverflowModal
+      )
     }
   }, [deleteMessage, getMessages, item, sendMessage])
@@ -162,15 +167,25 @@
   const editMessage = useCallback(
     (messageId: string) => {
       const threadMessages = getMessages(item.thread_id)
       const index = threadMessages.findIndex((msg) => msg.id === messageId)
       if (index === -1) return
       // Delete all messages after the edited message
       for (let i = threadMessages.length - 1; i >= index; i--) {
         deleteMessage(threadMessages[i].thread_id, threadMessages[i].id)
       }
-      sendMessage(message)
+
+      sendMessage(message, item.showContextOverflowModal)
     },
-    [deleteMessage, getMessages, item.thread_id, message, sendMessage]
+    [
+      deleteMessage,
+      getMessages,
+      item.thread_id,
+      message,
+      sendMessage,
+      item.showContextOverflowModal,
+    ]
   )
 
   const isToolCalls =
@@ -445,6 +460,7 @@ export const ThreadContent = memo(
         {image.detail && <p className="text-sm mt-1">{image.detail}</p>}
       </div>
     )}
+    {item.contextOverflowModal && item.contextOverflowModal}
   </Fragment>
 )
 }
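Both callbacks above share one move: truncate the thread at the target message, then resend with the overflow handler attached. A standalone sketch of that truncate-and-resend logic (the `Msg` type and the callback signatures are simplified stand-ins):

```ts
// Sketch of the truncate-and-resend flow used by editMessage above.
// Msg and the callback signatures are hypothetical simplifications.
interface Msg {
  id: string
  text: string
}

function editAndResend(
  messages: Msg[],
  messageId: string,
  newText: string,
  deleteMessage: (id: string) => void,
  sendMessage: (text: string, onOverflow?: () => Promise<unknown>) => void,
  onOverflow?: () => Promise<unknown>
) {
  const index = messages.findIndex((m) => m.id === messageId)
  if (index === -1) return
  // Walk backwards so deletions never shift the indices still to visit:
  // the edited message and everything after it are removed.
  for (let i = messages.length - 1; i >= index; i--) {
    deleteMessage(messages[i].id)
  }
  // Resend with the handler so an out-of-context error can surface
  // the "increase context size" dialog.
  sendMessage(newText, onOverflow)
}
```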

---

@@ -0,0 +1,104 @@
+import { t } from 'i18next'
+import {
+  Dialog,
+  DialogContent,
+  DialogDescription,
+  DialogFooter,
+  DialogHeader,
+  DialogTitle,
+} from '@/components/ui/dialog'
+import { ReactNode, useCallback, useState } from 'react'
+import { Button } from '@/components/ui/button'
+
+export function useOutOfContextPromiseModal() {
+  const [isOpen, setIsOpen] = useState(false)
+  const [modalProps, setModalProps] = useState<{
+    resolveRef: ((value: unknown) => void) | null
+  }>({
+    resolveRef: null,
+  })
+
+  // Function to open the modal and return a Promise
+  const showModal = useCallback(() => {
+    return new Promise((resolve) => {
+      setModalProps({
+        resolveRef: resolve,
+      })
+      setIsOpen(true)
+    })
+  }, [])
+
+  const PromiseModal = useCallback((): ReactNode => {
+    if (!isOpen) {
+      return null
+    }
+
+    const handleConfirm = () => {
+      setIsOpen(false)
+      if (modalProps.resolveRef) {
+        modalProps.resolveRef(true)
+      }
+    }
+
+    const handleCancel = () => {
+      setIsOpen(false)
+      if (modalProps.resolveRef) {
+        modalProps.resolveRef(false)
+      }
+    }
+
+    return (
+      <Dialog
+        open={isOpen}
+        onOpenChange={(open) => {
+          setIsOpen(open)
+          if (!open) handleCancel()
+        }}
+      >
+        <DialogContent>
+          <DialogHeader>
+            <DialogTitle>
+              {t('outOfContextError.title', 'Out of context error')}
+            </DialogTitle>
+          </DialogHeader>
+          <DialogDescription>
+            {t(
+              'outOfContextError.description',
+              'This chat is reaching the AI’s memory limit, like a whiteboard filling up. We can expand the memory window (called context size) so it remembers more, but it may use more of your computer’s memory.'
+            )}
+            <br />
+            <br />
+            {t(
+              'outOfContextError.increaseContextSizeDescription',
+              'Do you want to increase the context size?'
+            )}
+          </DialogDescription>
+          <DialogFooter className="flex gap-2">
+            <Button
+              variant="default"
+              className="bg-transparent border border-main-view-fg/20 hover:bg-main-view-fg/4"
+              onClick={() => setIsOpen(false)}
+            >
+              {t('common.cancel', 'Cancel')}
+            </Button>
+            <Button
+              asChild
+              onClick={() => {
+                handleConfirm()
+                setIsOpen(false)
+              }}
+            >
+              <span className="text-main-view-fg/70">
+                {t(
+                  'outOfContextError.increaseContextSize',
+                  'Increase Context Size'
+                )}
+              </span>
+            </Button>
+          </DialogFooter>
+        </DialogContent>
+      </Dialog>
+    )
+  }, [isOpen, modalProps])
+
+  return { showModal, PromiseModal }
+}
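A minimal consumption sketch, mirroring how ChatInput and the thread route wire this hook up (the `sendMessage` prop is a simplified stand-in for the chat hook):

```tsx
// Sketch: consuming useOutOfContextPromiseModal from a component.
import { useOutOfContextPromiseModal } from '@/containers/dialogs/OutOfContextDialog'

function ChatBox(props: {
  // Hypothetical stand-in for the useChat hook's sendMessage.
  sendMessage: (text: string, onOverflow?: () => Promise<unknown>) => void
}) {
  const { showModal, PromiseModal: OutOfContextModal } =
    useOutOfContextPromiseModal()

  return (
    <div>
      {/* showModal rides along with the send; it is only invoked when
          the completion fails with the out-of-context error. */}
      <button onClick={() => props.sendMessage('hello', showModal)}>
        Send
      </button>
      {/* Rendered once; invisible until showModal() is called. The
          returned Promise resolves true (confirm) or false (cancel). */}
      <OutOfContextModal />
    </div>
  )
}
```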

---

@@ -12,6 +12,7 @@ type AppState = {
   abortControllers: Record<string, AbortController>
   tokenSpeed?: TokenSpeed
   currentToolCall?: ChatCompletionMessageToolCall
+  showOutOfContextDialog?: boolean
   setServerStatus: (value: 'running' | 'stopped' | 'pending') => void
   updateStreamingContent: (content: ThreadMessage | undefined) => void
   updateCurrentToolCall: (
@@ -22,6 +23,7 @@ type AppState = {
   setAbortController: (threadId: string, controller: AbortController) => void
   updateTokenSpeed: (message: ThreadMessage) => void
   resetTokenSpeed: () => void
+  setOutOfContextDialog: (show: boolean) => void
 }
 
 export const useAppState = create<AppState>()((set) => ({
@@ -99,4 +101,9 @@
     set({
       tokenSpeed: undefined,
     }),
+  setOutOfContextDialog: (show) => {
+    set(() => ({
+      showOutOfContextDialog: show,
+    }))
+  },
 }))
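A minimal sketch of a consumer for the new flag, using zustand's standard selector API (the banner component is hypothetical; nothing else in this diff reads `showOutOfContextDialog` yet):

```tsx
// Sketch: toggling and reading showOutOfContextDialog from the store.
// OutOfContextBanner is a hypothetical consumer, not part of this commit.
import { useAppState } from '@/hooks/useAppState'

function OutOfContextBanner() {
  const show = useAppState((state) => state.showOutOfContextDialog)
  const setShow = useAppState((state) => state.setOutOfContextDialog)

  if (!show) return null
  return (
    <button onClick={() => setShow(false)}>
      Dismiss out-of-context warning
    </button>
  )
}
```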

---

@@ -24,10 +24,11 @@ import { getTools } from '@/services/mcp'
 import { MCPTool } from '@/types/completion'
 import { listen } from '@tauri-apps/api/event'
 import { SystemEvent } from '@/types/events'
-import { stopModel, startModel } from '@/services/models'
+import { stopModel, startModel, stopAllModels } from '@/services/models'
 import { useToolApproval } from '@/hooks/useToolApproval'
 import { useToolAvailable } from '@/hooks/useToolAvailable'
+import { OUT_OF_CONTEXT_SIZE } from '@/utils/error'
 
 export const useChat = () => {
   const { prompt, setPrompt } = usePrompt()
@@ -41,6 +42,7 @@ export const useChat = () => {
     setAbortController,
   } = useAppState()
   const { currentAssistant } = useAssistant()
+  const { updateProvider } = useModelProvider()
   const { approvedTools, showApprovalModal, allowAllMCPPermissions } =
     useToolApproval()
@@ -108,8 +110,60 @@
     currentAssistant,
   ])
 
+  const increaseModelContextSize = useCallback(
+    (model: Model, provider: ProviderObject) => {
+      /**
+       * Should increase the context size of the model by 2x.
+       * If the context size is not set or too low, it defaults to 8192.
+       */
+      const ctxSize = Math.max(
+        model.settings?.ctx_len?.controller_props.value
+          ? typeof model.settings.ctx_len.controller_props.value === 'string'
+            ? parseInt(model.settings.ctx_len.controller_props.value as string)
+            : (model.settings.ctx_len.controller_props.value as number)
+          : 8192,
+        8192
+      )
+
+      const updatedModel = {
+        ...model,
+        settings: {
+          ...model.settings,
+          ctx_len: {
+            ...(model.settings?.ctx_len != null ? model.settings?.ctx_len : {}),
+            controller_props: {
+              ...(model.settings?.ctx_len?.controller_props ?? {}),
+              value: ctxSize * 2,
+            },
+          },
+        },
+      }
+
+      // Find the model index in the provider's models array
+      const modelIndex = provider.models.findIndex((m) => m.id === model.id)
+      if (modelIndex !== -1) {
+        // Create a copy of the provider's models array
+        const updatedModels = [...provider.models]
+        // Update the specific model in the array
+        updatedModels[modelIndex] = updatedModel as Model
+        // Update the provider with the new models array
+        updateProvider(provider.provider, {
+          models: updatedModels,
+        })
+      }
+      stopAllModels()
+    },
+    [updateProvider]
+  )
+
   const sendMessage = useCallback(
-    async (message: string) => {
+    async (
+      message: string,
+      showModal?: () => Promise<unknown>,
+      troubleshooting = true
+    ) => {
       const activeThread = await getCurrentThread()
       resetTokenSpeed()
@@ -121,6 +175,8 @@
       const abortController = new AbortController()
       setAbortController(activeThread.id, abortController)
       updateStreamingContent(emptyThreadContent)
+      // Do not add a new message on retry
+      if (troubleshooting)
         addMessage(newUserThreadContent(activeThread.id, message))
       updateThreadTimestamp(activeThread.id)
       setPrompt('')
@@ -180,6 +236,14 @@
           }
         } else {
           for await (const part of completion) {
+            // Surface server-side errors that arrive as a bare payload
+            if (!part.choices) {
+              throw new Error(
+                'message' in part
+                  ? (part.message as string)
+                  : (JSON.stringify(part) ?? '')
+              )
+            }
             const delta = part.choices[0]?.delta?.content || ''
 
             if (part.choices[0]?.delta?.tool_calls) {
@@ -252,9 +316,26 @@
           if (!followUpWithToolUse) availableTools = []
         }
       } catch (error) {
-        toast.error(
-          `Error sending message: ${error && typeof error === 'object' && 'message' in error ? error.message : error}`
-        )
+        const errorMessage =
+          error && typeof error === 'object' && 'message' in error
+            ? error.message
+            : error
+        if (
+          typeof errorMessage === 'string' &&
+          errorMessage.includes(OUT_OF_CONTEXT_SIZE) &&
+          selectedModel &&
+          troubleshooting
+        ) {
+          showModal?.().then((confirmed) => {
+            if (confirmed) {
+              increaseModelContextSize(selectedModel, activeProvider)
+              setTimeout(() => {
+                sendMessage(message, showModal, false) // Retry sending the message without troubleshooting
+              }, 1000)
+            }
+          })
+        }
+        toast.error(`Error sending message: ${errorMessage}`)
         console.error('Error sending message:', error)
       } finally {
         updateLoadingModel(false)
@@ -282,6 +363,7 @@
       allowAllMCPPermissions,
       showApprovalModal,
       updateTokenSpeed,
+      increaseModelContextSize,
     ]
   )
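Condensed, the recovery path added here is: detect llama.cpp's overflow message, ask the user through the promise modal, double ctx_len (stopping any running model so the new value applies on reload), then retry the send exactly once. A standalone sketch under those assumptions; only the OUT_OF_CONTEXT_SIZE string comes from this commit, the callbacks are hypothetical stand-ins:

```ts
// Sketch of the detect / confirm / grow / retry loop added to useChat.
const OUT_OF_CONTEXT_SIZE = 'the request exceeds the available context size.'

async function sendWithContextRecovery(
  message: string,
  sendOnce: (message: string) => Promise<void>,
  confirmGrow: () => Promise<boolean>, // the promise modal
  doubleContextSize: () => void // 2x ctx_len, then stop running models
) {
  try {
    await sendOnce(message)
  } catch (error) {
    const text = error instanceof Error ? error.message : String(error)
    if (!text.includes(OUT_OF_CONTEXT_SIZE)) throw error
    // Ask once; if the retry overflows again the error is surfaced
    // as-is, matching troubleshooting=false on the second attempt.
    if (await confirmGrow()) {
      doubleContextSize()
      await new Promise((r) => setTimeout(r, 1000)) // let the engine unload
      await sendOnce(message)
    }
  }
}
```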

---

@@ -134,7 +134,8 @@ export const sendCompletion = async (
     thread.model.id &&
     !(thread.model.id in Object.values(models).flat()) &&
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    !tokenJS.extendedModelExist(providerName as any, thread.model?.id)
+    !tokenJS.extendedModelExist(providerName as any, thread.model?.id) &&
+    provider.provider !== 'llama.cpp'
   ) {
     try {
       tokenJS.extendModelList(
@@ -323,7 +324,7 @@
             ? await showModal(toolCall.function.name, message.thread_id)
             : true)
 
-        const result = approved
+        let result = approved
          ? await callTool({
              toolName: toolCall.function.name,
              arguments: toolCall.function.arguments.length
@@ -335,7 +336,7 @@
              content: [
                {
                  type: 'text',
-                 text: `Error calling tool ${toolCall.function.name}: ${e.message}`,
+                 text: `Error calling tool ${toolCall.function.name}: ${e.message ?? e}`,
                },
              ],
              error: true,
@@ -350,7 +351,16 @@
              ],
            }
 
-        if ('error' in result && result.error) break
+        if (typeof result === 'string') {
+          result = {
+            content: [
+              {
+                type: 'text',
+                text: result,
+              },
+            ],
+          }
+        }
 
        message.metadata = {
          ...(message.metadata ?? {}),
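The switch from `const result` to `let result` exists so a bare-string tool result can be re-wrapped in place. A small standalone sketch of that normalization (the types are simplified stand-ins for the MCP content shape):

```ts
// Sketch: coercing a tool-call result into one uniform shape.
// ToolContent/ToolResult are simplified stand-ins for the MCP types.
type ToolContent = { type: 'text'; text: string }
type ToolResult = { content: ToolContent[]; error?: boolean }

function normalizeToolResult(result: ToolResult | string): ToolResult {
  // Some failure paths may hand back a bare string (e.g. a timeout
  // message from call_tool); wrap it so downstream code can always
  // read result.content[0].text.
  if (typeof result === 'string') {
    return { content: [{ type: 'text', text: result }] }
  }
  return result
}

console.log(normalizeToolResult('Tool call timed out').content[0].text)
// "Tool call timed out"
```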

---

@@ -23,6 +23,7 @@
   "reset": "Reset",
   "search": "Search",
   "name": "Name",
+  "cancel": "Cancel",
   "placeholder": {
     "chatInput": "Ask me anything..."

---

@@ -77,6 +77,7 @@ function Hub() {
   const addModelSourceTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(
     null
   )
+  const downloadButtonRef = useRef<HTMLButtonElement>(null)
 
   const { getProviderByName } = useModelProvider()
   const llamaProvider = getProviderByName('llama.cpp')
@@ -233,18 +234,14 @@
                     isRecommended && 'hub-download-button-step'
                   )}
                 >
-                  <div
-                    className={cn(
-                      'flex items-center gap-2 w-20 ',
-                      !isDownloading &&
-                        'opacity-0 visibility-hidden w-0 pointer-events-none'
-                    )}
-                  >
+                  {isDownloading && !isDownloaded && (
+                    <div className={cn('flex items-center gap-2 w-20')}>
                       <Progress value={downloadProgress * 100} />
                       <span className="text-xs text-center text-main-view-fg/70">
                         {Math.round(downloadProgress * 100)}%
                       </span>
                     </div>
+                  )}
                   {isDownloaded ? (
                     <Button size="sm" onClick={() => handleUseModel(modelId)}>
                       Use
@@ -254,6 +251,7 @@
                       size="sm"
                       onClick={() => downloadModel(modelId)}
                       className={cn(isDownloading && 'hidden')}
+                      ref={isRecommended ? downloadButtonRef : undefined}
                     >
                       Download
                     </Button>
@@ -266,6 +264,7 @@
     llamaProvider?.models,
     handleUseModel,
     isRecommendedModel,
+    downloadButtonRef,
   ])
 
   const { step } = useSearch({ from: Route.id })
@@ -291,8 +290,9 @@
       isRecommendedModel(model.metadata?.id)
     )
     if (recommendedModel && recommendedModel.models[0]?.id) {
-      downloadModel(recommendedModel.models[0].id)
+      if (downloadButtonRef.current) {
+        downloadButtonRef.current.click()
+      }
       return
     }
   }
@@ -413,7 +413,7 @@
           </div>
         </div>
       </HeaderPage>
-      <div className="p-4 w-full h-[calc(100%-32px)] overflow-y-auto first-step-setup-local-provider">
+      <div className="p-4 w-full h-[calc(100%-32px)] !overflow-y-auto first-step-setup-local-provider">
        <div className="flex flex-col h-full justify-between gap-4 gap-y-3 w-4/5 mx-auto">
         {loading ? (
           <div className="flex items-center justify-center">
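The onboarding step now triggers the recommended model's real Download button through a ref instead of calling downloadModel directly, so progress display and button hiding follow the same path as a manual click. A reduced sketch of the pattern (component and prop names are hypothetical):

```tsx
// Sketch: driving an existing button's own click path through a ref,
// as the Hub now does for the recommended model's Download button.
import { useRef } from 'react'

function DownloadList(props: { onDownload: () => void }) {
  const downloadButtonRef = useRef<HTMLButtonElement>(null)

  const triggerRecommendedDownload = () => {
    // Clicking the real button keeps every side effect (progress UI,
    // the button hiding itself) identical to a manual click.
    downloadButtonRef.current?.click()
  }

  return (
    <>
      <button ref={downloadButtonRef} onClick={props.onDownload}>
        Download
      </button>
      <button onClick={triggerRecommendedDownload}>Get recommended</button>
    </>
  )
}
```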

---

@@ -18,6 +18,7 @@ import { useAppState } from '@/hooks/useAppState'
 import DropdownAssistant from '@/containers/DropdownAssistant'
 import { useAssistant } from '@/hooks/useAssistant'
 import { useAppearance } from '@/hooks/useAppearance'
+import { useOutOfContextPromiseModal } from '@/containers/dialogs/OutOfContextDialog'
 
 // as route.threadsDetail
 export const Route = createFileRoute('/threads/$threadId')({
@@ -47,6 +48,8 @@ function ThreadDetail() {
   const scrollContainerRef = useRef<HTMLDivElement>(null)
   const isFirstRender = useRef(true)
   const messagesCount = useMemo(() => messages?.length ?? 0, [messages])
+  const { showModal, PromiseModal: OutOfContextModal } =
+    useOutOfContextPromiseModal()
 
   // Function to check scroll position and scrollbar presence
   const checkScrollState = () => {
@@ -193,6 +196,8 @@
   if (!messages || !threadModel) return null
 
+  const contextOverflowModalComponent = <OutOfContextModal />
+
   return (
     <div className="flex flex-col h-full">
       <HeaderPage>
@@ -233,6 +238,8 @@
               ))
             }
             index={index}
+            showContextOverflowModal={showModal}
+            contextOverflowModal={contextOverflowModalComponent}
           />
         </div>
       )

---

@@ -0,0 +1,2 @@
+export const OUT_OF_CONTEXT_SIZE =
+  'the request exceeds the available context size.'

---

@@ -1,6 +1,6 @@
 export const hardcodedModel = {
-  author: 'Menlo',
-  id: 'Menlo/Jan-nano',
+  author: 'menlo',
+  id: 'menlo/jan-nano',
   metadata: {
     '_id': '68492cd9cada68b1d11ca1bd',
     'author': 'Menlo',
@@ -12,7 +12,7 @@ export const hardcodedModel = {
     'description':
       '---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)',
     'disabled': false,
-    'downloads': 939,
+    'downloads': 1434,
     'gated': false,
     'gguf': {
       architecture: 'qwen3',
@@ -25,17 +25,17 @@
       total: 4022468096,
     },
     'id': 'Menlo/Jan-nano',
-    'lastModified': '2025-06-13T05:53:33.000Z',
-    'likes': 2,
+    'lastModified': '2025-06-13T16:57:55.000Z',
+    'likes': 3,
     'model-index': null,
     'modelId': 'Menlo/Jan-nano',
     'pipeline_tag': 'text-generation',
     'private': false,
-    'sha': '782985633ac4080dfdaa52e62d61dcf637e9ff0d',
+    'sha': 'a04aab0878648d8f284c63a52664a482ead16f06',
     'siblings': [
       {
         rfilename: '.gitattributes',
-        size: 1742,
+        size: 3460,
       },
       {
         rfilename: 'README.md',
@@ -45,6 +45,58 @@
         rfilename: 'jan-nano-0.4-iQ4_XS.gguf',
         size: 2270750400,
       },
+      {
+        rfilename: 'jan-nano-4b-Q3_K_L.gguf',
+        size: 2239784384,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q3_K_M.gguf',
+        size: 2075616704,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q3_K_S.gguf',
+        size: 1886995904,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q4_0.gguf',
+        size: 2369545664,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q4_1.gguf',
+        size: 2596627904,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q4_K_M.gguf',
+        size: 2497279424,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q4_K_S.gguf',
+        size: 2383308224,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q5_0.gguf',
+        size: 2823710144,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q5_1.gguf',
+        size: 3050792384,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q5_K_M.gguf',
+        size: 2889512384,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q5_K_S.gguf',
+        size: 2823710144,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q6_K.gguf',
+        size: 3306259904,
+      },
+      {
+        rfilename: 'jan-nano-4b-Q8_0.gguf',
+        size: 4280403904,
+      },
     ],
     'spaces': [],
     'tags': [
@@ -56,7 +108,7 @@
       'imatrix',
       'conversational',
     ],
-    'usedStorage': 20820673088,
+    'usedStorage': 93538518464,
     'widgetData': [
       {
         text: 'Hi, what can you help me with?',
@@ -74,8 +126,60 @@
   },
   models: [
     {
-      id: 'Menlo:Jan-nano:jan-nano-0.4-iQ4_XS.gguf',
+      id: 'menlo:jan-nano:jan-nano-0.4-iQ4_XS.gguf',
       size: 2270750400,
     },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q3_K_L.gguf',
+      size: 2239784384,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q3_K_M.gguf',
+      size: 2075616704,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q3_K_S.gguf',
+      size: 1886995904,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q4_0.gguf',
+      size: 2369545664,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q4_1.gguf',
+      size: 2596627904,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q4_K_M.gguf',
+      size: 2497279424,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q4_K_S.gguf',
+      size: 2383308224,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q5_0.gguf',
+      size: 2823710144,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q5_1.gguf',
+      size: 3050792384,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q5_K_M.gguf',
+      size: 2889512384,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q5_K_S.gguf',
+      size: 2823710144,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q6_K.gguf',
+      size: 3306259904,
+    },
+    {
+      id: 'menlo:jan-nano:jan-nano-4b-Q8_0.gguf',
+      size: 4280403904,
+    },
   ],
 }