From 2634659366d43cec3527d0cca16c7d5a8feb67f4 Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 15 Jun 2025 02:34:14 +0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9Bfix:=20default=20onboarding=20model?= =?UTF-8?q?=20should=20use=20recommended=20quantizations=20(#5273)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: default onboarding model should use recommended quantizations * ✨enhancement: show context shift option in provider settings * 🔧chore: wording --- extensions/download-extension/package.json | 2 +- .../resources/default_settings.json | 9 + .../inference-cortex-extension/src/index.ts | 12 +- web-app/src/routes/hub.tsx | 16 +- web-app/src/utils/models.ts | 185 ------------------ 5 files changed, 35 insertions(+), 189 deletions(-) delete mode 100644 web-app/src/utils/models.ts diff --git a/extensions/download-extension/package.json b/extensions/download-extension/package.json index 750934594..58fe42289 100644 --- a/extensions/download-extension/package.json +++ b/extensions/download-extension/package.json @@ -2,7 +2,7 @@ "name": "@janhq/download-extension", "productName": "Download Manager", "version": "1.0.0", - "description": "Handle downloads", + "description": "Download and manage files and AI models in Jan.", "main": "dist/index.js", "author": "Jan ", "license": "AGPL-3.0", diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json index 451596842..d27624639 100644 --- a/extensions/inference-cortex-extension/resources/default_settings.json +++ b/extensions/inference-cortex-extension/resources/default_settings.json @@ -8,6 +8,15 @@ "value": true } }, + { + "key": "context_shift", + "title": "Context Shift", + "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.", + "controllerType": "checkbox", + "controllerProps": { + "value": true + } + }, { "key": "cont_batching", "title": "Continuous Batching", diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 626d53696..3e8b60ebe 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -37,6 +37,7 @@ enum Settings { cpu_threads = 'cpu_threads', huggingfaceToken = 'hugging-face-access-token', auto_unload_models = 'auto_unload_models', + context_shift = 'context_shift', } type LoadedModelResponse = { data: { engine: string; id: string }[] } @@ -63,6 +64,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { cpu_threads?: number auto_unload_models: boolean = true reasoning_budget = -1 // Default reasoning budget in seconds + context_shift = true /** * The URL for making inference requests. */ @@ -128,6 +130,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { true ) this.flash_attn = await this.getSetting(Settings.flash_attn, true) + this.context_shift = await this.getSetting( + Settings.context_shift, + true + ) this.use_mmap = await this.getSetting(Settings.use_mmap, true) if (this.caching_enabled) this.cache_type = await this.getSetting(Settings.cache_type, 'q8') @@ -209,6 +215,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { this.updateCortexConfig({ huggingface_token: value }) } else if (key === Settings.auto_unload_models) { this.auto_unload_models = value as boolean + } else if (key === Settings.context_shift && typeof value === 'boolean') { + this.context_shift = value } } @@ -271,7 +279,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ...(model.id.toLowerCase().includes('jan-nano') ? { reasoning_budget: 0 } : { reasoning_budget: this.reasoning_budget }), - ...{ 'no-context-shift': true }, + ...(this.context_shift === false + ? { 'no-context-shift': true } + : {}), }, timeout: false, signal, diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index 22a747d70..ed7fb669b 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -49,6 +49,7 @@ type ModelProps = { type SearchParams = { repo: string } +const defaultModelQuantizations = ['iq4_xs.gguf', 'q4_k_m.gguf'] export const Route = createFileRoute(route.hub as any)({ component: Hub, @@ -219,7 +220,10 @@ function Hub() { const DownloadButtonPlaceholder = useMemo(() => { return ({ model }: ModelProps) => { - const modelId = model.models[0]?.id + const modelId = + model.models.find((e) => + defaultModelQuantizations.some((m) => e.id.toLowerCase().includes(m)) + )?.id ?? model.models[0]?.id const isDownloading = downloadProcesses.some((e) => e.id === modelId) const downloadProgress = downloadProcesses.find((e) => e.id === modelId)?.progress || 0 @@ -460,7 +464,15 @@ function Hub() {
- {toGigabytes(model.models?.[0]?.size)} + {toGigabytes( + ( + model.models.find((m) => + defaultModelQuantizations.some((e) => + m.id.toLowerCase().includes(e) + ) + ) ?? model.models?.[0] + )?.size + )}
diff --git a/web-app/src/utils/models.ts b/web-app/src/utils/models.ts deleted file mode 100644 index 7f8756f67..000000000 --- a/web-app/src/utils/models.ts +++ /dev/null @@ -1,185 +0,0 @@ -export const hardcodedModel = { - author: 'menlo', - id: 'menlo/jan-nano', - metadata: { - '_id': '68492cd9cada68b1d11ca1bd', - 'author': 'Menlo', - 'cardData': { - license: 'apache-2.0', - pipeline_tag: 'text-generation', - }, - 'createdAt': '2025-06-11T07:14:33.000Z', - 'description': - '---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)', - 'disabled': false, - 'downloads': 1434, - 'gated': false, - 'gguf': { - architecture: 'qwen3', - bos_token: '<|endoftext|>', - chat_template: - "{%- if tools %} {{- '<|im_start|>system\\n' }} {%- if messages[0].role == 'system' %} {{- messages[0].content + '\\n\\n' }} {%- endif %} {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }} {%- for tool in tools %} {{- \"\\n\" }} {{- tool | tojson }} {%- endfor %} {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }} {%- else %} {%- if messages[0].role == 'system' %} {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endfor %} {%- for message in messages %} {%- if message.content is string %} {%- set content = message.content %} {%- else %} {%- set content = '' %} {%- endif %} {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %} {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }} {%- elif message.role == \"assistant\" %} {%- set reasoning_content = '' %} {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} {%- if '' in content %} {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %} {%- set content = content.split('')[-1].lstrip('\\n') %} {%- endif %} {%- endif %} {%- if loop.index0 > ns.last_query_index %} {%- if loop.last or (not loop.last and reasoning_content) %} {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- if message.tool_calls %} {%- for tool_call in message.tool_calls %} {%- if (loop.first and content) or (not loop.first) %} {{- '\\n' }} {%- endif %} {%- if tool_call.function %} {%- set tool_call = tool_call.function %} {%- endif %} {{- '\\n{\"name\": \"' }} {{- tool_call.name }} {{- '\", \"arguments\": ' }} {%- if tool_call.arguments is string %} {{- tool_call.arguments }} {%- else %} {{- tool_call.arguments | tojson }} {%- endif %} {{- '}\\n' }} {%- endfor %} {%- endif %} {{- '<|im_end|>\\n' }} {%- elif message.role == \"tool\" %} {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %} {{- '<|im_start|>user' }} {%- endif %} {{- '\\n\\n' }} {{- content }} {{- '\\n' }} {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %} {{- '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\\n' }} {{- '\\n\\n\\n\\n' }} {%- endif %}", - context_length: 40960, - eos_token: '<|im_end|>', - quantize_imatrix_file: 'imatrix.dat', - total: 4022468096, - }, - 'id': 'Menlo/Jan-nano', - 'lastModified': '2025-06-13T16:57:55.000Z', - 'likes': 3, - 'model-index': null, - 'modelId': 'Menlo/Jan-nano', - 'pipeline_tag': 'text-generation', - 'private': false, - 'sha': 'a04aab0878648d8f284c63a52664a482ead16f06', - 'siblings': [ - { - rfilename: '.gitattributes', - size: 3460, - }, - { - rfilename: 'README.md', - size: 776, - }, - { - rfilename: 'jan-nano-0.4-iQ4_XS.gguf', - size: 2270750400, - }, - { - rfilename: 'jan-nano-4b-Q3_K_L.gguf', - size: 2239784384, - }, - { - rfilename: 'jan-nano-4b-Q3_K_M.gguf', - size: 2075616704, - }, - { - rfilename: 'jan-nano-4b-Q3_K_S.gguf', - size: 1886995904, - }, - { - rfilename: 'jan-nano-4b-Q4_0.gguf', - size: 2369545664, - }, - { - rfilename: 'jan-nano-4b-Q4_1.gguf', - size: 2596627904, - }, - { - rfilename: 'jan-nano-4b-Q4_K_M.gguf', - size: 2497279424, - }, - { - rfilename: 'jan-nano-4b-Q4_K_S.gguf', - size: 2383308224, - }, - { - rfilename: 'jan-nano-4b-Q5_0.gguf', - size: 2823710144, - }, - { - rfilename: 'jan-nano-4b-Q5_1.gguf', - size: 3050792384, - }, - { - rfilename: 'jan-nano-4b-Q5_K_M.gguf', - size: 2889512384, - }, - { - rfilename: 'jan-nano-4b-Q5_K_S.gguf', - size: 2823710144, - }, - { - rfilename: 'jan-nano-4b-Q6_K.gguf', - size: 3306259904, - }, - { - rfilename: 'jan-nano-4b-Q8_0.gguf', - size: 4280403904, - }, - ], - 'spaces': [], - 'tags': [ - 'gguf', - 'text-generation', - 'license:apache-2.0', - 'endpoints_compatible', - 'region:us', - 'imatrix', - 'conversational', - ], - 'usedStorage': 93538518464, - 'widgetData': [ - { - text: 'Hi, what can you help me with?', - }, - { - text: 'What is 84 * 3 / 2?', - }, - { - text: 'Tell me an interesting fact about the universe!', - }, - { - text: 'Explain quantum computing in simple terms.', - }, - ], - }, - models: [ - { - id: 'menlo:jan-nano:jan-nano-0.4-iQ4_XS.gguf', - size: 2270750400, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q3_K_L.gguf', - size: 2239784384, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q3_K_M.gguf', - size: 2075616704, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q3_K_S.gguf', - size: 1886995904, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_0.gguf', - size: 2369545664, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_1.gguf', - size: 2596627904, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_K_M.gguf', - size: 2497279424, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_K_S.gguf', - size: 2383308224, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q5_0.gguf', - size: 2823710144, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q5_1.gguf', - size: 3050792384, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q5_K_M.gguf', - size: 2889512384, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q5_K_S.gguf', - size: 2823710144, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q6_K.gguf', - size: 3306259904, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q8_0.gguf', - size: 4280403904, - }, - ], -}