🐛fix: default onboarding model should use recommended quantizations (#5273)

* 🐛fix: default onboarding model should use recommended quantizations
* ✨enhancement: show context shift option in provider settings
* 🔧chore: wording
2025-06-15 02:34:14 +07:00 · 2025-06-15 02:34:14 +07:00 · 2634659366
commit 2634659366
parent a10dbef2c8
5 changed files with 35 additions and 189 deletions
--- a/extensions/download-extension/package.json
+++ b/extensions/download-extension/package.json
@@ -2,7 +2,7 @@
  "name": "@janhq/download-extension",
  "productName": "Download Manager",
  "version": "1.0.0",
-  "description": "Handle downloads",
+  "description": "Download and manage files and AI models in Jan.",
  "main": "dist/index.js",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
--- a/extensions/inference-cortex-extension/resources/default_settings.json
+++ b/extensions/inference-cortex-extension/resources/default_settings.json
@@ -8,6 +8,15 @@
      "value": true
    }
  },
+  {
+    "key": "context_shift",
+    "title": "Context Shift",
+    "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.",
+    "controllerType": "checkbox",
+    "controllerProps": {
+      "value": true
+    }
+  },
  {
    "key": "cont_batching",
    "title": "Continuous Batching",
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -37,6 +37,7 @@ enum Settings {
  cpu_threads = 'cpu_threads',
  huggingfaceToken = 'hugging-face-access-token',
  auto_unload_models = 'auto_unload_models',
+  context_shift = 'context_shift',
 }

 type LoadedModelResponse = { data: { engine: string; id: string }[] }
@@ -63,6 +64,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
  cpu_threads?: number
  auto_unload_models: boolean = true
  reasoning_budget = -1 // Default reasoning budget in seconds
+  context_shift = true
  /**
   * The URL for making inference requests.
   */
@@ -128,6 +130,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
      true
    )
    this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
+    this.context_shift = await this.getSetting<boolean>(
+      Settings.context_shift,
+      true
+    )
    this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
    if (this.caching_enabled)
      this.cache_type = await this.getSetting<string>(Settings.cache_type, 'q8')
@@ -209,6 +215,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
      this.updateCortexConfig({ huggingface_token: value })
    } else if (key === Settings.auto_unload_models) {
      this.auto_unload_models = value as boolean
+    } else if (key === Settings.context_shift && typeof value === 'boolean') {
+      this.context_shift = value
    }
  }

@@ -271,7 +279,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
            ...(model.id.toLowerCase().includes('jan-nano')
              ? { reasoning_budget: 0 }
              : { reasoning_budget: this.reasoning_budget }),
-            ...{ 'no-context-shift': true },
+            ...(this.context_shift === false
+              ? { 'no-context-shift': true }
+              : {}),
          },
          timeout: false,
          signal,
--- a/web-app/src/routes/hub.tsx
+++ b/web-app/src/routes/hub.tsx
@@ -49,6 +49,7 @@ type ModelProps = {
 type SearchParams = {
  repo: string
 }
+const defaultModelQuantizations = ['iq4_xs.gguf', 'q4_k_m.gguf']

 export const Route = createFileRoute(route.hub as any)({
  component: Hub,
@@ -219,7 +220,10 @@ function Hub() {

  const DownloadButtonPlaceholder = useMemo(() => {
    return ({ model }: ModelProps) => {
-      const modelId = model.models[0]?.id
+      const modelId =
+        model.models.find((e) =>
+          defaultModelQuantizations.some((m) => e.id.toLowerCase().includes(m))
+        )?.id ?? model.models[0]?.id
      const isDownloading = downloadProcesses.some((e) => e.id === modelId)
      const downloadProgress =
        downloadProcesses.find((e) => e.id === modelId)?.progress || 0
@@ -460,7 +464,15 @@ function Hub() {
                            </Link>
                            <div className="shrink-0 space-x-3 flex items-center">
                              <span className="text-main-view-fg/70 font-medium text-xs">
-                                {toGigabytes(model.models?.[0]?.size)}
+                                {toGigabytes(
+                                  (
+                                    model.models.find((m) =>
+                                      defaultModelQuantizations.some((e) =>
+                                        m.id.toLowerCase().includes(e)
+                                      )
+                                    ) ?? model.models?.[0]
+                                  )?.size
+                                )}
                              </span>
                              <DownloadButtonPlaceholder model={model} />
                            </div>
--- a/web-app/src/utils/models.ts
+++ b/web-app/src/utils/models.ts
@@ -1,185 +0,0 @@
-export const hardcodedModel = {
-  author: 'menlo',
-  id: 'menlo/jan-nano',
-  metadata: {
-    '_id': '68492cd9cada68b1d11ca1bd',
-    'author': 'Menlo',
-    'cardData': {
-      license: 'apache-2.0',
-      pipeline_tag: 'text-generation',
-    },
-    'createdAt': '2025-06-11T07:14:33.000Z',
-    'description':
-      '---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)',
-    'disabled': false,
-    'downloads': 1434,
-    'gated': false,
-    'gguf': {
-      architecture: 'qwen3',
-      bos_token: '<|endoftext|>',
-      chat_template:
-        "{%- if tools %} {{- '<|im_start|>system\\n' }} {%- if messages[0].role == 'system' %} {{- messages[0].content + '\\n\\n' }} {%- endif %} {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }} {%- for tool in tools %} {{- \"\\n\" }} {{- tool | tojson }} {%- endfor %} {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }} {%- else %} {%- if messages[0].role == 'system' %} {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endfor %} {%- for message in messages %} {%- if message.content is string %} {%- set content = message.content %} {%- else %} {%- set content = '' %} {%- endif %} {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %} {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }} {%- elif message.role == \"assistant\" %} {%- set reasoning_content = '' %} {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} {%- if '</think>' in content %} {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %} {%- set content = content.split('</think>')[-1].lstrip('\\n') 
%} {%- endif %} {%- endif %} {%- if loop.index0 > ns.last_query_index %} {%- if loop.last or (not loop.last and reasoning_content) %} {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- if message.tool_calls %} {%- for tool_call in message.tool_calls %} {%- if (loop.first and content) or (not loop.first) %} {{- '\\n' }} {%- endif %} {%- if tool_call.function %} {%- set tool_call = tool_call.function %} {%- endif %} {{- '<tool_call>\\n{\"name\": \"' }} {{- tool_call.name }} {{- '\", \"arguments\": ' }} {%- if tool_call.arguments is string %} {{- tool_call.arguments }} {%- else %} {{- tool_call.arguments | tojson }} {%- endif %} {{- '}\\n</tool_call>' }} {%- endfor %} {%- endif %} {{- '<|im_end|>\\n' }} {%- elif message.role == \"tool\" %} {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %} {{- '<|im_start|>user' }} {%- endif %} {{- '\\n<tool_response>\\n' }} {{- content }} {{- '\\n</tool_response>' }} {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %} {{- '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\\n' }} {{- '<think>\\n\\n</think>\\n\\n' }} {%- endif %}",
-      context_length: 40960,
-      eos_token: '<|im_end|>',
-      quantize_imatrix_file: 'imatrix.dat',
-      total: 4022468096,
-    },
-    'id': 'Menlo/Jan-nano',
-    'lastModified': '2025-06-13T16:57:55.000Z',
-    'likes': 3,
-    'model-index': null,
-    'modelId': 'Menlo/Jan-nano',
-    'pipeline_tag': 'text-generation',
-    'private': false,
-    'sha': 'a04aab0878648d8f284c63a52664a482ead16f06',
-    'siblings': [
-      {
-        rfilename: '.gitattributes',
-        size: 3460,
-      },
-      {
-        rfilename: 'README.md',
-        size: 776,
-      },
-      {
-        rfilename: 'jan-nano-0.4-iQ4_XS.gguf',
-        size: 2270750400,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q3_K_L.gguf',
-        size: 2239784384,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q3_K_M.gguf',
-        size: 2075616704,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q3_K_S.gguf',
-        size: 1886995904,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q4_0.gguf',
-        size: 2369545664,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q4_1.gguf',
-        size: 2596627904,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q4_K_M.gguf',
-        size: 2497279424,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q4_K_S.gguf',
-        size: 2383308224,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q5_0.gguf',
-        size: 2823710144,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q5_1.gguf',
-        size: 3050792384,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q5_K_M.gguf',
-        size: 2889512384,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q5_K_S.gguf',
-        size: 2823710144,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q6_K.gguf',
-        size: 3306259904,
-      },
-      {
-        rfilename: 'jan-nano-4b-Q8_0.gguf',
-        size: 4280403904,
-      },
-    ],
-    'spaces': [],
-    'tags': [
-      'gguf',
-      'text-generation',
-      'license:apache-2.0',
-      'endpoints_compatible',
-      'region:us',
-      'imatrix',
-      'conversational',
-    ],
-    'usedStorage': 93538518464,
-    'widgetData': [
-      {
-        text: 'Hi, what can you help me with?',
-      },
-      {
-        text: 'What is 84 * 3 / 2?',
-      },
-      {
-        text: 'Tell me an interesting fact about the universe!',
-      },
-      {
-        text: 'Explain quantum computing in simple terms.',
-      },
-    ],
-  },
-  models: [
-    {
-      id: 'menlo:jan-nano:jan-nano-0.4-iQ4_XS.gguf',
-      size: 2270750400,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q3_K_L.gguf',
-      size: 2239784384,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q3_K_M.gguf',
-      size: 2075616704,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q3_K_S.gguf',
-      size: 1886995904,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q4_0.gguf',
-      size: 2369545664,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q4_1.gguf',
-      size: 2596627904,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q4_K_M.gguf',
-      size: 2497279424,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q4_K_S.gguf',
-      size: 2383308224,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q5_0.gguf',
-      size: 2823710144,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q5_1.gguf',
-      size: 3050792384,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q5_K_M.gguf',
-      size: 2889512384,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q5_K_S.gguf',
-      size: 2823710144,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q6_K.gguf',
-      size: 3306259904,
-    },
-    {
-      id: 'menlo:jan-nano:jan-nano-4b-Q8_0.gguf',
-      size: 4280403904,
-    },
-  ],
-}