From 2b8c448be49e0002fb97795c2c88035003026460 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Fri, 13 Jun 2025 11:37:34 +0700 Subject: [PATCH 01/48] chore: enable shortcut zoom (#5261) * chore: enable shortcut zoom * chore: update shortcut setting --- src-tauri/capabilities/default.json | 15 +++++---------- src-tauri/tauri.conf.json | 1 + web-app/src/routes/settings/shortcuts.tsx | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src-tauri/capabilities/default.json b/src-tauri/capabilities/default.json index 2dd5925c3..76c9bc567 100644 --- a/src-tauri/capabilities/default.json +++ b/src-tauri/capabilities/default.json @@ -2,16 +2,13 @@ "$schema": "../gen/schemas/desktop-schema.json", "identifier": "default", "description": "enables the default permissions", - "windows": [ - "main" - ], + "windows": ["main"], "remote": { - "urls": [ - "http://*" - ] + "urls": ["http://*"] }, "permissions": [ "core:default", + "core:webview:allow-set-webview-zoom", "core:window:allow-start-dragging", "core:window:allow-set-theme", "shell:allow-spawn", @@ -81,9 +78,7 @@ { "identifier": "opener:allow-open-url", "description": "opens the default permissions for the core module", - "windows": [ - "*" - ], + "windows": ["*"], "allow": [ { "url": "https://*" @@ -98,4 +93,4 @@ }, "store:default" ] -} \ No newline at end of file +} diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 545873172..1810c0661 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -13,6 +13,7 @@ "macOSPrivateApi": true, "windows": [ { + "zoomHotkeysEnabled": true, "label": "main", "title": "Jan", "width": 1024, diff --git a/web-app/src/routes/settings/shortcuts.tsx b/web-app/src/routes/settings/shortcuts.tsx index ae28acae0..2d46ad7c2 100644 --- a/web-app/src/routes/settings/shortcuts.tsx +++ b/web-app/src/routes/settings/shortcuts.tsx @@ -47,6 +47,28 @@ function Shortcuts() { } /> + + + + + + + } + /> + + + - + + + } + /> {/* Chat */} From e04dfc73c7ebf4703bf5b4543da3c82cd06fb2c3 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Fri, 13 Jun 2025 15:22:55 +0700 Subject: [PATCH 02/48] fix: thinking block (#5263) --- web-app/src/containers/ThreadContent.tsx | 2 +- web-app/src/routes/hub.tsx | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 2ad4477e6..f067a4431 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -184,7 +184,7 @@ export const ThreadContent = memo( | undefined return ( - + {item.content?.[0]?.text && item.role === 'user' && (
diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx
index 9ae3d151e..8c299bd5f 100644
--- a/web-app/src/routes/hub.tsx
+++ b/web-app/src/routes/hub.tsx
@@ -236,7 +236,8 @@ function Hub() {
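Note on the zoom shortcut change in PATCH 01/48 above: `zoomHotkeysEnabled` in `tauri.conf.json` turns on the webview's built-in page-zoom hotkeys, while the `core:webview:allow-set-webview-zoom` capability allows the frontend to set the zoom factor itself. The sketch below illustrates what such a call could look like from the web app; it assumes the Tauri v2 JavaScript API (`getCurrentWebview().setZoom`), and the step size, clamp range, and function names are illustrative rather than taken from the patch.

```ts
// Illustrative sketch only; assumes the Tauri v2 JS API and the
// `core:webview:allow-set-webview-zoom` capability granted in the patch above.
import { getCurrentWebview } from '@tauri-apps/api/webview'

let zoomFactor = 1.0

// Clamp the requested factor and apply it to the current webview.
async function applyZoom(next: number): Promise<void> {
  zoomFactor = Math.min(Math.max(next, 0.5), 3.0)
  await getCurrentWebview().setZoom(zoomFactor)
}

export const zoomIn = () => applyZoom(zoomFactor + 0.1)
export const zoomOut = () => applyZoom(zoomFactor - 0.1)
export const resetZoom = () => applyZoom(1.0)
```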
From 23240f958e3a6bde152167fd2de041cd40398278 Mon Sep 17 00:00:00 2001 From: Louis Date: Fri, 13 Jun 2025 17:05:49 +0700 Subject: [PATCH 03/48] Merge pull request #5262 from menloresearch/chore/sync-new-hub-data chore: sync new hub data --- extensions/assistant-extension/src/index.ts | 2 +- .../model-extension/resources/default.json | 6113 +++++++++++++++-- web-app/src/hooks/useAssistant.ts | 2 +- web-app/src/utils/models.ts | 36 +- 4 files changed, 5672 insertions(+), 481 deletions(-) diff --git a/extensions/assistant-extension/src/index.ts b/extensions/assistant-extension/src/index.ts index 6706e5ff3..a0bbe71f1 100644 --- a/extensions/assistant-extension/src/index.ts +++ b/extensions/assistant-extension/src/index.ts @@ -75,7 +75,7 @@ export default class JanAssistantExtension extends AssistantExtension { 'Jan is a helpful desktop assistant that can reason through complex tasks and use tools to complete them on the user’s behalf.', model: '*', instructions: - 'Jan is a helpful desktop assistant that can reason through complex tasks and use tools to complete them on the user’s behalf. Respond naturally and concisely, take actions when needed, and guide the user toward their goals.', + 'You have access to a set of tools to help you answer the user’s question. You can use only one tool per message, and you’ll receive the result of that tool in the user’s next response. To complete a task, use tools step by step—each step should be guided by the outcome of the previous one.\nTool Usage Rules:\n1. Always provide the correct values as arguments when using tools. Do not pass variable names—use actual values instead.\n2. You may perform multiple tool steps to complete a task.\n3. Avoid repeating a tool call with exactly the same parameters to prevent infinite loops.', tools: [ { type: 'retrieval', diff --git a/extensions/model-extension/resources/default.json b/extensions/model-extension/resources/default.json index ce17c9616..dbd9d906e 100644 --- a/extensions/model-extension/resources/default.json +++ b/extensions/model-extension/resources/default.json @@ -1,25 +1,146 @@ [ { - "author": "CohereForAI", - "id": "cortexso/command-r", + "author": "PrimeIntellect", + "id": "cortexso/intellect-2", "metadata": { - "_id": "66751b98585f2bf57092b2ae", + "_id": "6821ac2482ae7d76d34abdb8", "author": "cortexso", "cardData": { - "license": "cc-by-nc-4.0" + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] }, - "createdAt": "2024-06-21T06:20:08.000Z", - "description": "---\nlicense: cc-by-nc-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nC4AI Command-R is a research release of a 35 billion parameter highly performant generative model. Command-R is a large language model with open weights optimized for a variety of use cases including reasoning, summarization, and question answering. Command-R has the capability for multilingual generation evaluated in 10 languages and highly performant RAG capabilities.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Command-r-32b](https://huggingface.co/cortexhub/command-r/tree/32b) | `cortex run command-r:32b` |\n| 1 | [Command-r-35b](https://huggingface.co/cortexhub/command-r/tree/35b) | `cortex run command-r:35b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/command-r\n ```\n \n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run command-r\n ```\n \n## Credits\n\n- **Author:** Cohere For AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://cohere.com/c4ai-cc-by-nc-license)", + "createdAt": "2025-05-12T08:07:00.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**Prime Intellect** released **INTELLECT-2**, a 32 billion parameter large language model (LLM) trained through distributed reinforcement learning on globally donated GPU resources. Built on the **Qwen2** architecture and fine-tuned with the **prime-rl** framework, INTELLECT-2 demonstrates strong performance in math, coding, and logical reasoning.\n\nThis model leverages GRPO (Generalized Reinforcement Policy Optimization) over verifiable rewards, introducing asynchronous distributed RL training with enhanced stability techniques. While its primary focus was on verifiable mathematical and coding tasks, it remains compatible with general-purpose text generation tasks.\n\n## Variants\n\n### INTELLECT-2\n\n| No | Variant | Branch | Cortex CLI command |\n|----|----------------------------------------------------------------------------------|--------|-----------------------------------|\n| 1 | [INTELLECT-2 (32B)](https://huggingface.co/cortexso/intellect-2/tree/32b) | 32b | `cortex run intellect-2:32b` |\n\nEach branch includes multiple GGUF quantized versions, optimized for various hardware configurations:\n- **INTELLECT-2-32B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/intellect-2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run intellect-2\n ```\n\n## Credits\n\n- **Author:** Prime Intellect\n- **Converter:** [Menlo Research](https://menlo.ai/)\n- **Original License:** [Apache-2.0](https://choosealicense.com/licenses/apache-2.0/)\n- **Paper:** [Intellect 2 Technical Report](https://storage.googleapis.com/public-technical-paper/INTELLECT_2_Technical_Report.pdf)", "disabled": false, - "downloads": 14, + "downloads": 1436, "gated": false, - "id": "cortexso/command-r", - "lastModified": "2024-11-12T20:13:19.000Z", - "likes": 1, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content %}\n {%- if not loop.last %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- if not loop.last %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n' }}\n{%- endif %}\n", + "context_length": 40960, + "eos_token": "<|im_end|>", + "total": 32763876352 + }, + "id": "cortexso/intellect-2", + "lastModified": "2025-05-12T14:18:35.000Z", + "likes": 0, "model-index": null, - "modelId": "cortexso/command-r", + "modelId": "cortexso/intellect-2", + "pipeline_tag": "text-generation", "private": false, - "sha": "ca1564f6a6d4d03181b01e87e6c3e3fc959c7103", + "sha": "9d237b26053af28e0119331e0dfbc75b45a0317b", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "intellect-2-q2_k.gguf" + }, + 
{ + "rfilename": "intellect-2-q3_k_l.gguf" + }, + { + "rfilename": "intellect-2-q3_k_m.gguf" + }, + { + "rfilename": "intellect-2-q3_k_s.gguf" + }, + { + "rfilename": "intellect-2-q4_k_m.gguf" + }, + { + "rfilename": "intellect-2-q4_k_s.gguf" + }, + { + "rfilename": "intellect-2-q5_k_m.gguf" + }, + { + "rfilename": "intellect-2-q5_k_s.gguf" + }, + { + "rfilename": "intellect-2-q6_k.gguf" + }, + { + "rfilename": "intellect-2-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 206130755200, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "intellect-2:32b", + "size": 19851336256 + } + ] + }, + { + "author": "Microsoft", + "id": "cortexso/phi-4-reasoning", + "metadata": { + "_id": "681857cda178d73748a1295f", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2025-05-05T06:16:45.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**Microsoft Research** developed and released the **Phi-4-reasoning** series, a cutting-edge family of reasoning-focused language models optimized for chain-of-thought (CoT), step-by-step problem solving, and high-efficiency inference. These models excel in advanced mathematical reasoning, scientific Q&A, and instruction-following scenarios.\n\nThe Phi-4 models introduce extended context lengths, ChatML reasoning templates, and strong performance on benchmark datasets, while maintaining compact sizes that are ideal for memory- and latency-constrained environments.\n\n## Variants\n\n### Phi-4-reasoning\n\n| No | Variant | Branch | Cortex CLI command |\n|----|-------------------------------------------------------------------------------------|------------|-------------------------------------|\n| 1 | [phi-4-mini-reasoning](https://huggingface.co/microsoft/phi-4-mini-reasoning) | 4b | `cortex run phi4:4b` |\n| 2 | [phi-4-reasoning](https://huggingface.co/microsoft/phi-4-reasoning-plus) | 14b | `cortex run phi4:14b` |\n| 3 | [phi-4-reasoning-plus](https://huggingface.co/microsoft/phi-4-reasoning-plus) | 14b-plus | `cortex run phi4:14b-plus` |\n\nEach branch supports multiple quantized GGUF versions:\n- **phi-4-mini-reasoning:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **phi-4-reasoning:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **phi-4-reasoning-plus:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/phi4\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run phi4\n ```\n\n## Credits\n\n- **Author:** Microsoft Research\n- **Converter:** [Menlo Research](https://menlo.ai/)\n- **Original License:** [MIT License](https://opensource.org/license/mit/)\n- **Blogs:** [Phi-4 Reasoning Blog](https://www.microsoft.com/en-us/research/blog/)\n", + "disabled": false, + "downloads": 2894, + "gated": false, + "gguf": { + "architecture": "phi3", + "bos_token": "<|endoftext|>", + "chat_template": "{{ '<|system|>Your name is Phi, an AI math expert developed by Microsoft.' }}{% for message in messages %}{% if message['role'] == 'system' %} {{ message['content'] }}{% if 'tools' in message and message['tools'] is not none %}{{ '<|tool|>' + message['tools'] + '<|/tool|>' }}{% endif %}{% endif %}{% endfor %}{{ '<|end|>' }}{% for message in messages %}{% if message['role'] != 'system' %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}", + "context_length": 131072, + "eos_token": "<|endoftext|>", + "total": 3836021856 + }, + "id": "cortexso/phi-4-reasoning", + "lastModified": "2025-05-05T09:36:18.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/phi-4-reasoning", + "pipeline_tag": "text-generation", + "private": false, + "sha": "218f08078412d1bcd46e7ce48c4442b14b98164d", "siblings": [ { "rfilename": ".gitattributes" @@ -32,60 +153,2169 @@ }, { "rfilename": "model.yml" + }, + { + "rfilename": "phi-4-mini-reasoning-q2_k.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q3_k_l.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q3_k_m.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q3_k_s.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q4_k_m.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q4_k_s.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q5_k_m.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q5_k_s.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q6_k.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q8_0.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q2_k.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q3_k_l.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q3_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q3_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q4_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q4_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q5_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q5_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q6_k.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q8_0.gguf" + }, + { + "rfilename": "phi-4-reasoning-q2_k.gguf" + }, + { + "rfilename": "phi-4-reasoning-q3_k_l.gguf" + }, + { + "rfilename": "phi-4-reasoning-q3_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-q3_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-q4_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-q4_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-q5_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-q5_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-q6_k.gguf" + }, + { + "rfilename": "phi-4-reasoning-q8_0.gguf" } ], "spaces": [], - "tags": ["license:cc-by-nc-4.0", "region:us"], - "usedStorage": 227869888992 + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 212004788352, + "widgetData": [ + { + "text": "Hi, what can you help me 
with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] }, "models": [ { - "id": "command-r:32b-gguf-q8-0", - "size": 34326891584 + "id": "phi-4-reasoning:14b", + "size": 9053115968 }, { - "id": "command-r:35b-gguf", - "size": 21527041888 + "id": "phi-4-reasoning:4b", + "size": 2491874464 }, { - "id": "command-r:32b-gguf-q6-k", - "size": 26505169984 + "id": "phi-4-reasoning:14b-plus", + "size": 9053116000 + } + ] + }, + { + "author": "Internlm", + "id": "cortexso/internlm3-8b-it", + "metadata": { + "_id": "678dcf22fbe4dceca4562d1f", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-20T04:20:50.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**InternLM** developed and released the [InternLM3-8B-Instruct](https://huggingface.co/internlm/InternLM3-8B-Instruct), an 8-billion parameter instruction-tuned language model designed for general-purpose usage and advanced reasoning tasks. The model delivers state-of-the-art performance on reasoning and knowledge-intensive tasks, outperforming other models like Llama3.1-8B and Qwen2.5-7B. Trained on 4 trillion high-quality tokens, InternLM3 achieves exceptional efficiency, reducing training costs by over 75% compared to other models of similar scale. \n\nThe model features dual operational modes: a deep thinking mode for solving complex reasoning tasks through long chain-of-thought processes and a normal response mode for fluent and interactive user experiences. These capabilities make InternLM3-8B-Instruct ideal for applications in conversational AI, advanced reasoning, and general-purpose language understanding.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Internlm3-8b-it](https://huggingface.co/cortexso/internlm3-8b-it/tree/8b) | `cortex run internlm3-8b-it:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/internlm3-8b-it\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run internlm3-8b-it\n ```\n\n## Credits\n\n- **Author:** InternLM\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/internlm/internlm3-8b-instruct/blob/main/LICENSE.txt)\n- **Papers:** [InternLM2 Technical Report](https://arxiv.org/abs/2403.17297)", + "disabled": false, + "downloads": 229, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 8804241408 + }, + "id": "cortexso/internlm3-8b-it", + "lastModified": "2025-03-03T05:57:41.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/internlm3-8b-it", + "pipeline_tag": "text-generation", + "private": false, + "sha": "957eb6aa16a10eda3ce1a87dcacfd99bda5c469a", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "internlm3-8b-instruct-q2_k.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q6_k.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2403.17297", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 56027406208, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "internlm3-8b-it:8b", + "size": 5358623936 + } + ] + }, + { + "author": "Google", + "id": "cortexso/gemma3", + "metadata": { + "_id": "67d14a4c2e461dfe226bd1be", + "author": "cortexso", + "cardData": { + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2025-03-12T08:48:12.000Z", + "description": "---\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n## Overview\n**Google** developed and released the **Gemma 3** series, featuring multiple model sizes with both pre-trained and instruction-tuned variants. These multimodal models handle both text and image inputs while generating text outputs, making them versatile for various applications. 
Gemma 3 models are built from the same research and technology used to create the Gemini models, offering state-of-the-art capabilities in a lightweight and accessible format.\n\nThe Gemma 3 models include four different sizes with open weights, providing excellent performance across tasks like question answering, summarization, and reasoning while maintaining efficiency for deployment in resource-constrained environments such as laptops, desktops, or custom cloud infrastructure.\n\n## Variants\n\n### Gemma 3\n| No | Variant | Branch | Cortex CLI command |\n| -- | ------------------------------------------------------ | ------ | ----------------------------- |\n| 1 | [Gemma-3-1B](https://huggingface.co/cortexso/gemma3/tree/1b) | 1b | `cortex run gemma3:1b` |\n| 2 | [Gemma-3-4B](https://huggingface.co/cortexso/gemma3/tree/4b) | 4b | `cortex run gemma3:4b` |\n| 3 | [Gemma-3-12B](https://huggingface.co/cortexso/gemma3/tree/12b) | 12b | `cortex run gemma3:12b` |\n| 4 | [Gemma-3-27B](https://huggingface.co/cortexso/gemma3/tree/27b) | 27b | `cortex run gemma3:27b` |\n\nEach branch contains a default quantized version.\n\n### Key Features\n- **Multimodal capabilities**: Handles both text and image inputs\n- **Large context window**: 128K tokens\n- **Multilingual support**: Over 140 languages\n- **Available in multiple sizes**: From 1B to 27B parameters\n- **Open weights**: For both pre-trained and instruction-tuned variants\n\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma3\n ```\n\n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run gemma3\n ```\n\n## Credits\n- **Author:** Google\n- **Original License:** [Gemma License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma 3 Technical Report](https://storage.googleapis.com/deepmind-media/gemma/Gemma3Report.pdf)", + "disabled": false, + "downloads": 5425, + "gated": false, + "gguf": { + "architecture": "gemma3", + "bos_token": "", + "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'model\n'}}\n{%- endif -%}\n", + "context_length": 131072, + "eos_token": "", + "total": 
11765788416 + }, + "id": "cortexso/gemma3", + "lastModified": "2025-05-13T12:45:28.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/gemma3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "289bd96e0dbb2f82e77c56c9c09d66ff76769895", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "gemma-3-12b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q8_0.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q8_0.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q8_0.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 280561347040, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "gemma3:4b", + "size": 2489757760 }, { - "id": "command-r:32b-gguf-q5-km", - "size": 23051422784 + "id": "gemma3:27b", + "size": 16546404640 }, { - "id": "command-r:32b-gguf-q4-km", - "size": 19800837184 + "id": "gemma3:12b", + "size": 7300574912 }, { - "id": "command-r:32b-gguf-q5-ks", - "size": 22494366784 + "id": "gemma3:1b", + "size": 806058144 + } + ] + }, + { + "author": "Qwen", + "id": "cortexso/qwen-qwq", + "metadata": { + "_id": "67c909487c87605263db5352", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2025-03-06T02:32:40.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview \n\n**QwQ** is the reasoning model of the **Qwen** series. Unlike conventional instruction-tuned models, **QwQ** is designed to think and reason, achieving significantly enhanced performance in downstream tasks, especially challenging problem-solving scenarios. \n\n**QwQ-32B** is the **medium-sized** reasoning model in the QwQ family, capable of **competitive performance** against state-of-the-art reasoning models, such as **DeepSeek-R1** and **o1-mini**. It is optimized for tasks requiring logical deduction, multi-step reasoning, and advanced comprehension. \n\nThe model is well-suited for **AI research, automated theorem proving, advanced dialogue systems, and high-level decision-making applications**. \n\n## Variants \n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [QwQ-32B](https://huggingface.co/cortexso/qwen-qwq/tree/main) | `cortex run qwen-qwq:32b` | \n\n## Use it with Jan (UI) \n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart) \n2. Use in Jan model Hub: \n ```bash\n cortexso/qwen-qwq\n ``` \n\n## Use it with Cortex (CLI) \n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart) \n2. 
Run the model with command: \n ```bash\n cortex run qwen-qwq\n ``` \n\n## Credits \n\n- **Author:** Qwen Team \n- **Converter:** [Homebrew](https://www.homebrew.ltd/) \n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/) \n- **Paper:** [Introducing QwQ-32B: The Medium-Sized Reasoning Model](https://qwenlm.github.io/blog/qwq-32b/)", + "disabled": false, + "downloads": 582, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n' }}\n{%- endif %}\n", + "context_length": 131072, + "eos_token": "<|im_end|>", + "total": 32763876352 + }, + "id": "cortexso/qwen-qwq", + "lastModified": "2025-03-13T02:39:51.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/qwen-qwq", + "pipeline_tag": "text-generation", + "private": false, + "sha": "17e393edf64f5ecca3089b4b5822d05a165882bd", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "qwq-32b-q2_k.gguf" + }, + { + "rfilename": "qwq-32b-q3_k_l.gguf" + }, + { + "rfilename": "qwq-32b-q3_k_m.gguf" + }, + { + "rfilename": "qwq-32b-q3_k_s.gguf" + }, + { + "rfilename": "qwq-32b-q4_k_m.gguf" + }, + { + "rfilename": "qwq-32b-q4_k_s.gguf" + }, + { + "rfilename": "qwq-32b-q5_k_m.gguf" + }, + { + "rfilename": 
"qwq-32b-q5_k_s.gguf" + }, + { + "rfilename": "qwq-32b-q6_k.gguf" + }, + { + "rfilename": "qwq-32b-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 206130754880, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "qwen-qwq:32b", + "size": 19851336224 + } + ] + }, + { + "author": "DeepCogito", + "id": "cortexso/cogito-v1", + "metadata": { + "_id": "67f67ca2c68bea1f264edc11", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2025-04-09T13:56:50.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**DeepCogito** introduces the **Cogito-v1 Preview** series, a powerful suite of hybrid reasoning models trained with Iterated Distillation and Amplification (IDA). These models are designed to push the boundaries of open-weight LLMs through scalable alignment and self-improvement strategies, offering unmatched performance across coding, STEM, multilingual, and agentic use cases.\n\nEach model in this series operates in both **standard** (direct answer) and **reasoning** (self-reflective) modes, significantly outperforming size-equivalent open models such as LLaMA, DeepSeek, and Qwen. The 70B variant notably surpasses the newly released LLaMA 4 109B MoE model in benchmarks.\n\n## Variants\n\n### Cogito-v1 Preview\n\n| No | Variant | Branch | Cortex CLI command |\n|----|--------------------------------------------------------------------------------------------------|--------|-----------------------------------------------|\n| 1 | [Cogito-v1-Preview-LLaMA-3B](https://huggingface.co/cortexso/cogito-v1/tree/3b) | 3b | `cortex run cognito-v1:3b` |\n| 2 | [Cogito-v1-Preview-LLaMA-8B](https://huggingface.co/cortexso/cogito-v1/tree/8b) | 8b | `cortex run cognito-v1:8b` |\n| 3 | [Cogito-v1-Preview-Qwen-14B](https://huggingface.co/cortexso/cogito-v1/tree/14b) | 14b | `cortex run cognito-v1:14b` |\n| 4 | [Cogito-v1-Preview-Qwen-32B](https://huggingface.co/cortexso/cogito-v1/tree/32b) | 32b | `cortex run cognito-v1:32b` |\n| 5 | [Cogito-v1-Preview-LLaMA-70B](https://huggingface.co/cortexso/cogito-v1/tree/70b) | 70b | `cortex run cognito-v1:70b` |\n\nEach branch contains a default quantized version:\n- **LLaMA-3B:** q4-km \n- **LLaMA-8B:** q4-km \n- **Qwen-14B:** q4-km \n- **Qwen-32B:** q4-km \n- **LLaMA-70B:** q4-km \n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart) \n2. Use in Jan model Hub: \n ```bash\n deepcogito/cognito-v1\n ```\n## Use it with Cortex (CLI)\n\n1. Install Cortex using [Quickstart](https://cortex.so/)\n2. 
Run the model with command:\n ```bash\n cortex run cognito-v1\n ```\n\n## Credits\n\n- **Author:** DeepCogito\n- **Original License:** [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n- **Papers:** [Cognito v1 Preview](https://www.deepcogito.com/research/cogito-v1-preview)", + "disabled": false, + "downloads": 4045, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- if not enable_thinking is defined %}\n {%- set enable_thinking = false %}\n{%- endif %}\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n{#- Set the system message. If enable_thinking is true, add the \"Enable deep thinking subroutine.\" #}\n{%- if enable_thinking %}\n {%- if system_message != \"\" %}\n {%- set system_message = \"Enable deep thinking subroutine.\n\n\" ~ system_message %}\n {%- else %}\n {%- set system_message = \"Enable deep thinking subroutine.\" %}\n {%- endif %}\n{%- endif %}\n{#- Set the system message. In case there are tools present, add them to the system message. #}\n{%- if tools is not none or system_message != '' %}\n {{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n {{- system_message }}\n {%- if tools is not none %}\n {%- if system_message != \"\" %}\n {{- \"\n\n\" }}\n {%- endif %}\n {{- \"Available Tools:\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n{%- endif %}\n\n{#- Rest of the messages #}\n{%- for message in messages %}\n {#- The special cases are when the message is from a tool (via role ipython/tool/tool_results) or when the message is from the assistant, but has \"tool_calls\". If not, we add the message directly as usual. #}\n {#- Case 1 - Usual, non tool related message. #}\n {%- if not (message.role == \"ipython\" or message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}\n {%- if message['content'] is string %}\n {{- message['content'] | trim }}\n {%- else %}\n {%- for item in message['content'] %}\n {%- if item.type == 'text' %}\n {{- item.text | trim }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|eot_id|>' }}\n \n {#- Case 2 - the response is from the assistant, but has a tool call returned. The assistant may also have returned some content along with the tool call. 
#}\n {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n {{- \"<|start_header_id|>assistant<|end_header_id|>\n\n\" }}\n {%- if message['content'] is string %}\n {{- message['content'] | trim }}\n {%- else %}\n {%- for item in message['content'] %}\n {%- if item.type == 'text' %}\n {{- item.text | trim }}\n {%- if item.text | trim != \"\" %}\n {{- \"\n\n\" }}\n {%- endif %}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- \"[\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {%- if not tool_call.id is defined %}\n {{- out }}\n {%- else %}\n {{- out[:-1] }}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]<|eot_id|>\" }}\n {%- endif %}\n {%- endfor %}\n \n {#- Case 3 - the response is from a tool call. The tool call may have an id associated with it as well. If it does, we add it to the prompt. #}\n {%- elif message.role == \"ipython\" or message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.tool_call_id is defined and message.tool_call_id != '' %}\n {{- '{\"content\": ' + (message.content | tojson) + ', \"call_id\": \"' + message.tool_call_id + '\"}' }}\n {%- else %}\n {{- '{\"content\": ' + (message.content | tojson) + '}' }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}", + "context_length": 131072, + "eos_token": "<|eot_id|>", + "total": 3606752320 + }, + "id": "cortexso/cogito-v1", + "lastModified": "2025-04-10T03:02:13.000Z", + "likes": 3, + "model-index": null, + "modelId": "cortexso/cogito-v1", + "pipeline_tag": "text-generation", + "private": false, + "sha": "7e55c8c2946b9b48c606431e7a2eaf299c15b80d", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q2_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q3_k_l.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q3_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q3_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q4_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q4_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q5_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q5_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q6_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q8_0.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-70b-q4_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q2_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q3_k_l.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q3_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q3_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q4_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q4_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q5_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q5_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q6_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q8_0.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q2_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q3_k_l.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q3_k_m.gguf" + 
}, + { + "rfilename": "cogito-v1-preview-qwen-14b-q3_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q4_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q4_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q5_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q5_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q6_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q8_0.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q2_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q3_k_l.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q3_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q3_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q4_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q4_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q5_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q5_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q6_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 417094614784, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "cogito-v1:8b", + "size": 4920738752 }, { - "id": "command-r:32b-gguf-q2-k", - "size": 12810767424 + "id": "cogito-v1:70b", + "size": 42520398016 }, { - "id": "command-r:32b-gguf-q3-kl", - "size": 17563438144 + "id": "cogito-v1:3b", + "size": 2241004384 }, { - "id": "command-r:gguf", - "size": 21527041888 + "id": "cogito-v1:32b", + "size": 19848503488 }, { - "id": "command-r:32b-gguf-q3-ks", - "size": 14708689984 + "id": "cogito-v1:14b", + "size": 8985277888 + } + ] + }, + { + "author": "ibm-granite", + "id": "cortexso/granite-3.2-it", + "metadata": { + "_id": "67ab23c8e77c0a1c32f62879", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-02-11T10:17:44.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nGranite-3.2-it is an advanced AI language model derived from the IBM Granite framework, specifically designed for instruction-following tasks in Italian. Its primary purpose is to facilitate human-like interactions by understanding and generating responses that are contextually relevant and coherent. This model can be effectively utilized in various applications, including customer support, content creation, and language translation, enhancing communication efficiency across diverse sectors. Its performance demonstrates a strong ability to comprehend nuanced instructions and generate accurate outputs, making it suitable for professional and creative environments alike. Overall, Granite-3.2-it stands out for its adaptability, responsiveness, and proficiency in Italian language tasks.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Granite-3.2-it-8b](https://huggingface.co/cortexso/granite-3.2-it/tree/8b) | cortex run granite-3.2-it:8b|\n## Use it with Jan (UI)\n1. 
Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/granite-3.2-it\n ```\n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run granite-3.2-it\n ```\n## Credits\n- **Author:** ibm-granite\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://www.apache.org/licenses/LICENSE-2.0)\n- **Paper:** [IBM Granite 3.2 Blog](https://www.ibm.com/new/announcements/ibm-granite-3-2-open-source-reasoning-and-vision)", + "disabled": false, + "downloads": 352, + "gated": false, + "gguf": { + "architecture": "granite", + "bos_token": "<|end_of_text|>", + "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"Knowledge Cutoff Date: April 2024.\nToday's Date: \" + strftime_now('%B %d, %Y') + \".\nYou are Granite, developed by IBM.\" %}\n {%- if tools and documents %}\n {%- set system_message = system_message + \" You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.\n\nWrite the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.\" %}\n {%- elif tools %}\n {%- set system_message = system_message + \" You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.\" %}\n {%- elif documents %}\n {%- set system_message = system_message + \" Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.\" %}\n {%- elif thinking %}\n {%- set system_message = system_message + \" You are a helpful AI assistant.\nRespond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. 
Write your thoughts after 'Here is my thought process:' and write your response after 'Here is my response:' for each user query.\" %}\n {%- else %}\n {%- set system_message = system_message + \" You are a helpful AI assistant.\" %} \n {%- endif %}\n {%- if 'citations' in controls and documents %}\n {%- set system_message = system_message + '\n\nIn your response, use the symbols and to indicate when a fact comes from a document in the search result, e.g 0 for a fact from document 0. Afterwards, list all the citations with their corresponding documents in an ordered list.' %}\n {%- endif %}\n {%- if 'hallucinations' in controls and documents %}\n {%- set system_message = system_message + '\n\nFinally, after the response is written, include a numbered list of sentences from the response that are potentially hallucinated and not based in the documents.' %}\n {%- endif %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>\n' }}\n{%- if tools %}\n {{- '<|start_of_role|>tools<|end_of_role|>' }}\n {{- tools | tojson(indent=4) }}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- if documents %}\n {{- '<|start_of_role|>documents<|end_of_role|>' }}\n {%- for document in documents %}\n {{- 'Document ' + loop.index0 | string + '\n' }}\n {{- document['text'] }}\n {%- if not loop.last %}\n {{- '\n\n'}}\n {%- endif%}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in loop_messages %}\n {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|start_of_role|>assistant' }}\n {%- if controls %}\n {{- ' ' + controls | tojson()}}\n {%- endif %}\n {{- '<|end_of_role|>' }}\n {%- endif %}\n{%- endfor %}", + "context_length": 131072, + "eos_token": "<|end_of_text|>", + "total": 8170848256 + }, + "id": "cortexso/granite-3.2-it", + "lastModified": "2025-03-03T02:11:18.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/granite-3.2-it", + "pipeline_tag": "text-generation", + "private": false, + "sha": "2fb3d81e43760500c0ad28f9b7d047c75abc16dd", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "granite-3.2-8b-instruct-q2_k.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q6_k.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 56447768704, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "granite-3.2-it:8b", + "size": 4942859456 + } + ] + }, + { + "author": "allenai", + "id": "cortexso/olmo-2", + "metadata": { + "_id": "6746c45ca0de7ab99efe78d5", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-27T07:03:56.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\nOLMo-2 is a series of Open Language Models designed to enable the science of language models. These models are trained on the Dolma dataset, with all code, checkpoints, logs (coming soon), and associated training details made openly available.\n\nThe OLMo-2 13B Instruct November 2024 is a post-trained variant of the OLMo-2 13B model, which has undergone supervised fine-tuning on an OLMo-specific variant of the Tülu 3 dataset. Additional training techniques include Direct Preference Optimization (DPO) and Reinforcement Learning from Virtual Rewards (RLVR), optimizing it for state-of-the-art performance across various tasks, including chat, MATH, GSM8K, and IFEval.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Olmo-2-7b](https://huggingface.co/cortexso/olmo-2/tree/7b) | `cortex run olmo-2:7b` |\n| 2 | [Olmo-2-13b](https://huggingface.co/cortexso/olmo-2/tree/13b) | `cortex run olmo-2:13b` |\n| 3 | [Olmo-2-32b](https://huggingface.co/cortexso/olmo-2/tree/32b) | `cortex run olmo-2:32b` |\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/olmo-2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run olmo-2\n ```\n \n## Credits\n\n- **Author:** allenai\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Paper](https://arxiv.org/abs/2501.00656)", + "disabled": false, + "downloads": 352, + "gated": false, + "gguf": { + "architecture": "olmo2", + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}{% else %}{{ '<|assistant|>\n' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}", + "context_length": 4096, + "eos_token": "<|endoftext|>", + "total": 32234279936 + }, + "id": "cortexso/olmo-2", + "lastModified": "2025-03-14T03:06:15.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/olmo-2", + "pipeline_tag": "text-generation", + "private": false, + "sha": "b76f7629d2da0ccc9535845bab99291e317de088", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q2_k.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q6_k.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q8_0.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q2_k.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q6_k.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q8_0.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q2_k.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q6_k.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2501.00656", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 335683989120, + "widgetData": [ + 
{ + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "olmo-2:32b", + "size": 19482558496 }, { - "id": "command-r:32b-gguf-q3-km", - "size": 16231746624 + "id": "olmo-2:13b", + "size": 8354349408 }, { - "id": "command-r:32b-gguf-q4-ks", - "size": 18849516608 + "id": "olmo-2:7b", + "size": 4472020160 + } + ] + }, + { + "author": "Microsoft", + "id": "cortexso/phi-4", + "metadata": { + "_id": "677f682eb2e41c2f45dbee73", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-09T06:09:50.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\nPhi-4 model, a state-of-the-art 14B parameter Transformer designed for advanced reasoning, conversational AI, and high-quality text generation. Built on a mix of synthetic datasets, filtered public domain content, academic books, and Q&A datasets, Phi-4 ensures exceptional performance through data quality and alignment. It features a 16K token context length, trained on 9.8T tokens over 21 days using 1920 H100-80G GPUs. Phi-4 underwent rigorous fine-tuning and preference optimization to enhance instruction adherence and safety. Released on December 12, 2024, it represents a static model with data cutoff as of June 2024, suitable for diverse applications in research and dialogue systems.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Phi-4-14b](https://huggingface.co/cortexso/phi-4/tree/14b) | `cortex run phi-4:14b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```text\n cortexso/phi-4\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run phi-4\n ```\n\n## Credits\n\n- **Author:** Microsoft Research\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/microsoft/phi-4/blob/main/LICENSE)\n- **Papers:** [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)", + "disabled": false, + "downloads": 463, + "gated": false, + "gguf": { + "architecture": "phi3", + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}", + "context_length": 16384, + "eos_token": "<|im_end|>", + "total": 14659507200 + }, + "id": "cortexso/phi-4", + "lastModified": "2025-03-02T15:30:47.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/phi-4", + "pipeline_tag": "text-generation", + "private": false, + "sha": "cc1f8271734a2ac438a1a7c60a62f111b9476524", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "phi-4-q2_k.gguf" + }, + { + "rfilename": "phi-4-q3_k_l.gguf" + }, + { + "rfilename": "phi-4-q3_k_m.gguf" + }, + { + "rfilename": "phi-4-q3_k_s.gguf" + }, + { + "rfilename": "phi-4-q4_k_m.gguf" + }, + { + "rfilename": "phi-4-q4_k_s.gguf" + }, + { + "rfilename": "phi-4-q5_k_m.gguf" + }, + { + "rfilename": "phi-4-q5_k_s.gguf" + }, + { + "rfilename": "phi-4-q6_k.gguf" + }, + { + "rfilename": "phi-4-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2412.08905", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 93205915520, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "phi-4:14b", + "size": 9053114560 + } + ] + }, + { + "author": "MistralAI", + "id": "cortexso/mistral-small-24b", + "metadata": { + "_id": "679c3a8f4061a1ab60e703b7", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-31T02:50:55.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nThe 'mistral-small-24b' model is an advanced AI language model optimized for a variety of natural language processing tasks. It is particularly well-suited for applications such as text generation, chatbots, content summarization, and language translation. Built on the foundation of 'mistralai/Mistral-Small-24B-Base-2501', it leverages state-of-the-art techniques for understanding and generating human-like text. Users can expect significant improvements in fluency and contextual relevance, making it effective for both professional and creative use cases. 
Its efficiency allows for deployment in resource-constrained environments, catering to a diverse range of industries and applications.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Mistral-Small-24b](https://huggingface.co/cortexso/mistral-small-24b/tree/24b) | cortex run mistral-small-24b:24b |\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n \n ```bash\n cortexso/mistral-small-24b\n ```\n \n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n \n ```bash\n cortex run mistral-small-24b\n ```\n \n## Credits\n- **Author:** mistralai\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Paper:** [Mistral Small 3 Blog](https://mistral.ai/news/mistral-small-3)", + "disabled": false, + "downloads": 683, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "", + "context_length": 32768, + "eos_token": "", + "total": 23572403200 + }, + "id": "cortexso/mistral-small-24b", + "lastModified": "2025-03-03T06:09:47.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/mistral-small-24b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "5a28cb4b0f1aa4e0b55f527b71c88eb5b56ebd71", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "mistral-small-24b-base-2501-q2_k.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q3_k_l.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q3_k_m.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q3_k_s.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q4_k_m.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q4_k_s.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q5_k_m.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q5_k_s.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q6_k.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q8_0.gguf" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us" + ], + "usedStorage": 148517729600, + "widgetData": [ + { + "text": "My name is Julien and I like to" + }, + { + "text": "I like traveling by train because" + }, + { + "text": "Paris is an amazing place to visit," + }, + { + "text": "Once upon a time," + } + ] + }, + "models": [ + { + "id": "mistral-small-24b:24b", + "size": 14333907488 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-qwen-7b", + "metadata": { + "_id": "6790a5b2044aeb2bd5922877", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-22T08:00:50.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) model, a distilled version of the Qwen 7B language model. 
This version is fine-tuned for high-performance text generation and optimized for dialogue and information-seeking tasks, providing even greater capabilities with its larger size compared to the 7B variant.\n\nThe model is designed for applications in customer support, conversational AI, and research, focusing on delivering accurate, helpful, and safe outputs while maintaining efficiency.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-7b-7b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-7b/tree/7b) | `cortex run deepseek-r1-distill-qwen-7b:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-7b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-7b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 1008, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool 
%}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 7615616512 + }, + "id": "cortexso/deepseek-r1-distill-qwen-7b", + "lastModified": "2025-03-03T06:27:42.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-qwen-7b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "8e256fee6ed3616f3f90b0eb453083a115f1fe40", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q2_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_l.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q4_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q4_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q5_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q5_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q6_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 53341802656, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-qwen-7b:7b", + "size": 4683073184 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-qwen-14b", + "metadata": { + "_id": "678fdf2be186002cc0ba006e", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-21T17:53:47.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) model, a distilled version of the Qwen 14B language model. This variant represents the largest and most powerful model in the DeepSeek R1 Distill series, fine-tuned for high-performance text generation, dialogue optimization, and advanced reasoning tasks. \n\nThe model is designed for applications that require extensive understanding, such as conversational AI, research, large-scale knowledge systems, and customer service, providing superior performance in accuracy, efficiency, and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-14b-14b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-14b/tree/14b) | `cortex run deepseek-r1-distill-qwen-14b:14b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-14b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-14b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 1261, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 14770033664 + }, + "id": "cortexso/deepseek-r1-distill-qwen-14b", + "lastModified": "2025-03-03T06:40:22.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-qwen-14b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "ca42c63b1c148ac7be176ef0ed8384d3775bed5b", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q2_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_l.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q4_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q4_k_s.gguf" + }, + 
{ + "rfilename": "deepseek-r1-distill-qwen-14b-q5_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q5_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q6_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 102845421536, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-qwen-14b:14b", + "size": 8988109920 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-qwen-32b", + "metadata": { + "_id": "678fe132df84bd3d94f37e58", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-21T18:02:26.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) model, a distilled version of the Qwen 32B language model. This is the most advanced and largest model in the DeepSeek R1 Distill family, offering unparalleled performance in text generation, dialogue optimization, and reasoning tasks. \n\nThe model is tailored for large-scale applications in conversational AI, research, enterprise solutions, and knowledge systems, delivering exceptional accuracy, efficiency, and safety at scale.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-32b-32b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-32b/tree/32b) | `cortex run deepseek-r1-distill-qwen-32b:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-32b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-32b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 597, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 32763876352 + }, + "id": "cortexso/deepseek-r1-distill-qwen-32b", + "lastModified": "2025-03-03T06:41:05.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-qwen-32b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "0ec9981b2b5ad5c04a5357a3c328f10735efc79a", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q2_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_l.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q4_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q4_k_s.gguf" + }, + { 
+ "rfilename": "deepseek-r1-distill-qwen-32b-q5_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q5_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q6_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 225982083296, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-qwen-32b:32b", + "size": 19851335520 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-llama-70b", + "metadata": { + "_id": "678fe1673b0a6384a4e1f887", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-21T18:03:19.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Llama 70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) model, a distilled version of the Llama 70B language model. This model represents the pinnacle of the DeepSeek R1 Distill series, designed for exceptional performance in text generation, dialogue tasks, and advanced reasoning, offering unparalleled capabilities for large-scale AI applications.\n\nThe model is ideal for enterprise-grade applications, research, conversational AI, and large-scale knowledge systems, providing top-tier accuracy, safety, and efficiency.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-llama-70b-70b](https://huggingface.co/cortexso/deepseek-r1-distill-llama-70b/tree/70b) | `cortex run deepseek-r1-distill-llama-70b:70b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-llama-70b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-llama-70b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 580, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 70553706560 + }, + "id": "cortexso/deepseek-r1-distill-llama-70b", + "lastModified": "2025-03-03T06:42:21.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-llama-70b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "d03fa1c83966573864075845a4b493af9aa8ed53", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-llama-70b-q4_k_m.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 85040791136, + "widgetData": [ + { + "text": "Hi, what can you help me with?" 
+ }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-llama-70b:70b", + "size": 42520395584 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-llama-8b", + "metadata": { + "_id": "678f4b5625a9b93997f1f666", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-21T07:23:02.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Llama 8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) model, a distilled version of the Llama 8B language model. This variant is fine-tuned for high-performance text generation, optimized for dialogue, and tailored for information-seeking tasks. It offers a robust balance between model size and performance, making it suitable for demanding conversational AI and research use cases.\n\nThe model is designed to deliver accurate, efficient, and safe responses in applications such as customer support, knowledge systems, and research environments.\n\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-llama-8b-8b](https://huggingface.co/cortexso/deepseek-r1-distill-llama-8b/tree/8b) | `cortex run deepseek-r1-distill-llama-8b:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-llama-8b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-llama-8b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 933, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 8030261312 + }, + "id": "cortexso/deepseek-r1-distill-llama-8b", + "lastModified": "2025-03-03T06:33:03.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-llama-8b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "b3321ad8a700b3aa2c3fc44ac84a167bd11ecdb8", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q2_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q3_k_l.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q3_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q3_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q4_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q4_k_s.gguf" + }, + { 
+ "rfilename": "deepseek-r1-distill-llama-8b-q5_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q5_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q6_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 56187723232, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-llama-8b:8b", + "size": 4920736256 + } + ] + }, + { + "author": "NovaSky-AI", + "id": "cortexso/sky-t1", + "metadata": { + "_id": "6782f82c860ee02fe01dbd60", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-11T23:01:00.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**NovaSky Team** developed and released the [Sky-T1](https://huggingface.co/novasky-ai/Sky-T1-32B-Preview), a 32-billion parameter reasoning model adapted from Qwen2.5-32B-Instruct. This model is designed for advanced reasoning, coding, and mathematical tasks, achieving performance comparable to state-of-the-art models like o1-preview while being cost-efficient. Sky-T1 was trained on 17K verified responses from Qwen/QwQ-32B-Preview, with additional science data from the Still-2 dataset, ensuring high-quality and diverse learning sources.\n\nThe model supports complex reasoning via long chain-of-thought processes and excels in both coding and mathematical challenges. Utilizing Llama-Factory with DeepSpeed Zero-3 Offload, Sky-T1 training was completed in just 19 hours on 8 H100 GPUs, demonstrating efficient resource utilization. These capabilities make Sky-T1 an exceptional tool for applications in programming, academic research, and reasoning-intensive tasks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Sky-t1-32b](https://huggingface.co/cortexso/sky-t1/tree/32b) | `cortex run sky-t1:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/sky-t1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run sky-t1\n ```\n\n## Credits\n\n- **Author:** NovaSky Team\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Sky-T1: Fully Open-Source Reasoning Model](https://novasky-ai.github.io/posts/sky-t1/)", + "disabled": false, + "downloads": 116, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 32763876352 + }, + "id": "cortexso/sky-t1", + "lastModified": "2025-03-03T05:51:45.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/sky-t1", + "pipeline_tag": "text-generation", + "private": false, + "sha": "05f06ab0191808f8eb21fa3c60c9ec4a6bef4978", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "sky-t1-32b-preview-q2_k.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q3_k_l.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q3_k_m.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q3_k_s.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q4_k_m.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q4_k_s.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q5_k_m.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q5_k_s.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q6_k.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 225982094944, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "sky-t1:32b", + "size": 19851336576 + } + ] + }, + { + "author": "CohereForAI", + "id": "cortexso/aya", + "metadata": { + "_id": "672aa4167f36760042e632ed", + "author": "cortexso", + "cardData": { + "license": "cc-by-nc-4.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-05T23:02:46.000Z", + "description": "---\nlicense: cc-by-nc-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Cohere For AI** developed and released the [Aya 23](https://huggingface.co/CohereForAI/aya-23-35B), an open weights instruction fine-tuned model with advanced multilingual capabilities. Aya 23 is built upon the highly performant Command family of models and fine-tuned using the Aya Collection to deliver state-of-the-art performance across 23 languages. This multilingual large language model is designed to support a wide range of use cases, including multilingual text generation, understanding, and translation tasks.\n\nAya 23, balancing efficiency and performance. It offers robust multilingual support for languages such as Arabic, Chinese, English, Spanish, Hindi, Vietnamese, and more, making it a versatile tool for global applications. A 35-billion parameter version is also available [here](https://huggingface.co/CohereForAI/aya-23-35b).\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Aya-8b](https://huggingface.co/cortexso/aya/tree/8b) | `cortex run aya:8b` |\n| 2 | [Aya-35b](https://huggingface.co/cortexso/aya/tree/35b) | `cortex run aya:35b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/aya\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run aya\n ```\n\n## Credits\n\n- **Author:** Cohere For AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://spdx.org/licenses/CC-BY-NC-4.0)", + "disabled": false, + "downloads": 168, + "gated": false, + "gguf": { + "architecture": "command-r", + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Aya, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", + "context_length": 8192, + "eos_token": "<|END_OF_TURN_TOKEN|>", + "total": 34980831232 + }, + "id": "cortexso/aya", + "lastModified": "2025-03-02T14:58:34.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/aya", + "pipeline_tag": "text-generation", + "private": false, + "sha": "d97fef50adc54a22ec1e3133771f7cb17528742b", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "aya-23-35b-q2_k.gguf" + }, + { + "rfilename": "aya-23-35b-q3_k_l.gguf" + }, + { + "rfilename": "aya-23-35b-q3_k_m.gguf" + }, + { + "rfilename": "aya-23-35b-q3_k_s.gguf" + }, + { + "rfilename": "aya-23-35b-q4_k_m.gguf" + }, + { + "rfilename": "aya-23-35b-q4_k_s.gguf" + }, + { + "rfilename": "aya-23-35b-q5_k_m.gguf" + }, + { + "rfilename": "aya-23-35b-q5_k_s.gguf" + }, + { + "rfilename": "aya-23-35b-q6_k.gguf" + }, + { + "rfilename": "aya-23-35b-q8_0.gguf" + }, + { + "rfilename": "aya-23-8b-q2_k.gguf" + }, + { + "rfilename": "aya-23-8b-q3_k_l.gguf" + }, + { + "rfilename": "aya-23-8b-q3_k_m.gguf" + }, + { + "rfilename": "aya-23-8b-q3_k_s.gguf" + }, + { + "rfilename": "aya-23-8b-q4_k_m.gguf" + }, + { + "rfilename": "aya-23-8b-q4_k_s.gguf" + }, + { + "rfilename": "aya-23-8b-q5_k_m.gguf" + }, + { + "rfilename": "aya-23-8b-q5_k_s.gguf" + }, + { + "rfilename": "aya-23-8b-q6_k.gguf" + }, + { + "rfilename": "aya-23-8b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:cc-by-nc-4.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 302730192928, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "aya:35b", + "size": 21527043520 + }, + { + "id": "aya:8b", + "size": 5056974496 + } + ] + }, + { + "author": "PowerInfer", + "id": "cortexso/small-thinker", + "metadata": { + "_id": "6777192582e1ec3ecb79d1a4", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-02T22:54:29.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**PowerInfer** developed and released the [SmallThinker-3B-preview](https://huggingface.co/PowerInfer/SmallThinker-3B-Preview), a fine-tuned version of the Qwen2.5-3B-Instruct model. 
SmallThinker is optimized for efficient deployment on resource-constrained devices while maintaining high performance in reasoning, coding, and general text generation tasks. It outperforms its base model on key benchmarks, including AIME24, AMC23, and GAOKAO2024, making it a robust tool for both edge deployment and as a draft model for larger systems like QwQ-32B-Preview.\n\nSmallThinker was fine-tuned in two phases using high-quality datasets, including PowerInfer/QWQ-LONGCOT-500K and PowerInfer/LONGCOT-Refine-500K. Its small size allows for up to 70% faster inference speeds compared to larger models, making it ideal for applications requiring quick responses and efficient computation.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Small-thinker-3b](https://huggingface.co/cortexso/small-thinker/tree/3b) | `cortex run small-thinker:3b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/small-thinker\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run small-thinker\n ```\n\n## Credits\n\n- **Author:** PowerInfer\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/PowerInfer/SmallThinker-3B-Preview/blob/main/LICENSE)", + "disabled": false, + "downloads": 273, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 3397103616 + }, + "id": "cortexso/small-thinker", + "lastModified": "2025-03-03T06:05:50.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/small-thinker", + "pipeline_tag": "text-generation", + "private": false, + "sha": "f2746c69548d6ff92db6ec663400ad9a0dc51bbc", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "smallthinker-3b-preview-q2_k.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q3_k_l.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q3_k_m.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q3_k_s.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q4_k_m.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q4_k_s.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q5_k_m.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q5_k_s.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q6_k.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 
23981289568, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "small-thinker:3b", + "size": 2104931616 } ] }, @@ -96,20 +2326,31 @@ "_id": "66b06c37491b555fefe0a0bf", "author": "cortexso", "cardData": { - "license": "gemma" + "license": "gemma", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] }, "createdAt": "2024-08-05T06:07:51.000Z", - "description": "---\nlicense: gemma\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [Gemma](https://huggingface.co/google/gemma-2-2b-it), state-of-the-art open model trained with the Gemma datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Gemma family with the 4B, 7B version in two variants 8K and 128K which is the context length (in tokens) that it can support.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Gemma2-2b](https://huggingface.co/cortexso/gemma2/tree/2b) | `cortex run gemma2:2b` |\n| 2 | [Gemma2-9b](https://huggingface.co/cortexso/gemma2/tree/9b) | `cortex run gemma2:9b` |\n| 3 | [Gemma2-27b](https://huggingface.co/cortexso/gemma2/tree/27b) | `cortex run gemma2:27b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma2\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run gemma2\n ```\n \n## Credits\n\n- **Author:** Go\u200cogle\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma Technical Report](https://arxiv.org/abs/2403.08295)", + "description": "---\nlicense: gemma\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [Gemma](https://huggingface.co/google/gemma-2-2b-it), state-of-the-art open model trained with the Gemma datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Gemma family with the 4B, 7B version in two variants 8K and 128K which is the context length (in tokens) that it can support.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Gemma2-2b](https://huggingface.co/cortexso/gemma2/tree/2b) | `cortex run gemma2:2b` |\n| 2 | [Gemma2-9b](https://huggingface.co/cortexso/gemma2/tree/9b) | `cortex run gemma2:9b` |\n| 3 | [Gemma2-27b](https://huggingface.co/cortexso/gemma2/tree/27b) | `cortex run gemma2:27b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma2\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run gemma2\n ```\n \n## Credits\n\n- **Author:** Go‌ogle\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma Technical Report](https://arxiv.org/abs/2403.08295)", "disabled": false, - "downloads": 190, + "downloads": 796, "gated": false, + "gguf": { + "architecture": "gemma2", + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "context_length": 8192, + "eos_token": "", + "total": 27227128320 + }, "id": "cortexso/gemma2", - "lastModified": "2024-11-12T20:13:02.000Z", + "lastModified": "2025-03-03T06:25:38.000Z", "likes": 0, "model-index": null, "modelId": "cortexso/gemma2", + "pipeline_tag": "text-generation", "private": false, - "sha": "5fe1c79fabadcd2cb59cd05f76019d0a5fd71ce0", + "sha": "36fdfde32513f2a0be9e1b166952d4cee227aaf6", "siblings": [ { "rfilename": ".gitattributes" @@ -117,6 +2358,96 @@ { "rfilename": "README.md" }, + { + "rfilename": "gemma-2-27b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q8_0.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q8_0.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q8_0.gguf" + }, { "rfilename": "metadata.yml" }, @@ -125,165 +2456,300 @@ } ], "spaces": [], - "tags": ["arxiv:2403.08295", "license:gemma", "region:us"], - "usedStorage": 265964141287 + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2403.08295", + "license:gemma", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 280987360512, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" 
+ }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] }, "models": [ { - "id": "gemma2:2b-gguf-q6-k", - "size": 2151393024 - }, - { - "id": "gemma2:2b-gguf-q3-km", - "size": 1461667584 - }, - { - "id": "gemma2:gguf", - "size": 1708582496 - }, - { - "id": "gemma2:9b-gguf-q4-km", + "id": "gemma2:9b", "size": 5761057888 }, { - "id": "gemma2:9b-gguf-q3-ks", - "size": 4337665120 - }, - { - "id": "gemma2:2b-gguf-q4-ks", - "size": 1638651648 - }, - { - "id": "gemma2:9b-gguf-q5-ks", - "size": 6483592288 - }, - { - "id": "gemma2:9b-gguf-q3-km", - "size": 4761781344 - }, - { - "id": "gemma2:9b-gguf-q3-kl", - "size": 5132452960 - }, - { - "id": "gemma2:27b-gguf-q5-ks", - "size": 18884206240 - }, - { - "id": "gemma2:9b-gguf-q2-k", - "size": 3805398112 - }, - { - "id": "gemma2:27b-gguf-q3-km", - "size": 13424647840 - }, - { - "id": "gemma2:2b-gguf", - "size": 1708582496 - }, - { - "id": "gemma2:onnx", - "size": 1708582496 - }, - { - "id": "gemma2:27b-gguf-q4-km", + "id": "gemma2:27b", "size": 16645381792 }, { - "id": "gemma2:9b-gguf-q5-km", - "size": 6647366752 - }, - { - "id": "gemma2:27b-gguf-q2-k", - "size": 10449575584 - }, - { - "id": "gemma2:9b-gguf-q4-ks", - "size": 5478925408 - }, - { - "id": "gemma2:27b-gguf-q3-ks", - "size": 12169060000 - }, - { - "id": "gemma2:2b-gguf-q2-k", - "size": 1229829888 - }, - { - "id": "gemma2:2b-gguf-q4-km", + "id": "gemma2:2b", "size": 1708582656 - }, - { - "id": "gemma2:27b-gguf-q4-ks", - "size": 15739264672 - }, - { - "id": "gemma2:9b-gguf-q8-0", - "size": 9827148896 - }, - { - "id": "gemma2:27b-gguf-q8-0", - "size": 28937387680 - }, - { - "id": "gemma2:9b-gguf-q6-k", - "size": 7589069920 - }, - { - "id": "gemma2:2b-gguf-q8-0", - "size": 2784495360 - }, - { - "id": "gemma2:27b-gguf-q5-km", - "size": 19408117408 - }, - { - "id": "gemma2:2b-gguf-q3-kl", - "size": 1550436096 - }, - { - "id": "gemma2:27b-gguf-q6-k", - "size": 22343524000 - }, - { - "id": "gemma2:2b-gguf-q3-ks", - "size": 1360660224 - }, - { - "id": "gemma2:27b-gguf-q3-kl", - "size": 14519361184 - }, - { - "id": "gemma2:2b-gguf-q5-ks", - "size": 1882543872 - }, - { - "id": "gemma2:2b-gguf-q5-km", - "size": 1923278592 } ] }, { - "author": "CohereForAI", - "id": "cortexso/aya", + "author": "agentica-org", + "id": "cortexso/deepscaler", "metadata": { - "_id": "66790e21db26e8589ccd3816", + "_id": "67aaa7a5a6e6b3d852e347b2", "author": "cortexso", "cardData": { - "license": "apache-2.0" + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] }, - "createdAt": "2024-06-24T06:11:45.000Z", - "description": "---\nlicense: cc-by-sa-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nAya Expanse is an open-weight research release of a model with highly advanced multilingual capabilities. It focuses on pairing a highly performant pre-trained Command family of models with the result of a year\u2019s dedicated research from Cohere For AI, including data arbitrage, multilingual preference training, safety tuning, and model merging. The result is a powerful multilingual large language model serving 23 languages.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [main](https://huggingface.co/cortexso/aya-expanse/tree/main) | `cortex run aya-expanse` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```\n cortexso/aya-expanse\n ```\n\n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```\n cortex run aya-expanse\n ```\n\n## Credits\n\n- **Author:** CohereAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://cohere.com/c4ai-cc-by-nc-license)\n- **Papers:** [Aya Expanse Blog](https://cohere.com/blog/aya-expanse-connecting-our-world)", + "createdAt": "2025-02-11T01:28:05.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nDeepscaler is an advanced AI model developed from the agentica-org's DeepScaleR-1.5B-Preview, designed to enhance the efficiency and scalability of various machine learning tasks. Its core purpose is to provide high-quality predictive analytics and data processing capabilities while optimizing resource usage. Deepscaler is particularly useful in scenarios such as natural language processing, computer vision, and more complex data interpretation tasks, making it suitable for applications in industries like finance, healthcare, and entertainment. Users can leverage its performance to achieve faster training times and improved accuracy in their models. Overall, Deepscaler's architecture allows it to deliver robust results with reduced computational overhead, making it an excellent choice for developers and organizations aiming to scale their AI solutions.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepscaler-1.5b](https://huggingface.co/cortexso/deepscaler/tree/1.5b) | cortex run deepscaler:1.5b |\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepscaler\n ```\n \n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepscaler\n ```\n## Credits\n- **Author:** agentica-org\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [LICENSE](https://huggingface.co/agentica-org/DeepScaleR-1.5B-Preview/blob/main/LICENSE)", "disabled": false, - "downloads": 25, + "downloads": 404, "gated": false, - "id": "cortexso/aya", - "lastModified": "2024-11-12T20:24:22.000Z", + "gguf": { + "architecture": "qwen2", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 1777088000 + }, + "id": "cortexso/deepscaler", + "lastModified": "2025-03-03T06:07:30.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/deepscaler", + "pipeline_tag": "text-generation", + "private": false, + "sha": "f2ac6bdbe311a9dbaf2bc4d77baa460b06b169e6", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepscaler-1.5b-preview-q2_k.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q3_k_l.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q3_k_m.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q3_k_s.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q4_k_m.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q4_k_s.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q5_k_m.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q5_k_s.gguf" + }, + { + 
"rfilename": "deepscaler-1.5b-preview-q6_k.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 12728615584, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepscaler:1.5b", + "size": 1117321888 + } + ] + }, + { + "author": "Falcon LLM TII UAE", + "id": "cortexso/falcon3", + "metadata": { + "_id": "6761d4519d9bc9c3b6e25ad4", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-12-17T19:43:13.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n\n## Overview\n\nFalcon3-10B-Instruct is part of the Falcon3 family of Open Foundation Models, offering state-of-the-art performance in reasoning, language understanding, instruction following, code, and mathematics. With 10 billion parameters, Falcon3-10B-Instruct is optimized for high-quality instruction-following tasks and supports multilingual capabilities in English, French, Spanish, and Portuguese. It provides a long context length of up to 32K tokens, making it suitable for extended document understanding and processing.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Falcon3-10b](https://huggingface.co/cortexso/falcon3/tree/10b) | `cortex run falcon3:10b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/falcon3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run falcon3\n ```\n \n## Credits\n\n- **Author:** Falcon3 Team\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://falconllm.tii.ae/falcon-terms-and-conditions.html)\n- **Papers:** [Paper](https://arxiv.org/abs/2311.16867)", + "disabled": false, + "downloads": 276, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n{{- '<|system|>\\n' }}\n{%- if messages[0]['role'] == 'system' %}\n{{- messages[0]['content'] }}\n{%- set remaining_messages = messages[1:] %}\n{%- else %}\n{%- set remaining_messages = messages %}\n{%- endif %}\n{{- 'You are a Falcon assistant skilled in function calling. You are helpful, respectful, and concise.\\n\\n# Tools\\n\\nYou have access to the following functions. You MUST use them to answer questions when needed. 
For each function call, you MUST return a JSON object inside tags.\\n\\n' + tools|tojson(indent=2) + '\\n\\n# Output Format\\n\\nYour response MUST follow this format when making function calls:\\n\\n[\\n {\"name\": \"function_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}},\\n {\"name\": \"another_function\", \"arguments\": {\"arg\": \"value\"}}\\n]\\n\\nIf no function calls are needed, respond normally without the tool_call tags.\\n' }}\n{%- for message in remaining_messages %}\n{%- if message['role'] == 'user' %}\n{{- '<|user|>\\n' + message['content'] + '\\n' }}\n{%- elif message['role'] == 'assistant' %}\n{%- if message.content %}\n{{- '<|assistant|>\\n' + message['content'] }}\n{%- endif %}\n{%- if message.tool_calls %}\n{{- '\\n\\n' }}\n{{- message.tool_calls|tojson(indent=2) }}\n{{- '\\n' }}\n{%- endif %}\n{{- eos_token + '\\n' }}\n{%- elif message['role'] == 'tool' %}\n{{- '<|assistant|>\\n\\n' + message['content'] + '\\n\\n' }}\n{%- endif %}\n{%- endfor %}\n{{- '<|assistant|>\\n' if add_generation_prompt }}\n{%- else %}\n{%- for message in messages %}\n{%- if message['role'] == 'system' %}\n{{- '<|system|>\\n' + message['content'] + '\\n' }}\n{%- elif message['role'] == 'user' %}\n{{- '<|user|>\\n' + message['content'] + '\\n' }}\n{%- elif message['role'] == 'assistant' %}\n{%- if not loop.last %}\n{{- '<|assistant|>\\n' + message['content'] + eos_token + '\\n' }}\n{%- else %}\n{{- '<|assistant|>\\n' + message['content'] + eos_token }}\n{%- endif %}\n{%- endif %}\n{%- if loop.last and add_generation_prompt %}\n{{- '<|assistant|>\\n' }}\n{%- endif %}\n{%- endfor %}\n{%- endif %}", + "context_length": 32768, + "eos_token": "<|endoftext|>", + "total": 10305653760 + }, + "id": "cortexso/falcon3", + "lastModified": "2025-03-03T03:54:15.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/falcon3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "60030375504feacf3ba4205e8b9809e3dffc2ef7", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "falcon3-10b-instruct-q2_k.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q6_k.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2311.16867", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 65157537088, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "falcon3:10b", + "size": 6287521312 + } + ] + }, + { + "author": "Qwen", + "id": "cortexso/qwen2", + "metadata": { + "_id": "667917d974da9f6bfc120671", + "author": "cortexso", + "cardData": { + "license": "other", + "license_link": "https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE", + "license_name": "tongyi-qianwen", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-24T06:53:13.000Z", + "description": "---\nlicense: other\nlicense_name: tongyi-qianwen\nlicense_link: https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nQwen2 is the new series of Qwen large language models. For Qwen2, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model. This repo contains the instruction-tuned 72B Qwen2 model.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwen2-7b](https://huggingface.co/cortexso/qwen2/tree/7b) | `cortex run qwen2:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/qwen2\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run qwen2\n ```\n \n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE)", + "disabled": false, + "downloads": 130, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 7615616512 + }, + "id": "cortexso/qwen2", + "lastModified": "2025-03-02T15:15:09.000Z", "likes": 0, "model-index": null, - "modelId": "cortexso/aya", + "modelId": "cortexso/qwen2", + "pipeline_tag": "text-generation", "private": false, - "sha": "cae2291fec1dc73739fb8189f9165d23ebe398b8", + "sha": "e2c6376ad87c7b2da92bc2a2b63ba168d85b1c6d", "siblings": [ { "rfilename": ".gitattributes" @@ -296,20 +2762,987 @@ }, { "rfilename": "model.yml" + }, + { + "rfilename": "qwen2-7b-instruct-q2_k.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q6_k.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q8_0.gguf" } ], "spaces": [], - "tags": ["license:apache-2.0", "region:us"], - "usedStorage": 21527051168 + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 53341783520, + "widgetData": [ + { + "text": "Hi, 
what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] }, "models": [ { - "id": "aya:12.9b-gguf", - "size": 21527051168 + "id": "qwen2:7b", + "size": 4683071456 + } + ] + }, + { + "author": "Nous Research", + "id": "cortexso/hermes3", + "metadata": { + "_id": "675a4743cb0f75e1a3a19ae5", + "author": "cortexso", + "cardData": { + "license": "llama3", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-12-12T02:15:31.000Z", + "description": "---\nlicense: llama3\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Nous Research** developed and released the [Hermes 3](https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B), a state-of-the-art instruction-tuned language model built on Llama-3.2-3B. This 3-billion parameter model is a fine-tuned version of Llama-3.2 and represents a leap forward in reasoning, multi-turn conversation, and structured outputs. It incorporates advanced role-playing capabilities, reliable function calling, and improved coherence over long contexts, making it a versatile assistant for various applications.\n\nHermes 3 was trained with high-quality data, leveraging fine-tuning techniques on H100 GPUs via LambdaLabs GPU Cloud. The model excels in both general-purpose and specialized tasks, including code generation, reasoning, and advanced conversational abilities. With support for ChatML prompt formatting, Hermes 3 ensures compatibility with OpenAI endpoints and facilitates structured, steerable interactions for end-users.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Hermes3-3b](https://huggingface.co/cortexso/hermes3/tree/main) | `cortex run hermes3:3b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/hermes3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run hermes3\n ```\n\n## Credits\n\n- **Author:** Nous Research\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Meta-Llama-3-8B/blob/main/LICENSE)\n- **Papers:** [Hermes 3 Technical Report](https://arxiv.org/pdf/2408.11857)", + "disabled": false, + "downloads": 421, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 131072, + "eos_token": "<|im_end|>", + "total": 3212749888 + }, + "id": "cortexso/hermes3", + "lastModified": "2025-03-03T02:36:41.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/hermes3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "b987bf2aa863d1c3590e242aaf5b81a5dc3ea8f3", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q2_k.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q3_k_l.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q3_k_m.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q3_k_s.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q4_k_m.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q4_k_s.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q5_k_m.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q5_k_s.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q6_k.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2408.11857", + "license:llama3", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 23033625536, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "hermes3:3b", + "size": 2019373888 + } + ] + }, + { + "author": "Qwen", + "id": "cortexso/qwen2.5-coder", + "metadata": { + "_id": "6732691d254c0b2144f11764", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-11T20:29:17.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Qwen Labs** developed and released the [Qwen2.5-Coder](https://huggingface.co/Qwen) model, a state-of-the-art language model tailored for code generation, understanding, and completion tasks. Featuring a 2.5B parameter dense Transformer architecture, Qwen2.5-Coder is designed to assist developers and researchers by generating high-quality code snippets, providing algorithm explanations, and completing coding prompts with accuracy. The model was trained on a diverse blend of programming languages and frameworks using carefully filtered code datasets to ensure precision and relevance. 
It leverages advanced fine-tuning techniques and rigorous safety measures to optimize instruction adherence and deliver reliable, contextually aware outputs. Released in November 2024, Qwen2.5-Coder offers an effective tool for software development, academic research, and programming education.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwen2.5-coder-14b](https://huggingface.co/cortexso/qwen2.5-coder/tree/14b) | `cortex run qwen2.5-coder:14b` |\n| 1 | [Qwen2.5-coder-32b](https://huggingface.co/cortexso/qwen2.5-coder/tree/32b) | `cortex run qwen2.5-coder:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/qwen2.5-coder\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run qwen2.5-coder\n ```\n\n## Credits\n\n- **Author:** Qwen Labs\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct/blob/main/LICENSE)\n- **Papers:** [Qwen2.5-Coder Technical Report](https://arxiv.org/abs/2409.12186)", + "disabled": false, + "downloads": 1369, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 14770033664 + }, + "id": "cortexso/qwen2.5-coder", + "lastModified": "2025-03-03T04:26:33.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/qwen2.5-coder", + "pipeline_tag": "text-generation", + "private": false, + "sha": "b472c129cc68732d81e50ce48e621fe1861e8d1c", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q2_k.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q6_k.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q8_0.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q2_k.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q6_k.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2409.12186", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 328827521152, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "qwen2.5-coder:32b", + "size": 19851336256 }, { - "id": "aya:gguf", - "size": 21527051168 + "id": "qwen2.5-coder:14b", + "size": 8988110656 + } + ] + }, + { + "author": "Microsoft", + "id": "cortexso/phi-3.5", + "metadata": { + "_id": "67211d1b527f6fcd90b9dca3", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-10-29T17:36:27.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n**Microsoft** developed and released the [Phi-3.5](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) model, a state-of-the-art large language model built upon the Phi-3 architecture. With its focus on high-quality, reasoning-dense data, this model represents a significant advancement in instruction-tuned language models. Phi-3.5 has been fine-tuned through supervised learning, proximal policy optimization (PPO), and direct preference optimization (DPO) to ensure precise instruction following and robust safety measures. Supporting a 128K token context length, the model demonstrates exceptional performance in tasks requiring extended context understanding and complex reasoning. The model's training data consists of synthetic datasets and carefully filtered publicly available web content, inheriting the high-quality foundation established in the Phi-3 series.\n\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Phi-3.5-3b](https://huggingface.co/cortexso/phi-3.5/tree/3b) | `cortex run phi-3.5:3b` |\n\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/phi-3.5\n ```\n\n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run phi-3.5\n ```\n\n## Credits\n- **Author:** Microsoft\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/mit/)\n- **Papers:** [Phi-3.5 Paper](https://arxiv.org/abs/2404.14219)", + "disabled": false, + "downloads": 299, + "gated": false, + "gguf": { + "architecture": "phi3", + "bos_token": "", + "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}", + "context_length": 131072, + "eos_token": "<|endoftext|>", + "total": 3821079648 + }, + "id": "cortexso/phi-3.5", + "lastModified": "2025-03-03T05:42:47.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/phi-3.5", + "pipeline_tag": "text-generation", + "private": false, + "sha": "7fd139ae9bdff00feae40ad3e4d7ce6dc0c48a91", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "phi-3.5-mini-instruct-q2_k.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q3_k_l.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q3_k_m.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q3_k_s.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q4_k_m.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q4_k_s.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q5_k_m.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q5_k_s.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q6_k.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2404.14219", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 26770128384, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "phi-3.5:3b", + "size": 2393232384 + } + ] + }, + { + "author": "meta-llama", + "id": "cortexso/llama3.3", + "metadata": { + "_id": "67568c9b6ac1ee73523d7623", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-12-09T06:22:19.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Meta** developed and released the [Llama3.3](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) model, a state-of-the-art multilingual large language model designed for instruction-tuned generative tasks. With 70 billion parameters, this model is optimized for multilingual dialogue use cases, providing high-quality text input and output. Llama3.3 has been fine-tuned through supervised learning and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. 
It sets a new standard in performance, outperforming many open-source and closed-source chat models on common industry benchmarks. The model’s capabilities make it a powerful tool for applications requiring conversational AI, multilingual support, and instruction adherence.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Llama3.3-70b](https://huggingface.co/cortexso/llama3.3/tree/70b) | `cortex run llama3.3:70b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.3\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://llama.meta.com/llama3/license/)\n- **Papers:** [Llama-3 Blog](https://llama.meta.com/llama3/)", + "disabled": false, + "downloads": 964, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "context_length": 131072, + "eos_token": "<|eot_id|>", + "total": 70553706560 + }, + "id": "cortexso/llama3.3", + "lastModified": "2025-03-03T03:59:38.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/llama3.3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "9cc0507ae02f03cf59c630c1ffa5d369441e27eb", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "llama-3.3-70b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + 
"text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 42520398432, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "llama3.3:70b", + "size": 42520398432 + } + ] + }, + { + "author": "inftech.ai", + "id": "cortexso/opencoder", + "metadata": { + "_id": "672fb2f43db04d9bf3f4c393", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-09T19:07:32.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nOpenCoder is an open and reproducible code LLM family, featuring 1.5B and 8B base and chat models that support both English and Chinese languages. Built from scratch, OpenCoder is pretrained on 2.5 trillion tokens, composed of 90% raw code and 10% code-related web data. It undergoes supervised fine-tuning (SFT) with over 4.5 million high-quality examples, achieving performance on par with top-tier code LLMs\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Opencoder-8b](https://huggingface.co/cortexso/opencoder/tree/8b) | `cortex run opencoder:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/opencoder\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run opencoder\n ```\n \n## Credits\n\n- **Author:** inftech.ai\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/infly/OpenCoder-8B-Instruct/blob/main/LICENSE)\n- **Papers:** [Paper](https://arxiv.org/abs/2411.04905)", + "disabled": false, + "downloads": 650, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|im_start|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are OpenCoder, created by OpenCoder Team.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 8192, + "eos_token": "<|im_end|>", + "total": 7771262976 + }, + "id": "cortexso/opencoder", + "lastModified": "2025-03-03T02:25:59.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/opencoder", + "pipeline_tag": "text-generation", + "private": false, + "sha": "2b98756c8b01811470941deb8a0259de3dd4018c", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "opencoder-8b-instruct-q2_k.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q5_k_s.gguf" + }, + { + 
"rfilename": "opencoder-8b-instruct-q6_k.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2411.04905", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 54076349664, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "opencoder:8b", + "size": 4736059168 + } + ] + }, + { + "author": "Google", + "id": "cortexso/gemma", + "metadata": { + "_id": "6667b642f760460127737cc6", + "author": "cortexso", + "cardData": { + "license": "gemma", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-11T02:28:18.000Z", + "description": "---\nlicense: gemma\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [Gemma](https://huggingface.co/google/gemma-7b), state-of-the-art open model trained with the Gemma datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Gemma family with the 4B, 7B version in two variants 8K and 128K which is the context length (in tokens) that it can support.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Gemma-7b](https://huggingface.co/cortexso/gemma/tree/7b) | `cortex run gemma:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run gemma\n ```\n \n## Credits\n\n- **Author:** Go‌ogle\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma Technical Report](https://arxiv.org/abs/2403.08295)", + "disabled": false, + "downloads": 280, + "gated": false, + "gguf": { + "architecture": "gemma", + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "context_length": 8192, + "eos_token": "", + "total": 8537680896 + }, + "id": "cortexso/gemma", + "lastModified": "2025-03-03T06:14:39.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/gemma", + "pipeline_tag": "text-generation", + "private": false, + "sha": "801b78a606397281d5953e5e8f2a64b6158e2db2", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "gemma-7b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-7b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-7b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-7b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-7b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-7b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-7b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-7b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-7b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-7b-it-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2403.08295", + "license:gemma", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 60258935328, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "gemma:7b", + "size": 5329759680 + } + ] + }, + { + "author": "MistralAI", + "id": "cortexso/mistral-nemo", + "metadata": { + "_id": "66f4e292515759ca6d5287bd", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-09-26T04:26:58.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMistralai developed and released the [Mistral-Nemo](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407) family of large language models (LLMs).\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Mistral-nemo-12b](https://huggingface.co/cortexso/mistral-nemo/tree/12b) | `cortex run mistral-nemo:12b` ||\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/mistral-nemo\n ```\n\n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run mistral-nemo\n ```\n\n## Credits\n\n- **Author:** MistralAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Apache 2 License](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Mistral Nemo Blog](https://mistral.ai/news/mistral-nemo/)", + "disabled": false, + "downloads": 546, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "", + "chat_template": "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS][\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST]\" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST]\" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n {%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n {{- \"[TOOL_CALLS][\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- message[\"content\"] + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS]{\"content\": ' + content|string + \", \" }}\n {%- if not message.tool_call_id is defined or 
message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", + "context_length": 131072, + "eos_token": "", + "total": 12247782400 + }, + "id": "cortexso/mistral-nemo", + "lastModified": "2025-03-03T02:42:16.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/mistral-nemo", + "pipeline_tag": "text-generation", + "private": false, + "sha": "487a202e44ea08566ab73ed16b5f7f685d12cf6b", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q2_k.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q3_k_l.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q3_k_m.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q3_k_s.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q4_k_m.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q4_k_s.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q5_k_m.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q5_k_s.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q6_k.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q8_0.gguf" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 85369454144, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "mistral-nemo:12b", + "size": 7477207744 + } + ] + }, + { + "author": "meta-llama", + "id": "cortexso/llama3.2", + "metadata": { + "_id": "66f63309ba963b1db95deaa4", + "author": "cortexso", + "cardData": { + "license": "llama3.2", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2024-09-27T04:22:33.000Z", + "description": "---\nlicense: llama3.2\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3.2](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [LLama3.2-1b](https://huggingface.co/cortexso/llama3.2/tree/1b) | `cortex run llama3.2:1b` |\n| 2 | [LLama3.2-3b](https://huggingface.co/cortexso/llama3.2/tree/3b) | `cortex run llama3.2:3b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.2\n ```\n\n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.2\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/blob/main/LICENSE.txt)\n- **Papers:** [Llama-3.2 Blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)", + "disabled": false, + "downloads": 11227, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "context_length": 131072, + "eos_token": "<|eot_id|>", + "total": 1235814432 + }, + "id": "cortexso/llama3.2", + "lastModified": "2025-03-03T06:22:08.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/llama3.2", + "pipeline_tag": "text-generation", + "private": false, + "sha": "73313225fbeff0cebf5ccf48121cba6ca1a80e7d", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "llama-3.2-1b-instruct-q2_k.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q6_k.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q8_0.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q2_k.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q6_k.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:llama3.2", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 31409886432, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "llama3.2:3b", + "size": 2019377312 + }, + { + "id": "llama3.2:1b", + "size": 911503104 } ] }, @@ -327,7 +3760,7 @@ "createdAt": "2024-10-26T15:40:05.000Z", "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nQwen2.5 by Qwen is a family of model include various specialized models for coding and mathematics available in multiple sizes from 0.5B to 72B parameters\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwen-2.5-0.5b](https://huggingface.co/cortexso/qwen2.5/tree/0.5b) | `cortex run qwen2.5:0.5b` |\n| 2 | [Qwen-2.5-1.5b](https://huggingface.co/cortexso/qwen2.5/tree/1.5b) | `cortex run qwen2.5:1.5b` |\n| 3 | [Qwen-2.5-3b](https://huggingface.co/cortexso/qwen2.5/tree/3b) | `cortex run qwen2.5:3b` |\n| 4 | [Qwen-2.5-7b](https://huggingface.co/cortexso/qwen2.5/tree/7b) | `cortex run qwen2.5:7b` |\n| 5 | [Qwen-2.5-14b](https://huggingface.co/cortexso/qwen2.5/tree/14b) | `cortex run qwen2.5:14b` |\n| 6 | [Qwen-2.5-32b](https://huggingface.co/cortexso/qwen2.5/tree/32b) | `cortex run qwen2.5:32b` |\n| 7 | [Qwen-2.5-72b](https://huggingface.co/cortexso/qwen2.5/tree/72b) | `cortex run qwen2.5:72b` |\n| 8 | [Qwen-2.5-coder-1.5b](https://huggingface.co/cortexso/qwen2.5/tree/coder-1.5b) | `cortex run qwen2.5:coder-1.5b` |\n| 9 | [Qwen-2.5-coder-7b](https://huggingface.co/cortexso/qwen2.5/tree/coder-7b) | `cortex run qwen2.5:coder-7b` |\n| 10 | [Qwen-2.5-math-1.5b](https://huggingface.co/cortexso/qwen2.5/tree/math-1.5b) | `cortex run qwen2.5:math-1.5b` |\n| 11 | [Qwen-2.5-math-7b](https://huggingface.co/cortexso/qwen2.5/tree/math-7b) | `cortex run qwen2.5:math-7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```\n cortexso/qwen2.5\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```\n cortex run qwen2.5\n ```\n\n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License Apache 2.0](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Qwen2.5 Blog](https://qwenlm.github.io/blog/qwen2.5/)", "disabled": false, - "downloads": 2482, + "downloads": 3608, "gated": false, "gguf": { "architecture": "qwen2", @@ -338,13 +3771,13 @@ "total": 494032768 }, "id": "cortexso/qwen2.5", - "lastModified": "2025-02-25T07:36:34.000Z", - "likes": 0, + "lastModified": "2025-03-03T04:07:15.000Z", + "likes": 1, "model-index": null, "modelId": "cortexso/qwen2.5", "pipeline_tag": "text-generation", "private": false, - "sha": "7b8b2c31e393f5cf085fe6e535fa5d6ee1cb1c5c", + "sha": "d801e60d205491ab449425f3779b13bedbbe463d", "siblings": [ { "rfilename": ".gitattributes" @@ -672,19 +4105,19 @@ "region:us", "conversational" ], - "usedStorage": 1466939625856, + "usedStorage": 596251612960, "widgetData": [ { "text": "Hi, what can you help me with?" }, { - "text": "Hey, let's have a conversation!" + "text": "What is 84 * 3 / 2?" }, { - "text": "Hello there!" + "text": "Tell me an interesting fact about the universe!" }, { - "text": "Hey my name is Clara! How are you?" + "text": "Explain quantum computing in simple terms." 
} ] }, @@ -698,13 +4131,21 @@ "size": 986048416 }, { - "id": "qwen2.5:7b", - "size": 4683073856 + "id": "qwen2.5:3b", + "size": 1929902912 + }, + { + "id": "qwen2.5:14b", + "size": 8988110592 }, { "id": "qwen2.5:0.5b", "size": 397807808 }, + { + "id": "qwen2.5:72b", + "size": 47415715104 + }, { "id": "qwen2.5:coder-1.5b", "size": 986048480 @@ -713,64 +4154,52 @@ "id": "qwen2.5:32b", "size": 19851336192 }, - { - "id": "qwen2.5:3b", - "size": 1929902912 - }, - { - "id": "qwen2.5:14b", - "size": 8988110592 - }, { "id": "qwen2.5:math-7b", "size": 4683073856 }, { - "id": "qwen2.5:72b", - "size": 47415715104 + "id": "qwen2.5:7b", + "size": 4683073856 }, { "id": "qwen2.5:coder-7b", "size": 4683073920 - }, - { - "id": "qwen2.5:main", - "size": 8098525504 } ] }, { - "author": "meta-llama", - "id": "cortexso/llama3.2", + "author": "MistralAI", + "id": "cortexso/codestral", "metadata": { - "_id": "66f63309ba963b1db95deaa4", + "_id": "66724fb044ee478111905260", "author": "cortexso", "cardData": { - "license": "llama3.2", + "license": "other", "pipeline_tag": "text-generation", - "tags": ["cortex.cpp", "featured"] + "tags": ["cortex.cpp"] }, - "createdAt": "2024-09-27T04:22:33.000Z", - "description": "---\nlicense: llama3.2\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3.2](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 2 | [gguf](https://huggingface.co/cortexso/llama3.2/tree/main) | `cortex run llama3.2` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.2\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/blob/main/LICENSE.txt)\n- **Papers:** [Llama-3.2 Blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)", + "createdAt": "2024-06-19T03:25:36.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nCodestral-22B-v0.1 is trained on a diverse dataset of 80+ programming languages, including the most popular ones, such as Python, Java, C, C++, JavaScript, and Bash\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Codestral-22b](https://huggingface.co/cortexso/codestral/tree/22b) | `cortex run codestral:22b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/codestral\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run codestral\n ```\n \n## Credits\n\n- **Author:** Mistral AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://mistral.ai/licenses/MNPL-0.1.md)\n- **Papers:** [Codestral Blog](https://mistral.ai/news/codestral/)", "disabled": false, - "downloads": 761, + "downloads": 517, "gated": false, "gguf": { "architecture": "llama", - "bos_token": "<|begin_of_text|>", - "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", - "context_length": 131072, - "eos_token": "<|eot_id|>", - "total": 1235814432 + "bos_token": "", + "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.last and system_message is defined %}\n {{- '[INST] ' + system_message + '\\n\\n' + message['content'] + '[/INST]' }}\n {%- else %}\n {{- '[INST] ' + message['content'] + '[/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n", + "context_length": 32768, + "eos_token": "", + "total": 22247282688 }, - "id": "cortexso/llama3.2", - "lastModified": "2025-02-24T10:25:50.000Z", + "id": "cortexso/codestral", + "lastModified": "2025-03-02T15:11:11.000Z", "likes": 0, "model-index": null, - "modelId": "cortexso/llama3.2", + "modelId": "cortexso/codestral", "pipeline_tag": "text-generation", "private": false, - "sha": "5aabb7db00af6183d866ff69260db98b55760359", + "sha": "6b522a6f0ce9c94a2f317c3802180aca4f526a30", "siblings": [ { "rfilename": ".gitattributes" @@ -779,64 +4208,34 @@ "rfilename": "README.md" }, { - "rfilename": "llama-3.2-1b-instruct-q2_k.gguf" + "rfilename": "codestral-22b-v0.1-q2_k.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q3_k_l.gguf" + "rfilename": "codestral-22b-v0.1-q3_k_l.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q3_k_m.gguf" + "rfilename": "codestral-22b-v0.1-q3_k_m.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q3_k_s.gguf" + "rfilename": "codestral-22b-v0.1-q3_k_s.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q4_k_m.gguf" + "rfilename": 
"codestral-22b-v0.1-q4_k_m.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q4_k_s.gguf" + "rfilename": "codestral-22b-v0.1-q4_k_s.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q5_k_m.gguf" + "rfilename": "codestral-22b-v0.1-q5_k_m.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q5_k_s.gguf" + "rfilename": "codestral-22b-v0.1-q5_k_s.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q6_k.gguf" + "rfilename": "codestral-22b-v0.1-q6_k.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q8_0.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q2_k.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q6_k.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q8_0.gguf" + "rfilename": "codestral-22b-v0.1-q8_0.gguf" }, { "rfilename": "metadata.yml" @@ -849,76 +4248,67 @@ "tags": [ "gguf", "cortex.cpp", - "featured", "text-generation", - "license:llama3.2", + "license:other", "endpoints_compatible", "region:us", "conversational" ], - "usedStorage": 50404795008, + "usedStorage": 166025350400, "widgetData": [ { "text": "Hi, what can you help me with?" }, { - "text": "Hey, let's have a conversation!" + "text": "What is 84 * 3 / 2?" }, { - "text": "Hello there!" + "text": "Tell me an interesting fact about the universe!" }, { - "text": "Hey my name is Clara! How are you?" + "text": "Explain quantum computing in simple terms." } ] }, "models": [ { - "id": "llama3.2:1b", - "size": 911503104 - }, - { - "id": "llama3.2:main", - "size": 3421898912 - }, - { - "id": "llama3.2:3b", - "size": 2019377312 + "id": "codestral:22b", + "size": 13341239008 } ] }, { - "author": "DeepSeek-AI", - "id": "cortexso/deepseek-r1", + "author": "Nous Research", + "id": "cortexso/openhermes-2.5", "metadata": { - "_id": "67a0bcf13ac2dd6adf0bdfcf", + "_id": "6669ee8d6993100c6f8befa7", "author": "cortexso", "cardData": { - "license": "mit", + "license": "apache-2.0", "pipeline_tag": "text-generation", - "tags": ["cortexp.cpp", "featured"] + "tags": ["cortex.cpp"] }, - "createdAt": "2025-02-03T12:56:17.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortexp.cpp\n- featured\n---\n\n## Overview\n\n**DeepSeek** developed and released the **DeepSeek-R1** series, featuring multiple model sizes fine-tuned for high-performance text generation. 
These models are optimized for dialogue, reasoning, and information-seeking tasks, providing a balance of efficiency and accuracy while maintaining a smaller footprint compared to their original counterparts.\n\nThe DeepSeek-R1 models include distilled and full-scale variants of both **Qwen** and **Llama** architectures, catering to various applications such as customer support, conversational AI, research, and enterprise automation.\n\n## Variants\n\n### DeepSeek-R1\n\n| No | Variant | Branch | Cortex CLI command |\n| -- | ---------------------------------------------------------------------------------------------- | ------- | ------------------------------------------ |\n| 1 | [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/cortexso/deepseek-r1/tree/1.5b) | 1.5b | `cortex run deepseek-r1:1.5b` |\n| 2 | [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/cortexso/deepseek-r1/tree/7b) | 7b | `cortex run deepseek-r1:7b` |\n| 3 | [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/cortexso/deepseek-r1/tree/8b) | 8b | `cortex run deepseek-r1:8b` |\n| 4 | [DeepSeek-R1-Distill-Qwen-14B](https://huggingface.co/cortexso/deepseek-r1/tree/14b) | 14b | `cortex run deepseek-r1:14b` |\n| 5 | [DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/cortexso/deepseek-r1/tree/32b) | 32b | `cortex run deepseek-r1:32b` |\n| 6 | [DeepSeek-R1-Distill-Llama-70B](https://huggingface.co/cortexso/deepseek-r1/tree/70b) | 70b | `cortex run deepseek-r1:70b` |\n\nEach branch contains a default quantized version:\n- **Qwen-1.5B:** q4-km\n- **Qwen-7B:** q4-km\n- **Llama-8B:** q4-km\n- **Qwen-14B:** q4-km\n- **Qwen-32B:** q4-km\n- **Llama-70B:** q4-km\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```text\n cortexso/deepseek-r1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run deepseek-r1\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1#license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "createdAt": "2024-06-12T18:53:01.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nOpenHermes 2.5 Mistral 7B is a state of the art Mistral Fine-tune, a continuation of OpenHermes 2 model, which trained on additional code datasets.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [OpenHermes 2.5-7b](https://huggingface.co/cortexso/openhermes-2.5/tree/7b) | `cortex run openhermes-2.5:7b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/openhermes-2.5\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run openhermes-2.5\n ```\n \n## Credits\n\n- **Author:** Nous Research\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md)\n- **Papers:** [Openhermes 2.5](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B)", "disabled": false, - "downloads": 999, + "downloads": 230, "gated": false, "gguf": { "architecture": "llama", - "bos_token": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c>\\n'}}{% endif %}", - "context_length": 131072, - "eos_token": "<\uff5cend\u2581of\u2581sentence\uff5c>", - "total": 70553706560 + "bos_token": "", + "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 7241748480 }, - "id": "cortexso/deepseek-r1", - "lastModified": 
"2025-02-24T17:35:43.000Z", + "id": "cortexso/openhermes-2.5", + "lastModified": "2025-03-02T14:54:17.000Z", "likes": 0, "model-index": null, - "modelId": "cortexso/deepseek-r1", + "modelId": "cortexso/openhermes-2.5", "pipeline_tag": "text-generation", "private": false, - "sha": "b08ca722cc176e8d830a4e348f51367ea47b7bed", + "sha": "e4ef98ea46b61d21e434a79704717f7065c306a9", "siblings": [ { "rfilename": ".gitattributes" @@ -927,217 +4317,1700 @@ "rfilename": "README.md" }, { - "rfilename": "deepseek-r1-distill-llama-70b-q4_k_m.gguf" + "rfilename": "metadata.yml" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q2_k.gguf" + "rfilename": "model.yml" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q3_k_l.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q2_k.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q3_k_m.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q3_k_l.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q3_k_s.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q3_k_m.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q4_k_m.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q3_k_s.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q4_k_s.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q4_k_m.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q5_k_m.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q4_k_s.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q5_k_s.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q5_k_m.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q6_k.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q5_k_s.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q8_0.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q6_k.gguf" }, { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q4_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q8_0.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q4_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q8_0.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q4_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q5_k_m.gguf" - }, - { - "rfilename": 
"deepseek-r1-distill-qwen-32b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q8_0.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q4_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q8_0.gguf" - }, - { - "rfilename": "metadata.yaml" + "rfilename": "openhermes-2.5-mistral-7b-q8_0.gguf" } ], "spaces": [], "tags": [ "gguf", - "cortexp.cpp", - "featured", + "cortex.cpp", "text-generation", - "license:mit", + "license:apache-2.0", "endpoints_compatible", "region:us", "conversational" ], - "usedStorage": 825182913408, + "usedStorage": 122667617430, "widgetData": [ { "text": "Hi, what can you help me with?" }, { - "text": "Hey, let's have a conversation!" + "text": "What is 84 * 3 / 2?" }, { - "text": "Hello there!" + "text": "Tell me an interesting fact about the universe!" }, { - "text": "Hey my name is Clara! How are you?" + "text": "Explain quantum computing in simple terms." } ] }, "models": [ { - "id": "deepseek-r1:1.5b", - "size": 1117320480 + "id": "openhermes-2.5:7b", + "size": 4368451712 + } + ] + }, + { + "author": "sail", + "id": "cortexso/sailor-2", + "metadata": { + "_id": "674f5d998f1ed02584bf68d8", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-12-03T19:35:53.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nSailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). It is designed to address the growing demand for diverse, robust, and accessible language technologies in the region. Built upon the foundation of Qwen 2.5, Sailor2 is continuously pre-trained on 500B tokens, significantly improving its support for 15 languages with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray.\n\nSailor2 is available in three sizes: 1B, 8B, and 20B, which are expansions from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively. These models serve a wide range of applications, from production use to research and speculative decoding, ensuring accessibility to advanced language technologies across SEA.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Sailor-2-1b](https://huggingface.co/cortexso/sailor-2/tree/1b) | `cortex run sailor-2:1b` |\n| 2 | [Sailor-2-8b](https://huggingface.co/cortexso/sailor-2/tree/8b) | `cortex run sailor-2:8b` |\n| 3 | [Sailor-2-20b](https://huggingface.co/cortexso/sailor-2/tree/20b) | `cortex run sailor-2:20b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/sailor-2\n ```\n\n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run sailor-2\n ```\n \n## Credits\n\n- **Author:** Community-driven (Sailor2 Initiative)\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Technical Paper](https://arxiv.org/pdf/2502.12982)", + "disabled": false, + "downloads": 178, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are an AI assistant named Sailor2, created by Sea AI Lab. As an AI assistant, you can answer questions in English, Chinese, and Southeast Asian languages such as Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. Your responses should be friendly, unbiased, informative, detailed, and faithful.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 4096, + "eos_token": "<|im_end|>", + "total": 988064640 + }, + "id": "cortexso/sailor-2", + "lastModified": "2025-03-03T02:58:28.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/sailor-2", + "pipeline_tag": "text-generation", + "private": false, + "sha": "89b3079762dedf6ff4fbc94545632b3554c16420", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "sailor2-1b-chat-q2_k.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q3_k_l.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q3_k_m.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q3_k_s.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q4_k_m.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q4_k_s.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q5_k_m.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q5_k_s.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q6_k.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q8_0.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q2_k.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q3_k_l.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q3_k_m.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q3_k_s.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q4_k_m.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q4_k_s.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q5_k_m.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q5_k_s.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q6_k.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q8_0.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q2_k.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q3_k_l.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q3_k_m.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q3_k_s.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q4_k_m.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q4_k_s.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q5_k_m.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q5_k_s.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q6_k.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2502.12982", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + 
"usedStorage": 201040376768, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "sailor-2:1b", + "size": 738628256 }, { - "id": "deepseek-r1:14b", - "size": 8988109920 + "id": "sailor-2:20b", + "size": 11622380384 }, { - "id": "deepseek-r1:70b", - "size": 42520395584 + "id": "sailor-2:8b", + "size": 5242934176 + } + ] + }, + { + "author": "CohereForAI", + "id": "cortexso/aya-expanse", + "metadata": { + "_id": "671ac0aee98f80735b80ce0d", + "author": "cortexso", + "cardData": { + "license": "cc-by-sa-4.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-10-24T21:48:30.000Z", + "description": "---\nlicense: cc-by-sa-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nAya Expanse is an open-weight research release of a model with highly advanced multilingual capabilities. It focuses on pairing a highly performant pre-trained Command family of models with the result of a year’s dedicated research from Cohere For AI, including data arbitrage, multilingual preference training, safety tuning, and model merging. The result is a powerful multilingual large language model serving 23 languages.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Aya-expanse-8b](https://huggingface.co/cortexso/aya-expanse/tree/8b) | `cortex run aya-expanse:8b` |\n| 2 | [Aya-expanse-32b](https://huggingface.co/cortexso/aya-expanse/tree/32b) | `cortex run aya-expanse:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/aya-expanse\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run aya-expanse\n ```\n\n## Credits\n\n- **Author:** CohereAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://cohere.com/c4ai-cc-by-nc-license)\n- **Papers:** [Aya Expanse Blog](https://cohere.com/blog/aya-expanse-connecting-our-world)", + "disabled": false, + "downloads": 219, + "gated": false, + "gguf": { + "architecture": "command-r", + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Aya, a brilliant, sophisticated, multilingual AI-assistant trained to assist human users by providing thorough responses. You are able to interact and respond to questions in 23 languages and you are powered by a multilingual model built by Cohere For AI.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", + "context_length": 8192, + "eos_token": "<|END_OF_TURN_TOKEN|>", + "total": 32296476672 + }, + "id": "cortexso/aya-expanse", + "lastModified": "2025-03-03T05:45:56.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/aya-expanse", + "pipeline_tag": "text-generation", + "private": false, + "sha": "d3de661105fcf536bac3f1ec747a2d39d25fe08f", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "aya-expanse-32b-q2_k.gguf" + }, + { + "rfilename": "aya-expanse-32b-q3_k_l.gguf" + }, + { + "rfilename": "aya-expanse-32b-q3_k_m.gguf" + }, + { + "rfilename": "aya-expanse-32b-q3_k_s.gguf" + }, + { + "rfilename": "aya-expanse-32b-q4_k_m.gguf" + }, + { + "rfilename": "aya-expanse-32b-q4_k_s.gguf" + }, + { + "rfilename": "aya-expanse-32b-q5_k_m.gguf" + }, + { + "rfilename": "aya-expanse-32b-q5_k_s.gguf" + }, + { + "rfilename": "aya-expanse-32b-q6_k.gguf" + }, + { + "rfilename": "aya-expanse-32b-q8_0.gguf" + }, + { + "rfilename": "aya-expanse-8b-q2_k.gguf" + }, + { + "rfilename": "aya-expanse-8b-q3_k_l.gguf" + }, + { + "rfilename": "aya-expanse-8b-q3_k_m.gguf" + }, + { + "rfilename": "aya-expanse-8b-q3_k_s.gguf" + }, + { + "rfilename": "aya-expanse-8b-q4_k_m.gguf" + }, + { + "rfilename": "aya-expanse-8b-q4_k_s.gguf" + }, + { + "rfilename": "aya-expanse-8b-q5_k_m.gguf" + }, + { + "rfilename": "aya-expanse-8b-q5_k_s.gguf" + }, + { + "rfilename": "aya-expanse-8b-q6_k.gguf" + }, + { + "rfilename": "aya-expanse-8b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:cc-by-sa-4.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 283759636448, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "aya-expanse:8b", + "size": 5056974624 }, { - "id": "deepseek-r1:8b", - "size": 4920736256 + "id": "aya-expanse:32b", + "size": 19800825408 + } + ] + }, + { + "author": "CohereForAI", + "id": "cortexso/command-r", + "metadata": { + "_id": "66751b98585f2bf57092b2ae", + "author": "cortexso", + "cardData": { + "license": "cc-by-nc-4.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-21T06:20:08.000Z", + "description": "---\nlicense: cc-by-nc-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nC4AI Command-R is a research release of a 35 billion parameter highly performant generative model. Command-R is a large language model with open weights optimized for a variety of use cases including reasoning, summarization, and question answering. Command-R has the capability for multilingual generation evaluated in 10 languages and highly performant RAG capabilities.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Command-r-32b](https://huggingface.co/cortexhub/command-r/tree/32b) | `cortex run command-r:32b` |\n| 1 | [Command-r-35b](https://huggingface.co/cortexhub/command-r/tree/35b) | `cortex run command-r:35b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/command-r\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run command-r\n ```\n \n## Credits\n\n- **Author:** Cohere For AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://cohere.com/c4ai-cc-by-nc-license)", + "disabled": false, + "downloads": 613, + "gated": false, + "gguf": { + "architecture": "command-r", + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are a large language model called Command R built by the company Cohere. You act as a brilliant, sophisticated, AI-assistant chatbot trained to assist human users by providing thorough responses.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", + "context_length": 131072, + "eos_token": "<|END_OF_TURN_TOKEN|>", + "total": 32296476672 + }, + "id": "cortexso/command-r", + "lastModified": "2025-03-03T05:55:03.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/command-r", + "pipeline_tag": "text-generation", + "private": false, + "sha": "829fc0c4d726206187684dcbaf2a53c658d5d34a", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "c4ai-command-r-08-2024-q2_k.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q3_k_l.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q3_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q3_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q4_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q4_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q5_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q5_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q6_k.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q8_0.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q2_k.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q3_k_l.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q3_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q3_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q4_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q4_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q5_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q5_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q6_k.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:cc-by-nc-4.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 471257928608, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "command-r:32b", + "size": 19800837184 }, { - "id": "deepseek-r1:main", - "size": 8098524832 + "id": "command-r:35b", + "size": 21527055296 + } + ] + }, + { + "author": "simplescaling", + "id": "cortexso/simplescaling-s1", + "metadata": { + "_id": "67a4e03a6f317f30b9a285b0", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-02-06T16:15:54.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nThe 'simplescaling-s1' model is a refined version of 'simplescaling/s1-32B,' designed to enhance scalability and streamline tasks in AI applications. It focuses on efficiently managing resource allocation while maintaining high performance across various workloads. This model is particularly effective for text generation, summarization, and conversational AI, as it balances speed and accuracy. Users can leverage 'simplescaling-s1' for building scalable applications that require processing large datasets or generating content quickly. Overall, the model achieves impressive results with reduced computational overhead, making it suitable for both research and practical deployments.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Simplescaling-s1-32b](https://huggingface.co/cortexso/simplescaling-s1/tree/32b) | cortex run simplescaling-s1:32b |\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/simplescaling-s1\n ```\n \n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run simplescaling-s1\n ```\n## Credits\n- **Author:** simplescaling\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Paper**: [Paper](https://arxiv.org/abs/2501.19393)", + "disabled": false, + "downloads": 104, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 32763876352 + }, + "id": "cortexso/simplescaling-s1", + "lastModified": "2025-03-03T03:46:24.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/simplescaling-s1", + "pipeline_tag": "text-generation", + "private": false, + "sha": "5755e76ec22a9ca9d0271ce16f5287bb9ad3c1a6", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "s1-32b-q2_k.gguf" + }, + { + "rfilename": "s1-32b-q3_k_l.gguf" + }, + { + "rfilename": "s1-32b-q3_k_m.gguf" + }, + { + "rfilename": "s1-32b-q3_k_s.gguf" + }, + { + "rfilename": "s1-32b-q4_k_m.gguf" + }, + { + "rfilename": "s1-32b-q4_k_s.gguf" + }, + { + "rfilename": "s1-32b-q5_k_m.gguf" + }, + { + "rfilename": "s1-32b-q5_k_s.gguf" + }, + { + "rfilename": "s1-32b-q6_k.gguf" + }, + { + "rfilename": "s1-32b-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2501.19393", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 206130756480, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "simplescaling-s1:32b", + "size": 19851336384 + } + ] + }, + { + "author": "Qwen", + "id": "cortexso/qwq", + "metadata": { + "_id": "67497b496615e96c7c8d6b05", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-29T08:28:57.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nQwQ-32B-Preview is an experimental large-scale research model by the Qwen Team, focusing on advanced AI reasoning. 
While it demonstrates strong analytical capabilities, it also presents notable limitations:\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwq-32b](https://huggingface.co/cortexso/qwq/tree/32b) | `cortex run qwq:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/qwq\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run qwq\n ```\n \n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE)\n- **Papers:** [QwQ Blog](https://qwenlm.github.io/blog/qwq-32b-preview/)", + "disabled": false, + "downloads": 101, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. 
You should think step-by-step.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 32763876352 + }, + "id": "cortexso/qwq", + "lastModified": "2025-03-03T02:23:40.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/qwq", + "pipeline_tag": "text-generation", + "private": false, + "sha": "fc6f23c0d5c8faf8b79b11e03aaa7c656fed8dfd", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "qwq-32b-preview-q2_k.gguf" + }, + { + "rfilename": "qwq-32b-preview-q3_k_l.gguf" + }, + { + "rfilename": "qwq-32b-preview-q3_k_m.gguf" + }, + { + "rfilename": "qwq-32b-preview-q3_k_s.gguf" + }, + { + "rfilename": "qwq-32b-preview-q4_k_m.gguf" + }, + { + "rfilename": "qwq-32b-preview-q4_k_s.gguf" + }, + { + "rfilename": "qwq-32b-preview-q5_k_m.gguf" + }, + { + "rfilename": "qwq-32b-preview-q5_k_s.gguf" + }, + { + "rfilename": "qwq-32b-preview-q6_k.gguf" + }, + { + "rfilename": "qwq-32b-preview-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 206130755200, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "qwq:32b", + "size": 19851336256 + } + ] + }, + { + "author": "Nexusflow", + "id": "cortexso/athene", + "metadata": { + "_id": "6737ae7de6b1d15ff54d0a08", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-15T20:26:37.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nAthene-V2-Chat-72B is an open-weight LLM that competes on par with GPT-4o across various benchmarks. It is currently ranked as the best open model on Chatbot Arena, where it outperforms GPT-4o-0513 (the highest-ranked GPT-4o model on Arena) in hard and math categories. 
It also matches GPT-4o-0513 in coding, instruction following, longer queries, and multi-turn conversations.\n\nTrained through RLHF with Qwen-2.5-72B-Instruct as the base model, Athene-V2-Chat-72B excels in chat, math, and coding. Additionally, its sister model, Athene-V2-Agent-72B, surpasses GPT-4o in complex function calling and agentic applications, further extending its capabilities.\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Athene-72b](https://huggingface.co/cortexso/athene/tree/72b) | `cortex run athene:72b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/athene\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run athene\n ```\n \n## Credits\n\n- **Author:** Nexusflow\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/Nexusflow/Athene-V2-Chat/blob/main/Nexusflow_Research_License_.pdf)\n- **Papers:** [Athene V2 Blog](https://nexusflow.ai/blogs/athene-v2)", + "disabled": false, + "downloads": 13, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 72706203648 + }, + "id": "cortexso/athene", + "lastModified": "2025-03-03T06:04:09.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/athene", + "pipeline_tag": "text-generation", + "private": false, + "sha": "a92447ca675e741541855ac03b8f144dee1067c4", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "athene-v2-chat-q4_k_m.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 47415715136, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "athene:72b", + "size": 47415715136 + } + ] + }, + { + "author": "MistralAI", + "id": "cortexso/mistral", + "metadata": { + "_id": "6667b1796e382e809d62b9fc", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-11T02:07:53.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMistral 7B, a 7-billion-parameter Large Language Model by Mistral AI. Designed for efficiency and performance, it suits real-time applications requiring swift responses.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Mistra-7b](https://huggingface.co/cortexhub/mistral/tree/7b) | `cortex run mistral:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/mistral\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run mistral\n ```\n \n## Credits\n\n- **Author:** MistralAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://mistral.ai/licenses/MNPL-0.1.md)\n- **Papers:** [Mistral paper](https://arxiv.org/abs/2310.06825)", + "disabled": false, + "downloads": 1895, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "", + "chat_template": "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS] [\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n {{- \"[TOOL_CALLS] [\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- \" \" + message[\"content\"]|trim + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS] {\"content\": ' + content|string + \", \" }}\n {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be 
alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", + "context_length": 32768, + "eos_token": "", + "total": 7248023552 + }, + "id": "cortexso/mistral", + "lastModified": "2025-03-03T02:39:43.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/mistral", + "pipeline_tag": "text-generation", + "private": false, + "sha": "125b0ef1bdf6441d5c00f6a6a24a491214e532bd", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q2_k.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q3_k_l.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q3_k_m.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q3_k_s.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q4_k_m.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q4_k_s.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q5_k_m.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q5_k_s.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q6_k.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q8_0.gguf" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2310.06825", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 49914826528, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "mistral:7b", + "size": 4372815680 + } + ] + }, + { + "author": "HuggingFaceTB", + "id": "cortexso/smollm2", + "metadata": { + "_id": "672408e4603a8644ff7505f0", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-10-31T22:47:00.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nSmolLM2 is a family of compact language models available in three sizes: 135M, 360M, and 1.7B parameters. These models are designed to solve a wide range of tasks while being lightweight enough for on-device deployment. More details can be found in the [SmolLM2 paper](https://arxiv.org/abs/2502.02737v1).\n\nThe **1.7B variant** demonstrates significant improvements over its predecessor, SmolLM1-1.7B, especially in instruction following, knowledge retention, reasoning, and mathematical problem-solving. It was trained on **11 trillion tokens** using a diverse dataset combination, including **FineWeb-Edu, DCLM, The Stack**, and newly curated mathematics and coding datasets that will be released soon.\n\nThe **instruct version** of SmolLM2 was developed through **supervised fine-tuning (SFT)** using a mix of public datasets and curated proprietary datasets. It further benefits from **Direct Preference Optimization (DPO)** using **UltraFeedback**. \n\nAdditionally, the instruct model supports tasks such as **text rewriting, summarization, and function calling**, enabled by datasets from **Argilla**, including **Synth-APIGen-v0.1**. 
The SFT dataset is available at: [SmolTalk SFT Dataset](https://huggingface.co/datasets/HuggingFaceTB/smoltalk).\n\nFor further details, visit the [SmolLM2 GitHub repository](https://github.com/huggingface/smollm), where you will find resources for **pre-training, post-training, evaluation, and local inference**.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| -- | ------------------------------------------------------ | ---------------------- |\n| 1 | [Smollm2-1.7b](https://huggingface.co/cortexso/smollm2/tree/1.7b) | `cortex run smollm2:1.7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/smollm2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run smollm2\n ```\n\n## Credits\n\n- **Author:** SmolLM2 Team\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [SmolLM2 Research](https://arxiv.org/abs/2502.02737v1)", + "disabled": false, + "downloads": 237, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|im_start|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 8192, + "eos_token": "<|im_end|>", + "total": 1711376384 + }, + "id": "cortexso/smollm2", + "lastModified": "2025-03-03T03:51:13.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/smollm2", + "pipeline_tag": "text-generation", + "private": false, + "sha": "b825edad383d925571b4433f8d6b16eb7cc1e9fc", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "smollm2-1.7b-instruct-q2_k.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q6_k.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2502.02737", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 11998369216, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "smollm2:1.7b", + "size": 1055609728 + } + ] + }, + { + "author": "allenai", + "id": "cortexso/tulu3", + "metadata": { + "_id": "6744a6a2e08fe3da3fcdfb36", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-25T16:32:34.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nTülu3 is a state-of-the-art instruction-following model family developed by Allen Institute for AI. It is designed to excel in a wide range of tasks beyond standard chat applications, including complex problem-solving in domains such as MATH, GSM8K, and IFEval. The Tülu3 series provides a fully open-source ecosystem, offering access to datasets, training code, and fine-tuning recipes to facilitate advanced model customization and experimentation.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Tulu3-8b](https://huggingface.co/cortexso/tulu3/tree/8b) | `cortex run tulu3:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/tulu3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run tulu3\n ```\n \n## Credits\n\n- **Author:** Allenai\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct/blob/main/LICENSE)\n- **Papers:** [Paper](https://arxiv.org/abs/2411.15124)", + "disabled": false, + "downloads": 252, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}{% else %}{{ '<|assistant|>\n' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}", + "context_length": 131072, + "eos_token": "<|end_of_text|>", + "total": 8030326848 + }, + "id": "cortexso/tulu3", + "lastModified": "2025-03-03T03:48:16.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/tulu3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "693fb27ee973a686d66f33ecc72b41172ec5a7d6", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q2_k.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q3_k_l.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q3_k_m.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q3_k_s.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q4_k_m.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q4_k_s.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q5_k_m.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q5_k_s.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q6_k.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + 
"text-generation", + "arxiv:2411.15124", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 56188233120, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "tulu3:8b", + "size": 4920780768 + } + ] + }, + { + "author": "Qwen Team", + "id": "cortexso/qwen3", + "metadata": { + "_id": "6810288ccbe4f92b62636b50", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2025-04-29T01:17:00.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**Qwen Team** developed and released the **Qwen3** series, a state-of-the-art family of language models optimized for advanced reasoning, dialogue, instruction-following, and agentic use cases. Qwen3 introduces innovative thinking/non-thinking mode switching, long context capabilities, and multilingual support, all while achieving high efficiency and performance.\n\nThe Qwen3 models span several sizes and include support for seamless reasoning, complex tool usage, and detailed multi-turn conversations, making them ideal for applications such as research assistants, code generation, enterprise chatbots, and more.\n\n## Variants\n\n### Qwen3\n\n| No | Variant | Branch | Cortex CLI command |\n|----|--------------------------------------------------------------------------------------------|--------|-------------------------------|\n| 1 | [Qwen3-0.6B](https://huggingface.co/cortexso/qwen3/tree/0.6b) | 0.6b | `cortex run qwen3:0.6b` |\n| 2 | [Qwen3-1.7B](https://huggingface.co/cortexso/qwen3/tree/1.7b) | 1.7b | `cortex run qwen3:1.7b` |\n| 3 | [Qwen3-4B](https://huggingface.co/cortexso/qwen3/tree/4b) | 4b | `cortex run qwen3:4b` |\n| 4 | [Qwen3-8B](https://huggingface.co/cortexso/qwen3/tree/8b) | 8b | `cortex run qwen3:8b` |\n| 5 | [Qwen3-14B](https://huggingface.co/cortexso/qwen3/tree/14b) | 14b | `cortex run qwen3:14b` |\n| 6 | [Qwen3-32B](https://huggingface.co/cortexso/qwen3/tree/32b) | 32b | `cortex run qwen3:32b` |\n| 7 | [Qwen3-30B-A3B](https://huggingface.co/cortexso/qwen3/tree/30b-a3b) | 30b-a3b| `cortex run qwen3:30b-a3b` |\n\nEach branch contains multiple quantized GGUF versions:\n- **Qwen3-0.6B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-1.7B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-4B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-8B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-32B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-30B-A3B:** *q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/qwen3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run qwen3\n ```\n\n## Credits\n\n- **Author:** Qwen Team\n- **Converter:** [Menlo Research](https://menlo.ai/)\n- **Original License:** [License](https://www.apache.org/licenses/LICENSE-2.0)\n- **Blogs:** [Qwen3: Think Deeper, Act Faster](https://qwenlm.github.io/blog/qwen3/)", + "disabled": false, + "downloads": 6693, + "gated": false, + "gguf": { + "architecture": "qwen3", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in message.content %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {%- set reasoning_content = message.content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 
1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '\\n\\n\\n\\n' }}\n {%- endif %}\n{%- endif %}", + "context_length": 40960, + "eos_token": "<|im_end|>", + "total": 751632384 + }, + "id": "cortexso/qwen3", + "lastModified": "2025-05-08T15:50:21.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/qwen3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "d25d0999fbab8909f16173f21f2db8f9f58c0a28", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "qwen3-0.6b-q2_k.gguf" + }, + { + "rfilename": "qwen3-0.6b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-0.6b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-0.6b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-0.6b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-0.6b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-0.6b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-0.6b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-0.6b-q6_k.gguf" + }, + { + "rfilename": "qwen3-0.6b-q8_0.gguf" + }, + { + "rfilename": "qwen3-1.7b-q2_k.gguf" + }, + { + "rfilename": "qwen3-1.7b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-1.7b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-1.7b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-1.7b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-1.7b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-1.7b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-1.7b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-1.7b-q6_k.gguf" + }, + { + "rfilename": "qwen3-1.7b-q8_0.gguf" + }, + { + "rfilename": "qwen3-14b-q2_k.gguf" + }, + { + "rfilename": "qwen3-14b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-14b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-14b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-14b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-14b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-14b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-14b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-14b-q6_k.gguf" + }, + { + "rfilename": "qwen3-14b-q8_0.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q2_k.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q6_k.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q8_0.gguf" + }, + { + "rfilename": "qwen3-32b-q2_k.gguf" + }, + { + "rfilename": "qwen3-32b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-32b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-32b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-32b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-32b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-32b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-32b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-32b-q6_k.gguf" + }, + { + "rfilename": "qwen3-32b-q8_0.gguf" + }, + { + "rfilename": "qwen3-4b-q2_k.gguf" + }, + { + "rfilename": "qwen3-4b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-4b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-4b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-4b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-4b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-4b-q5_k_m.gguf" + }, + { + 
"rfilename": "qwen3-4b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-4b-q6_k.gguf" + }, + { + "rfilename": "qwen3-4b-q8_0.gguf" + }, + { + "rfilename": "qwen3-8b-q2_k.gguf" + }, + { + "rfilename": "qwen3-8b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-8b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-8b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-8b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-8b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-8b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-8b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-8b-q6_k.gguf" + }, + { + "rfilename": "qwen3-8b-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 588411644672, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "qwen3:32b", + "size": 19762149088 }, { - "id": "deepseek-r1:7b", - "size": 4683073184 + "id": "qwen3:8b", + "size": 5027783808 }, { - "id": "deepseek-r1:32b", - "size": 19851335520 + "id": "qwen3:0.6b", + "size": 484219968 + }, + { + "id": "qwen3:4b", + "size": 2497280608 + }, + { + "id": "qwen3:30b-a3b", + "size": 18556686208 + }, + { + "id": "qwen3:14b", + "size": 9001753280 + }, + { + "id": "qwen3:1.7b", + "size": 1282439232 + } + ] + }, + { + "author": "TinyLlama", + "id": "cortexso/tinyllama", + "metadata": { + "_id": "66791800ca45b9165970f2fe", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-24T06:53:52.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [TinyLlama](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) project aims to pretrain a 1.1B Llama model on 3 trillion tokens. This is the chat model finetuned on a diverse range of synthetic dialogues generated by ChatGPT.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [TinyLLama-1b](https://huggingface.co/cortexso/tinyllama/tree/1b) | `cortex run tinyllama:1b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/tinyllama\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run tinyllama\n ```\n \n## Credits\n\n- **Author:** Microsoft\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Tinyllama Paper](https://arxiv.org/abs/2401.02385)", + "disabled": false, + "downloads": 562, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "", + "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}", + "context_length": 2048, + "eos_token": "", + "total": 1100048384 + }, + "id": "cortexso/tinyllama", + "lastModified": "2025-03-03T06:16:24.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/tinyllama", + "pipeline_tag": "text-generation", + "private": false, + "sha": "953054fd3565023c2bbd2381f2566f904f5bdc1f", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q2_k.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q3_k_l.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q3_k_m.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q3_k_s.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q4_k_m.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q4_k_s.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q5_k_m.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q5_k_s.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q6_k.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2401.02385", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 8451229056, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "tinyllama:1b", + "size": 782045248 + } + ] + }, + { + "author": "meta-llama", + "id": "cortexso/llama3", + "metadata": { + "_id": "6667a6d52e5f1c08ec14469c", + "author": "cortexso", + "cardData": { + "license": "llama3", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-11T01:22:29.000Z", + "description": "---\nlicense: llama3\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3](https://huggingface.co/meta-llama/Meta-Llama-3-8B) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. 
Further, in developing these models, we took great care to optimize helpfulness and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Llama3-8b](https://huggingface.co/cortexso/llama3/tree/8b) | `cortex run llama3:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://llama.meta.com/llama3/license/)\n- **Papers:** [Llama-3 Blog](https://llama.meta.com/llama3/)", + "disabled": false, + "downloads": 646, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "context_length": 131072, + "eos_token": "<|eot_id|>", + "total": 8030261312 + }, + "id": "cortexso/llama3", + "lastModified": "2025-03-03T06:19:24.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/llama3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "fcf18c0b14bb2dc64c7f78da40ca88a8ff759fd5", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "llama-3.1-8b-instruct-q2_k.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q6_k.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:llama3", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 70949951936, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "llama3:8b", + "size": 4920739072 } ] }, @@ -1155,7 +6028,7 @@ "createdAt": "2024-07-29T10:25:05.000Z", "description": "---\nlicense: llama3.1\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3.1](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Llama3.1-8b](https://huggingface.co/cortexso/llama3.1/tree/8b) | `cortex run llama3.1:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.1\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/blob/main/LICENSE)\n- **Papers:** [Llama-3.1 Blog](https://ai.meta.com/blog/meta-llama-3-1/)", "disabled": false, - "downloads": 275, + "downloads": 1048, "gated": false, "gguf": { "architecture": "llama", @@ -1165,13 +6038,13 @@ "total": 8030261312 }, "id": "cortexso/llama3.1", - "lastModified": "2025-02-25T07:41:12.000Z", + "lastModified": "2025-03-02T14:27:57.000Z", "likes": 0, "model-index": null, "modelId": "cortexso/llama3.1", "pipeline_tag": "text-generation", "private": false, - "sha": "f83805762b13bfe9aaa071c065edb74c48281367", + "sha": "256c4f2118a75d93a1dc368ac4ccf1fea16751c2", "siblings": [ { "rfilename": ".gitattributes" @@ -1225,7 +6098,7 @@ "endpoints_compatible", "region:us" ], - "usedStorage": 227069905920, + "usedStorage": 66029173888, "widgetData": [ { "text": "My name is Julien and I like to" @@ -1245,10 +6118,334 @@ { "id": "llama3.1:8b", "size": 4920734176 + } + ] + }, + { + "author": "AIDC-AI", + "id": "cortexso/marco-o1", + "metadata": { + "_id": "6743b6140d46fa30e6ff2879", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] }, + "createdAt": "2024-11-24T23:26:12.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\nMarco-o1 not only focuses on disciplines with standard answers, such as mathematics, physics, and coding—which are well-suited for reinforcement learning (RL)—but also places greater emphasis on open-ended resolutions. 
We aim to address the question: \"Can the o1 model effectively generalize to broader domains where clear standards are absent and rewards are challenging to quantify?\"\n\nCurrently, Marco-o1 Large Language Model (LLM) is powered by Chain-of-Thought (CoT) fine-tuning, Monte Carlo Tree Search (MCTS), reflection mechanisms, and innovative reasoning strategies—optimized for complex real-world problem-solving tasks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Marco-o1-8b](https://huggingface.co/cortexso/marco-o1/tree/8b) | `cortex run marco-o1:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/marco-o1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run marco-o1\n ```\n \n## Credits\n\n- **Author:** AIDC-AI\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/AIDC-AI/Marco-o1/blob/main/LICENSE)\n- **Papers:** [Paper](https://arxiv.org/abs/2411.14405)", + "disabled": false, + "downloads": 122, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在内完成,内输出你的结果。\n应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 7615616512 + }, + "id": "cortexso/marco-o1", + "lastModified": "2025-03-03T02:27:27.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/marco-o1", + "pipeline_tag": "text-generation", + "private": false, + "sha": "0c8e0cdbfb898e000cad200b2694c5c6e6710fc6", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "marco-o1-q2_k.gguf" + }, + { + "rfilename": "marco-o1-q3_k_l.gguf" + }, + { + "rfilename": "marco-o1-q3_k_m.gguf" + }, + { + "rfilename": "marco-o1-q3_k_s.gguf" + }, + { + "rfilename": "marco-o1-q4_k_m.gguf" + }, + { + "rfilename": "marco-o1-q4_k_s.gguf" + }, + { + "rfilename": "marco-o1-q5_k_m.gguf" + }, + { + "rfilename": "marco-o1-q5_k_s.gguf" + }, + { + "rfilename": "marco-o1-q6_k.gguf" + }, + { + "rfilename": "marco-o1-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2411.14405", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 53341785824, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ { - "id": "llama3.1:main", - "size": 8540770784 + "id": "marco-o1:8b", + "size": 4683071648 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-qwen-1.5b", + "metadata": { + "_id": "678e84d99d66241aabee008a", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-20T17:16:09.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) model, a distilled version of the Qwen 1.5B language model. It is fine-tuned for high-performance text generation and optimized for dialogue and information-seeking tasks. This model achieves a balance of efficiency and accuracy while maintaining a smaller footprint compared to the original Qwen 1.5B.\n\nThe model is designed for applications in customer support, conversational AI, and research, prioritizing both helpfulness and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-1.5b-1.5b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-1.5b/tree/1.5b) | `cortex run deepseek-r1-distill-qwen-1.5b:1.5b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-1.5b\n ```\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-1.5b\n ```\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 539, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool 
%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 1777088000 + }, + "id": "cortexso/deepseek-r1-distill-qwen-1.5b", + "lastModified": "2025-03-03T05:24:13.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-qwen-1.5b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "14cbd3c8ac57a346c35f676fd5fe55befebd911e", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q2_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_l.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q4_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q4_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q5_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q5_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q6_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 12728600096, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-qwen-1.5b:1.5b", + "size": 1117320480 + } + ] + }, + { + "author": "PrimeIntellect", + "id": "cortexso/intellect-1", + "metadata": { + "_id": "674e48fc24f1ef616cd485de", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-12-02T23:55:40.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nIntellect-1 is a high-performance instruction-tuned model developed by Qwen, designed to handle a broad range of natural language processing tasks with efficiency and precision. Optimized for dialogue, reasoning, and knowledge-intensive applications, Intellect-1 excels in structured generation, summarization, and retrieval-augmented tasks. 
It is part of an open ecosystem, providing transparency in training data, model architecture, and fine-tuning methodologies.\n\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Intellect-1-10b](https://huggingface.co/cortexso/intellect-1/tree/10b) | `cortex run intellect-1:10b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/intellect-1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run intellect-1\n ```\n \n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Technical Paper](https://github.com/PrimeIntellect-ai/prime)", + "disabled": false, + "downloads": 182, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "context_length": 8192, + "eos_token": "<|eot_id|>", + "total": 10211381248 + }, + "id": "cortexso/intellect-1", + "lastModified": "2025-03-03T02:32:47.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/intellect-1", + "pipeline_tag": "text-generation", + "private": false, + "sha": "f46fd8109130aab2969fd9229d390051f774a761", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "intellect-1-instruct-q2_k.gguf" + }, + { + "rfilename": "intellect-1-instruct-q3_k_l.gguf" + }, + { + "rfilename": "intellect-1-instruct-q3_k_m.gguf" + }, + { + "rfilename": "intellect-1-instruct-q3_k_s.gguf" + }, + { + "rfilename": "intellect-1-instruct-q4_k_m.gguf" + }, + { + "rfilename": "intellect-1-instruct-q4_k_s.gguf" + }, + { + "rfilename": "intellect-1-instruct-q5_k_m.gguf" + }, + { + "rfilename": "intellect-1-instruct-q5_k_s.gguf" + }, + { + "rfilename": "intellect-1-instruct-q6_k.gguf" + }, + { + "rfilename": "intellect-1-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 71113603904, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "intellect-1:10b", + "size": 6229006784 } ] } diff --git a/web-app/src/hooks/useAssistant.ts b/web-app/src/hooks/useAssistant.ts index 12feacdee..1c3181276 100644 --- a/web-app/src/hooks/useAssistant.ts +++ b/web-app/src/hooks/useAssistant.ts @@ -21,7 +21,7 @@ export const defaultAssistant: Assistant = { description: 'Jan is a helpful desktop assistant that can reason through complex tasks and use tools to complete them on the user’s behalf.', instructions: - 'Jan is a helpful desktop assistant that can reason through complex tasks and use tools to complete them on the user’s behalf. Respond naturally and concisely, take actions when needed, and guide the user toward their goals.', + 'You have access to a set of tools to help you answer the user’s question. You can use only one tool per message, and you’ll receive the result of that tool in the user’s next response. To complete a task, use tools step by step—each step should be guided by the outcome of the previous one.\nTool Usage Rules:\n1. Always provide the correct values as arguments when using tools. Do not pass variable names—use actual values instead.\n2. You may perform multiple tool steps to complete a task.\n3. Avoid repeating a tool call with exactly the same parameters to prevent infinite loops.', } export const useAssistant = create()((set, get) => ({ diff --git a/web-app/src/utils/models.ts b/web-app/src/utils/models.ts index d2d2cb9ac..c21b4a3a9 100644 --- a/web-app/src/utils/models.ts +++ b/web-app/src/utils/models.ts @@ -1,6 +1,6 @@ export const hardcodedModel = { author: 'Menlo', - id: 'https://huggingface.co/Menlo/Jan-nano', + id: 'Menlo/Jan-nano', metadata: { '_id': '68492cd9cada68b1d11ca1bd', 'author': 'Menlo', @@ -12,42 +12,39 @@ export const hardcodedModel = { 'description': '---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. 
Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)', 'disabled': false, - 'downloads': 0, + 'downloads': 939, 'gated': false, 'gguf': { architecture: 'qwen3', bos_token: '<|endoftext|>', chat_template: - "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- set content = content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n\\n\\n\\n' }}\n{%- endif %}", + "{%- if tools %} {{- 
'<|im_start|>system\\n' }} {%- if messages[0].role == 'system' %} {{- messages[0].content + '\\n\\n' }} {%- endif %} {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }} {%- for tool in tools %} {{- \"\\n\" }} {{- tool | tojson }} {%- endfor %} {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }} {%- else %} {%- if messages[0].role == 'system' %} {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endfor %} {%- for message in messages %} {%- if message.content is string %} {%- set content = message.content %} {%- else %} {%- set content = '' %} {%- endif %} {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %} {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }} {%- elif message.role == \"assistant\" %} {%- set reasoning_content = '' %} {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} {%- if '' in content %} {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %} {%- set content = content.split('')[-1].lstrip('\\n') %} {%- endif %} {%- endif %} {%- if loop.index0 > ns.last_query_index %} {%- if loop.last or (not loop.last and reasoning_content) %} {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- if message.tool_calls %} {%- for tool_call in message.tool_calls %} {%- if (loop.first and content) or (not loop.first) %} {{- '\\n' }} {%- endif %} {%- if tool_call.function %} {%- set tool_call = tool_call.function %} {%- endif %} {{- '\\n{\"name\": \"' }} {{- tool_call.name }} {{- '\", \"arguments\": ' }} {%- if tool_call.arguments is string %} {{- tool_call.arguments }} {%- else %} {{- tool_call.arguments | tojson }} {%- endif %} {{- '}\\n' }} {%- endfor %} {%- endif %} {{- '<|im_end|>\\n' }} {%- elif message.role == \"tool\" %} {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %} {{- '<|im_start|>user' }} {%- endif %} {{- '\\n\\n' }} {{- content }} {{- '\\n' }} {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %} {{- '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\\n' }} {{- '\\n\\n\\n\\n' }} {%- endif %}", context_length: 40960, eos_token: '<|im_end|>', + quantize_imatrix_file: 'imatrix.dat', total: 4022468096, }, 'id': 'Menlo/Jan-nano', - 'lastModified': '2025-06-11T10:42:16.000Z', + 'lastModified': '2025-06-13T05:53:33.000Z', 'likes': 2, 'model-index': null, 'modelId': 'Menlo/Jan-nano', 'pipeline_tag': 'text-generation', 'private': false, - 'sha': 'f05b9e798d3cb66394a25d2a45cdc77fd1d5a3ba', + 'sha': 
'782985633ac4080dfdaa52e62d61dcf637e9ff0d', 'siblings': [ { rfilename: '.gitattributes', - size: 1681, - }, - { - rfilename: 'Jan-nano_q4_k_m.gguf', - size: 2497280288, - }, - { - rfilename: 'Jan-nano_q8_0.gguf', - size: 4280400640, + size: 1742, }, { rfilename: 'README.md', size: 776, }, + { + rfilename: 'jan-nano-0.4-iQ4_XS.gguf', + size: 2270750400, + }, ], 'spaces': [], 'tags': [ @@ -56,9 +53,10 @@ export const hardcodedModel = { 'license:apache-2.0', 'endpoints_compatible', 'region:us', + 'imatrix', 'conversational', ], - 'usedStorage': 11772241536, + 'usedStorage': 20820673088, 'widgetData': [ { text: 'Hi, what can you help me with?', @@ -76,12 +74,8 @@ export const hardcodedModel = { }, models: [ { - id: 'Menlo:Jan-nano:Jan-nano_q4_k_m.gguf', - size: 2497280288, - }, - { - id: 'Menlo:Jan-nano:Jan-nano_q8_0.gguf', - size: 4280400640, + id: 'Menlo:Jan-nano:jan-nano-0.4-iQ4_XS.gguf', + size: 2270750400, }, ], } From 1e17cc6ec7285f2679074719f0e99663cb712df7 Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 14 Jun 2025 16:32:15 +0700 Subject: [PATCH 04/48] =?UTF-8?q?=E2=9C=A8enhancement:=20model=20run=20imp?= =?UTF-8?q?rovement=20(#5268)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: mcp tool error handling * fix: error message * fix: trigger download from recommend model * fix: can't scroll hub * fix: show progress * ✨enhancement: prompt users to increase context size * ✨enhancement: rearrange action buttons for a better UX * 🔧chore: clean up logics --------- Co-authored-by: Faisal Amir --- .../inference-cortex-extension/src/index.ts | 7 +- src-tauri/src/core/mcp.rs | 7 +- web-app/src/containers/ChatInput.tsx | 6 +- web-app/src/containers/ThreadContent.tsx | 24 +++- .../containers/dialogs/OutOfContextDialog.tsx | 104 +++++++++++++++ web-app/src/hooks/useAppState.ts | 7 + web-app/src/hooks/useChat.ts | 94 +++++++++++++- web-app/src/lib/completion.ts | 18 ++- web-app/src/locales/en/common.json | 1 + web-app/src/routes/hub.tsx | 30 ++--- web-app/src/routes/threads/$threadId.tsx | 7 + web-app/src/utils/error.ts | 2 + web-app/src/utils/models.ts | 122 ++++++++++++++++-- 13 files changed, 387 insertions(+), 42 deletions(-) create mode 100644 web-app/src/containers/dialogs/OutOfContextDialog.tsx create mode 100644 web-app/src/utils/error.ts diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index a49b1a852..626d53696 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -62,6 +62,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { cache_type: string = 'q8' cpu_threads?: number auto_unload_models: boolean = true + reasoning_budget = -1 // Default reasoning budget in seconds /** * The URL for making inference requests. */ @@ -230,8 +231,6 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { const loadedModels = await this.activeModels() - console.log('Loaded models:', loadedModels) - // This is to avoid loading the same model multiple times if (loadedModels.some((e: { id: string }) => e.id === model.id)) { console.log(`Model ${model.id} already loaded`) @@ -269,6 +268,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ...(this.cont_batching && this.n_parallel && this.n_parallel > 1 ? { cont_batching: this.cont_batching } : {}), + ...(model.id.toLowerCase().includes('jan-nano') + ? 
{ reasoning_budget: 0 } + : { reasoning_budget: this.reasoning_budget }), + ...{ 'no-context-shift': true }, }, timeout: false, signal, diff --git a/src-tauri/src/core/mcp.rs b/src-tauri/src/core/mcp.rs index 642505486..f9509c8e5 100644 --- a/src-tauri/src/core/mcp.rs +++ b/src-tauri/src/core/mcp.rs @@ -377,7 +377,12 @@ pub async fn call_tool( }); return match timeout(MCP_TOOL_CALL_TIMEOUT, tool_call).await { - Ok(result) => result.map_err(|e| e.to_string()), + Ok(result) => { + match result { + Ok(ok_result) => Ok(ok_result), + Err(e) => Err(e.to_string()), + } + } Err(_) => Err(format!( "Tool call '{}' timed out after {} seconds", tool_name, diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index af5da384c..a83adc59e 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -36,6 +36,7 @@ import { ModelLoader } from '@/containers/loaders/ModelLoader' import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable' import { getConnectedServers } from '@/services/mcp' import { stopAllModels } from '@/services/models' +import { useOutOfContextPromiseModal } from './dialogs/OutOfContextDialog' type ChatInputProps = { className?: string @@ -60,6 +61,8 @@ const ChatInput = ({ const { t } = useTranslation() const { spellCheckChatInput } = useGeneralSetting() const { tokenSpeed } = useAppState() + const { showModal, PromiseModal: OutOfContextModal } = + useOutOfContextPromiseModal() const maxRows = 10 const { selectedModel } = useModelProvider() @@ -110,7 +113,7 @@ const ChatInput = ({ return } setMessage('') - sendMessage(prompt) + sendMessage(prompt, showModal) } useEffect(() => { @@ -611,6 +614,7 @@ const ChatInput = ({
)} +
) } diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index f067a4431..833846db1 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -1,6 +1,6 @@ import { ThreadMessage } from '@janhq/core' import { RenderMarkdown } from './RenderMarkdown' -import { Fragment, memo, useCallback, useMemo, useState } from 'react' +import React, { Fragment, memo, useCallback, useMemo, useState } from 'react' import { IconCopy, IconCopyCheck, @@ -79,6 +79,8 @@ export const ThreadContent = memo( showAssistant?: boolean // eslint-disable-next-line @typescript-eslint/no-explicit-any streamTools?: any + contextOverflowModal?: React.ReactNode | null + showContextOverflowModal?: () => Promise } ) => { const [message, setMessage] = useState(item.content?.[0]?.text?.value || '') @@ -129,7 +131,10 @@ export const ThreadContent = memo( } if (toSendMessage) { deleteMessage(toSendMessage.thread_id, toSendMessage.id ?? '') - sendMessage(toSendMessage.content?.[0]?.text?.value || '') + sendMessage( + toSendMessage.content?.[0]?.text?.value || '', + item.showContextOverflowModal + ) } }, [deleteMessage, getMessages, item, sendMessage]) @@ -162,15 +167,25 @@ export const ThreadContent = memo( const editMessage = useCallback( (messageId: string) => { const threadMessages = getMessages(item.thread_id) + const index = threadMessages.findIndex((msg) => msg.id === messageId) if (index === -1) return + // Delete all messages after the edited message for (let i = threadMessages.length - 1; i >= index; i--) { deleteMessage(threadMessages[i].thread_id, threadMessages[i].id) } - sendMessage(message) + + sendMessage(message, item.showContextOverflowModal) }, - [deleteMessage, getMessages, item.thread_id, message, sendMessage] + [ + deleteMessage, + getMessages, + item.thread_id, + message, + sendMessage, + item.showContextOverflowModal, + ] ) const isToolCalls = @@ -445,6 +460,7 @@ export const ThreadContent = memo( {image.detail &&
{image.detail}
} )} + {item.contextOverflowModal && item.contextOverflowModal}
) } diff --git a/web-app/src/containers/dialogs/OutOfContextDialog.tsx b/web-app/src/containers/dialogs/OutOfContextDialog.tsx new file mode 100644 index 000000000..fb01d7907 --- /dev/null +++ b/web-app/src/containers/dialogs/OutOfContextDialog.tsx @@ -0,0 +1,104 @@ +import { t } from 'i18next' +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from '@/components/ui/dialog' + +import { ReactNode, useCallback, useState } from 'react' +import { Button } from '@/components/ui/button' + +export function useOutOfContextPromiseModal() { + const [isOpen, setIsOpen] = useState(false) + const [modalProps, setModalProps] = useState<{ + resolveRef: ((value: unknown) => void) | null + }>({ + resolveRef: null, + }) + // Function to open the modal and return a Promise + const showModal = useCallback(() => { + return new Promise((resolve) => { + setModalProps({ + resolveRef: resolve, + }) + setIsOpen(true) + }) + }, []) + + const PromiseModal = useCallback((): ReactNode => { + if (!isOpen) { + return null + } + + const handleConfirm = () => { + setIsOpen(false) + if (modalProps.resolveRef) { + modalProps.resolveRef(true) + } + } + + const handleCancel = () => { + setIsOpen(false) + if (modalProps.resolveRef) { + modalProps.resolveRef(false) + } + } + + return ( + { + setIsOpen(open) + if (!open) handleCancel() + }} + > + + + + {t('outOfContextError.title', 'Out of context error')} + + + + {t( + 'outOfContextError.description', + 'This chat is reaching the AI’s memory limit, like a whiteboard filling up. We can expand the memory window (called context size) so it remembers more, but it may use more of your computer’s memory.' + )} +
+
+ {t( + 'outOfContextError.increaseContextSizeDescription', + 'Do you want to increase the context size?' + )} +
+ + + + +
+
+ ) + }, [isOpen, modalProps]) + return { showModal, PromiseModal } +} diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts index ace57d8d2..dc29f7f8a 100644 --- a/web-app/src/hooks/useAppState.ts +++ b/web-app/src/hooks/useAppState.ts @@ -12,6 +12,7 @@ type AppState = { abortControllers: Record tokenSpeed?: TokenSpeed currentToolCall?: ChatCompletionMessageToolCall + showOutOfContextDialog?: boolean setServerStatus: (value: 'running' | 'stopped' | 'pending') => void updateStreamingContent: (content: ThreadMessage | undefined) => void updateCurrentToolCall: ( @@ -22,6 +23,7 @@ type AppState = { setAbortController: (threadId: string, controller: AbortController) => void updateTokenSpeed: (message: ThreadMessage) => void resetTokenSpeed: () => void + setOutOfContextDialog: (show: boolean) => void } export const useAppState = create()((set) => ({ @@ -99,4 +101,9 @@ export const useAppState = create()((set) => ({ set({ tokenSpeed: undefined, }), + setOutOfContextDialog: (show) => { + set(() => ({ + showOutOfContextDialog: show, + })) + }, })) diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 3073ececf..164555563 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -24,10 +24,11 @@ import { getTools } from '@/services/mcp' import { MCPTool } from '@/types/completion' import { listen } from '@tauri-apps/api/event' import { SystemEvent } from '@/types/events' -import { stopModel, startModel } from '@/services/models' +import { stopModel, startModel, stopAllModels } from '@/services/models' import { useToolApproval } from '@/hooks/useToolApproval' import { useToolAvailable } from '@/hooks/useToolAvailable' +import { OUT_OF_CONTEXT_SIZE } from '@/utils/error' export const useChat = () => { const { prompt, setPrompt } = usePrompt() @@ -41,6 +42,7 @@ export const useChat = () => { setAbortController, } = useAppState() const { currentAssistant } = useAssistant() + const { updateProvider } = useModelProvider() const { approvedTools, showApprovalModal, allowAllMCPPermissions } = useToolApproval() @@ -108,8 +110,60 @@ export const useChat = () => { currentAssistant, ]) + const increaseModelContextSize = useCallback( + (model: Model, provider: ProviderObject) => { + /** + * Should increase the context size of the model by 2x + * If the context size is not set or too low, it defaults to 8192. + */ + const ctxSize = Math.max( + model.settings?.ctx_len?.controller_props.value + ? typeof model.settings.ctx_len.controller_props.value === 'string' + ? parseInt(model.settings.ctx_len.controller_props.value as string) + : (model.settings.ctx_len.controller_props.value as number) + : 8192, + 8192 + ) + const updatedModel = { + ...model, + settings: { + ...model.settings, + ctx_len: { + ...(model.settings?.ctx_len != null ? model.settings?.ctx_len : {}), + controller_props: { + ...(model.settings?.ctx_len?.controller_props ?? 
{}), + value: ctxSize * 2, + }, + }, + }, + } + + // Find the model index in the provider's models array + const modelIndex = provider.models.findIndex((m) => m.id === model.id) + + if (modelIndex !== -1) { + // Create a copy of the provider's models array + const updatedModels = [...provider.models] + + // Update the specific model in the array + updatedModels[modelIndex] = updatedModel as Model + + // Update the provider with the new models array + updateProvider(provider.provider, { + models: updatedModels, + }) + } + stopAllModels() + }, + [updateProvider] + ) + const sendMessage = useCallback( - async (message: string) => { + async ( + message: string, + showModal?: () => Promise, + troubleshooting = true + ) => { const activeThread = await getCurrentThread() resetTokenSpeed() @@ -121,7 +175,9 @@ export const useChat = () => { const abortController = new AbortController() setAbortController(activeThread.id, abortController) updateStreamingContent(emptyThreadContent) - addMessage(newUserThreadContent(activeThread.id, message)) + // Do not add new message on retry + if (troubleshooting) + addMessage(newUserThreadContent(activeThread.id, message)) updateThreadTimestamp(activeThread.id) setPrompt('') try { @@ -180,6 +236,14 @@ export const useChat = () => { } } else { for await (const part of completion) { + // Error message + if (!part.choices) { + throw new Error( + 'message' in part + ? (part.message as string) + : (JSON.stringify(part) ?? '') + ) + } const delta = part.choices[0]?.delta?.content || '' if (part.choices[0]?.delta?.tool_calls) { @@ -252,9 +316,26 @@ export const useChat = () => { if (!followUpWithToolUse) availableTools = [] } } catch (error) { - toast.error( - `Error sending message: ${error && typeof error === 'object' && 'message' in error ? error.message : error}` - ) + const errorMessage = + error && typeof error === 'object' && 'message' in error + ? error.message + : error + if ( + typeof errorMessage === 'string' && + errorMessage.includes(OUT_OF_CONTEXT_SIZE) && + selectedModel && + troubleshooting + ) { + showModal?.().then((confirmed) => { + if (confirmed) { + increaseModelContextSize(selectedModel, activeProvider) + setTimeout(() => { + sendMessage(message, showModal, false) // Retry sending the message without troubleshooting + }, 1000) + } + }) + } + toast.error(`Error sending message: ${errorMessage}`) console.error('Error sending message:', error) } finally { updateLoadingModel(false) @@ -282,6 +363,7 @@ export const useChat = () => { allowAllMCPPermissions, showApprovalModal, updateTokenSpeed, + increaseModelContextSize, ] ) diff --git a/web-app/src/lib/completion.ts b/web-app/src/lib/completion.ts index 1e8a5ec55..24daec3cd 100644 --- a/web-app/src/lib/completion.ts +++ b/web-app/src/lib/completion.ts @@ -134,7 +134,8 @@ export const sendCompletion = async ( thread.model.id && !(thread.model.id in Object.values(models).flat()) && // eslint-disable-next-line @typescript-eslint/no-explicit-any - !tokenJS.extendedModelExist(providerName as any, thread.model?.id) + !tokenJS.extendedModelExist(providerName as any, thread.model?.id) && + provider.provider !== 'llama.cpp' ) { try { tokenJS.extendModelList( @@ -323,7 +324,7 @@ export const postMessageProcessing = async ( ? await showModal(toolCall.function.name, message.thread_id) : true) - const result = approved + let result = approved ? 
await callTool({ toolName: toolCall.function.name, arguments: toolCall.function.arguments.length @@ -335,7 +336,7 @@ export const postMessageProcessing = async ( content: [ { type: 'text', - text: `Error calling tool ${toolCall.function.name}: ${e.message}`, + text: `Error calling tool ${toolCall.function.name}: ${e.message ?? e}`, }, ], error: true, @@ -350,7 +351,16 @@ export const postMessageProcessing = async ( ], } - if ('error' in result && result.error) break + if (typeof result === 'string') { + result = { + content: [ + { + type: 'text', + text: result, + }, + ], + } + } message.metadata = { ...(message.metadata ?? {}), diff --git a/web-app/src/locales/en/common.json b/web-app/src/locales/en/common.json index 296c0162c..9abd447ad 100644 --- a/web-app/src/locales/en/common.json +++ b/web-app/src/locales/en/common.json @@ -23,6 +23,7 @@ "reset": "Reset", "search": "Search", "name": "Name", + "cancel": "Cancel", "placeholder": { "chatInput": "Ask me anything..." diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index 8c299bd5f..f58723ac4 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -77,6 +77,7 @@ function Hub() { const addModelSourceTimeoutRef = useRef | null>( null ) + const downloadButtonRef = useRef(null) const { getProviderByName } = useModelProvider() const llamaProvider = getProviderByName('llama.cpp') @@ -233,18 +234,14 @@ function Hub() { isRecommended && 'hub-download-button-step' )} > -
- - - {Math.round(downloadProgress * 100)}% - -
+ {isDownloading && !isDownloaded && ( +
+ + + {Math.round(downloadProgress * 100)}% + +
+ )} {isDownloaded ? ( @@ -266,6 +264,7 @@ function Hub() { llamaProvider?.models, handleUseModel, isRecommendedModel, + downloadButtonRef, ]) const { step } = useSearch({ from: Route.id }) @@ -291,8 +290,9 @@ function Hub() { isRecommendedModel(model.metadata?.id) ) if (recommendedModel && recommendedModel.models[0]?.id) { - downloadModel(recommendedModel.models[0].id) - + if (downloadButtonRef.current) { + downloadButtonRef.current.click() + } return } } @@ -413,7 +413,7 @@ function Hub() { -
+
{loading ? (
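A note for reviewers: the context-recovery path added in this patch is spread across several files (the OutOfContextDialog component and the resize-and-retry logic in useChat.ts above, plus the OUT_OF_CONTEXT_SIZE match string in error.ts below). The only non-obvious arithmetic is the growth rule in `increaseModelContextSize`: the model's current `ctx_len` is clamped up to at least 8192 and then doubled. The sketch below restates that rule as a standalone function; `nextContextSize` is a hypothetical helper name for illustration and is not part of the diff.

```typescript
// Hypothetical helper illustrating the ctx_len growth rule used by
// increaseModelContextSize in useChat.ts above; not part of this patch.
function nextContextSize(current?: number | string): number {
  // ctx_len may arrive as a string or a number from the model settings controller.
  const parsed =
    typeof current === 'string' ? parseInt(current, 10) : current ?? 8192
  // Unset, NaN, or too-small values are treated as 8192 before doubling.
  const base = Math.max(Number.isFinite(parsed) ? parsed : 8192, 8192)
  return base * 2
}

// Examples: nextContextSize(4096) === 16384; nextContextSize(undefined) === 16384;
// nextContextSize('16384') === 32768.
console.log(nextContextSize(4096), nextContextSize(undefined), nextContextSize('16384'))
```

After the provider's model entry is updated with the doubled value, `stopAllModels()` is called so the next request reloads the model with the larger context window, and `sendMessage` retries once without re-adding the user message.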
diff --git a/web-app/src/routes/threads/$threadId.tsx b/web-app/src/routes/threads/$threadId.tsx index 1190dfc1c..2f18f6094 100644 --- a/web-app/src/routes/threads/$threadId.tsx +++ b/web-app/src/routes/threads/$threadId.tsx @@ -18,6 +18,7 @@ import { useAppState } from '@/hooks/useAppState' import DropdownAssistant from '@/containers/DropdownAssistant' import { useAssistant } from '@/hooks/useAssistant' import { useAppearance } from '@/hooks/useAppearance' +import { useOutOfContextPromiseModal } from '@/containers/dialogs/OutOfContextDialog' // as route.threadsDetail export const Route = createFileRoute('/threads/$threadId')({ @@ -47,6 +48,8 @@ function ThreadDetail() { const scrollContainerRef = useRef(null) const isFirstRender = useRef(true) const messagesCount = useMemo(() => messages?.length ?? 0, [messages]) + const { showModal, PromiseModal: OutOfContextModal } = + useOutOfContextPromiseModal() // Function to check scroll position and scrollbar presence const checkScrollState = () => { @@ -193,6 +196,8 @@ function ThreadDetail() { if (!messages || !threadModel) return null + const contextOverflowModalComponent = + return (
@@ -233,6 +238,8 @@ function ThreadDetail() { )) } index={index} + showContextOverflowModal={showModal} + contextOverflowModal={contextOverflowModalComponent} />
) diff --git a/web-app/src/utils/error.ts b/web-app/src/utils/error.ts new file mode 100644 index 000000000..1019d2853 --- /dev/null +++ b/web-app/src/utils/error.ts @@ -0,0 +1,2 @@ +export const OUT_OF_CONTEXT_SIZE = + 'the request exceeds the available context size.' diff --git a/web-app/src/utils/models.ts b/web-app/src/utils/models.ts index c21b4a3a9..7f8756f67 100644 --- a/web-app/src/utils/models.ts +++ b/web-app/src/utils/models.ts @@ -1,6 +1,6 @@ export const hardcodedModel = { - author: 'Menlo', - id: 'Menlo/Jan-nano', + author: 'menlo', + id: 'menlo/jan-nano', metadata: { '_id': '68492cd9cada68b1d11ca1bd', 'author': 'Menlo', @@ -12,7 +12,7 @@ export const hardcodedModel = { 'description': '---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)', 'disabled': false, - 'downloads': 939, + 'downloads': 1434, 'gated': false, 'gguf': { architecture: 'qwen3', @@ -25,17 +25,17 @@ export const hardcodedModel = { total: 4022468096, }, 'id': 'Menlo/Jan-nano', - 'lastModified': '2025-06-13T05:53:33.000Z', - 'likes': 2, + 'lastModified': '2025-06-13T16:57:55.000Z', + 'likes': 3, 'model-index': null, 'modelId': 'Menlo/Jan-nano', 'pipeline_tag': 'text-generation', 'private': false, - 'sha': '782985633ac4080dfdaa52e62d61dcf637e9ff0d', + 'sha': 'a04aab0878648d8f284c63a52664a482ead16f06', 'siblings': [ { rfilename: '.gitattributes', - size: 1742, + size: 3460, }, { rfilename: 'README.md', @@ -45,6 +45,58 @@ export const hardcodedModel = { rfilename: 'jan-nano-0.4-iQ4_XS.gguf', size: 2270750400, }, + { + rfilename: 'jan-nano-4b-Q3_K_L.gguf', + size: 2239784384, + }, + { + rfilename: 'jan-nano-4b-Q3_K_M.gguf', + size: 2075616704, + }, + { + rfilename: 'jan-nano-4b-Q3_K_S.gguf', + size: 1886995904, + }, + { + rfilename: 'jan-nano-4b-Q4_0.gguf', + size: 2369545664, + }, + { + rfilename: 'jan-nano-4b-Q4_1.gguf', + size: 2596627904, + }, + { + rfilename: 'jan-nano-4b-Q4_K_M.gguf', + size: 2497279424, + }, + { + rfilename: 'jan-nano-4b-Q4_K_S.gguf', + size: 2383308224, + }, + { + rfilename: 'jan-nano-4b-Q5_0.gguf', + size: 2823710144, + }, + { + rfilename: 'jan-nano-4b-Q5_1.gguf', + size: 3050792384, + }, + { + rfilename: 'jan-nano-4b-Q5_K_M.gguf', + size: 2889512384, + }, + { + rfilename: 'jan-nano-4b-Q5_K_S.gguf', + size: 2823710144, + }, + { + rfilename: 'jan-nano-4b-Q6_K.gguf', + size: 3306259904, + }, + { + rfilename: 'jan-nano-4b-Q8_0.gguf', + size: 4280403904, + }, ], 'spaces': [], 'tags': [ @@ -56,7 +108,7 @@ export const hardcodedModel = { 'imatrix', 'conversational', ], - 'usedStorage': 20820673088, + 'usedStorage': 93538518464, 'widgetData': [ { text: 'Hi, what can you help me with?', @@ -74,8 +126,60 @@ export const hardcodedModel = { }, models: [ { - id: 'Menlo:Jan-nano:jan-nano-0.4-iQ4_XS.gguf', + id: 'menlo:jan-nano:jan-nano-0.4-iQ4_XS.gguf', size: 2270750400, 
}, + { + id: 'menlo:jan-nano:jan-nano-4b-Q3_K_L.gguf', + size: 2239784384, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q3_K_M.gguf', + size: 2075616704, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q3_K_S.gguf', + size: 1886995904, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q4_0.gguf', + size: 2369545664, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q4_1.gguf', + size: 2596627904, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q4_K_M.gguf', + size: 2497279424, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q4_K_S.gguf', + size: 2383308224, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q5_0.gguf', + size: 2823710144, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q5_1.gguf', + size: 3050792384, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q5_K_M.gguf', + size: 2889512384, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q5_K_S.gguf', + size: 2823710144, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q6_K.gguf', + size: 3306259904, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q8_0.gguf', + size: 4280403904, + }, ], } From b5bdf3cfd2c162d72516a1cfc8a146532e555fe4 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Sat, 14 Jun 2025 17:38:14 +0700 Subject: [PATCH 05/48] fix: glitch download from onboarding (#5269) --- web-app/src/routes/hub.tsx | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index f58723ac4..22a747d70 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -78,6 +78,7 @@ function Hub() { null ) const downloadButtonRef = useRef(null) + const hasTriggeredDownload = useRef(false) const { getProviderByName } = useModelProvider() const llamaProvider = getProviderByName('llama.cpp') @@ -285,12 +286,18 @@ function Hub() { const handleJoyrideCallback = (data: CallBackProps) => { const { status, index } = data - if (status === STATUS.FINISHED && !isDownloading && isLastStep) { + if ( + status === STATUS.FINISHED && + !isDownloading && + isLastStep && + !hasTriggeredDownload.current + ) { const recommendedModel = filteredModels.find((model) => isRecommendedModel(model.metadata?.id) ) if (recommendedModel && recommendedModel.models[0]?.id) { if (downloadButtonRef.current) { + hasTriggeredDownload.current = true downloadButtonRef.current.click() } return From a10dbef2c81e555748739c9363f9d9f2707a167a Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 14 Jun 2025 22:00:43 +0700 Subject: [PATCH 06/48] =?UTF-8?q?=E2=9C=A8enhancement:=20Model=20sources?= =?UTF-8?q?=20should=20not=20be=20hard=20coded=20from=20frontend=20(#5270)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../model-extension/resources/default.json | 183 ++++++++++++++++++ extensions/model-extension/src/index.ts | 12 +- web-app/src/services/models.ts | 9 +- 3 files changed, 198 insertions(+), 6 deletions(-) diff --git a/extensions/model-extension/resources/default.json b/extensions/model-extension/resources/default.json index dbd9d906e..8884af70f 100644 --- a/extensions/model-extension/resources/default.json +++ b/extensions/model-extension/resources/default.json @@ -1,4 +1,187 @@ [ + { + "author": "Menlo", + "id": "Menlo/Jan-nano", + "metadata": { + "_id": "68492cd9cada68b1d11ca1bd", + "author": "Menlo", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation" + }, + "createdAt": "2025-06-11T07:14:33.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan 
Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)", + "disabled": false, + "downloads": 1434, + "gated": false, + "gguf": { + "architecture": "qwen3", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %} {{- '<|im_start|>system\\n' }} {%- if messages[0].role == 'system' %} {{- messages[0].content + '\\n\\n' }} {%- endif %} {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }} {%- for tool in tools %} {{- \"\\n\" }} {{- tool | tojson }} {%- endfor %} {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }} {%- else %} {%- if messages[0].role == 'system' %} {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endfor %} {%- for message in messages %} {%- if message.content is string %} {%- set content = message.content %} {%- else %} {%- set content = '' %} {%- endif %} {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %} {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }} {%- elif message.role == \"assistant\" %} {%- set reasoning_content = '' %} {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} {%- if '' in content %} {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %} {%- set content = content.split('')[-1].lstrip('\\n') %} {%- endif %} {%- endif %} {%- if loop.index0 > ns.last_query_index %} {%- if loop.last or (not loop.last and reasoning_content) %} {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- if message.tool_calls %} {%- for tool_call in message.tool_calls %} {%- if (loop.first and content) or (not loop.first) %} {{- '\\n' }} {%- endif %} {%- if tool_call.function %} {%- set tool_call = tool_call.function %} {%- endif %} {{- '\\n{\"name\": \"' }} {{- tool_call.name }} {{- '\", \"arguments\": ' }} {%- if tool_call.arguments is string %} {{- tool_call.arguments }} {%- else %} {{- tool_call.arguments | tojson }} {%- endif %} 
{{- '}\\n' }} {%- endfor %} {%- endif %} {{- '<|im_end|>\\n' }} {%- elif message.role == \"tool\" %} {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %} {{- '<|im_start|>user' }} {%- endif %} {{- '\\n\\n' }} {{- content }} {{- '\\n' }} {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %} {{- '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\\n' }} {{- '\\n\\n\\n\\n' }} {%- endif %}", + "context_length": 40960, + "eos_token": "<|im_end|>", + "quantize_imatrix_file": "imatrix.dat", + "total": 4022468096 + }, + "id": "Menlo/Jan-nano", + "lastModified": "2025-06-13T16:57:55.000Z", + "likes": 3, + "model-index": null, + "modelId": "Menlo/Jan-nano", + "pipeline_tag": "text-generation", + "private": false, + "sha": "a04aab0878648d8f284c63a52664a482ead16f06", + "siblings": [ + { + "rfilename": ".gitattributes", + "size": 3460 + }, + { + "rfilename": "README.md", + "size": 776 + }, + { + "rfilename": "jan-nano-4b-iQ4_XS.gguf", + "size": 2270750400 + }, + { + "rfilename": "jan-nano-4b-Q3_K_L.gguf", + "size": 2239784384 + }, + { + "rfilename": "jan-nano-4b-Q3_K_M.gguf", + "size": 2075616704 + }, + { + "rfilename": "jan-nano-4b-Q3_K_S.gguf", + "size": 1886995904 + }, + { + "rfilename": "jan-nano-4b-Q4_0.gguf", + "size": 2369545664 + }, + { + "rfilename": "jan-nano-4b-Q4_1.gguf", + "size": 2596627904 + }, + { + "rfilename": "jan-nano-4b-Q4_K_M.gguf", + "size": 2497279424 + }, + { + "rfilename": "jan-nano-4b-Q4_K_S.gguf", + "size": 2383308224 + }, + { + "rfilename": "jan-nano-4b-Q5_0.gguf", + "size": 2823710144 + }, + { + "rfilename": "jan-nano-4b-Q5_1.gguf", + "size": 3050792384 + }, + { + "rfilename": "jan-nano-4b-Q5_K_M.gguf", + "size": 2889512384 + }, + { + "rfilename": "jan-nano-4b-Q5_K_S.gguf", + "size": 2823710144 + }, + { + "rfilename": "jan-nano-4b-Q6_K.gguf", + "size": 3306259904 + }, + { + "rfilename": "jan-nano-4b-Q8_0.gguf", + "size": 4280403904 + } + ], + "spaces": [], + "tags": [ + "gguf", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "imatrix", + "conversational" + ], + "usedStorage": 93538518464, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "Menlo:Jan-nano:jan-nano-4b-iQ4_XS.gguf", + "size": 2270750400 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_L.gguf", + "size": 2239784384 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_M.gguf", + "size": 2075616704 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_S.gguf", + "size": 1886995904 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q4_0.gguf", + "size": 2369545664 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q4_1.gguf", + "size": 2596627904 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q4_K_M.gguf", + "size": 2497279424 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q4_K_S.gguf", + "size": 2383308224 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q5_0.gguf", + "size": 2823710144 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q5_1.gguf", + "size": 3050792384 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q5_K_M.gguf", + "size": 2889512384 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q5_K_S.gguf", + "size": 2823710144 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q6_K.gguf", + "size": 3306259904 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q8_0.gguf", + "size": 4280403904 + } + ] + }, { "author": "PrimeIntellect", "id": "cortexso/intellect-2", diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index 09d1252ce..9fa6132c5 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -25,6 +25,11 @@ type Data = { data: T[] } +/** + * Defaul mode sources + */ +const defaultModelSources = ['Menlo/Jan-nano'] + /** * A extension for models */ @@ -396,6 +401,11 @@ export default class JanModelExtension extends ModelExtension { fetchModelsHub = async () => { const models = await this.fetchModels() + defaultModelSources.forEach((model) => { + this.addSource(model).catch((e) => { + console.debug(`Failed to add default model source ${model}:`, e) + }) + }) return this.apiInstance() .then((api) => api @@ -403,7 +413,7 @@ export default class JanModelExtension extends ModelExtension { .json>() .then(async (e) => { await Promise.all( - e.data?.map((model) => { + [...(e.data ?? []), ...defaultModelSources].map((model) => { if ( !models.some( (e) => 'modelSource' in e && e.modelSource === model diff --git a/web-app/src/services/models.ts b/web-app/src/services/models.ts index 071f032b8..77fbde6de 100644 --- a/web-app/src/services/models.ts +++ b/web-app/src/services/models.ts @@ -1,7 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { ExtensionManager } from '@/lib/extension' import { normalizeProvider } from '@/lib/models' -import { hardcodedModel } from '@/utils/models' import { EngineManager, ExtensionTypeEnum, ModelExtension } from '@janhq/core' import { Model as CoreModel } from '@janhq/core' @@ -24,7 +23,7 @@ export const fetchModelSources = async (): Promise => { ExtensionTypeEnum.Model ) - if (!extension) return [hardcodedModel] + if (!extension) return [] try { const sources = await extension.getSources() @@ -34,10 +33,10 @@ export const fetchModelSources = async (): Promise => { })) // Prepend the hardcoded model to the sources - return [hardcodedModel, ...mappedSources] + return [...mappedSources] } catch (error) { console.error('Failed to fetch model sources:', error) - return [hardcodedModel] + return [] } } @@ -51,7 +50,7 @@ export const fetchModelHub = async (): Promise => { ?.fetchModelsHub() // Prepend the hardcoded model to the hub data - return hubData ? [hardcodedModel, ...hubData] : [hardcodedModel] + return hubData ? 
[...hubData] : [] } /** From 2634659366d43cec3527d0cca16c7d5a8feb67f4 Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 15 Jun 2025 02:34:14 +0700 Subject: [PATCH 07/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20default=20onboarding?= =?UTF-8?q?=20model=20should=20use=20recommended=20quantizations=20(#5273)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: default onboarding model should use recommended quantizations * ✨enhancement: show context shift option in provider settings * 🔧chore: wording --- extensions/download-extension/package.json | 2 +- .../resources/default_settings.json | 9 + .../inference-cortex-extension/src/index.ts | 12 +- web-app/src/routes/hub.tsx | 16 +- web-app/src/utils/models.ts | 185 ------------------ 5 files changed, 35 insertions(+), 189 deletions(-) delete mode 100644 web-app/src/utils/models.ts diff --git a/extensions/download-extension/package.json b/extensions/download-extension/package.json index 750934594..58fe42289 100644 --- a/extensions/download-extension/package.json +++ b/extensions/download-extension/package.json @@ -2,7 +2,7 @@ "name": "@janhq/download-extension", "productName": "Download Manager", "version": "1.0.0", - "description": "Handle downloads", + "description": "Download and manage files and AI models in Jan.", "main": "dist/index.js", "author": "Jan ", "license": "AGPL-3.0", diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json index 451596842..d27624639 100644 --- a/extensions/inference-cortex-extension/resources/default_settings.json +++ b/extensions/inference-cortex-extension/resources/default_settings.json @@ -8,6 +8,15 @@ "value": true } }, + { + "key": "context_shift", + "title": "Context Shift", + "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.", + "controllerType": "checkbox", + "controllerProps": { + "value": true + } + }, { "key": "cont_batching", "title": "Continuous Batching", diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 626d53696..3e8b60ebe 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -37,6 +37,7 @@ enum Settings { cpu_threads = 'cpu_threads', huggingfaceToken = 'hugging-face-access-token', auto_unload_models = 'auto_unload_models', + context_shift = 'context_shift', } type LoadedModelResponse = { data: { engine: string; id: string }[] } @@ -63,6 +64,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { cpu_threads?: number auto_unload_models: boolean = true reasoning_budget = -1 // Default reasoning budget in seconds + context_shift = true /** * The URL for making inference requests. 
*/ @@ -128,6 +130,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { true ) this.flash_attn = await this.getSetting(Settings.flash_attn, true) + this.context_shift = await this.getSetting( + Settings.context_shift, + true + ) this.use_mmap = await this.getSetting(Settings.use_mmap, true) if (this.caching_enabled) this.cache_type = await this.getSetting(Settings.cache_type, 'q8') @@ -209,6 +215,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { this.updateCortexConfig({ huggingface_token: value }) } else if (key === Settings.auto_unload_models) { this.auto_unload_models = value as boolean + } else if (key === Settings.context_shift && typeof value === 'boolean') { + this.context_shift = value } } @@ -271,7 +279,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ...(model.id.toLowerCase().includes('jan-nano') ? { reasoning_budget: 0 } : { reasoning_budget: this.reasoning_budget }), - ...{ 'no-context-shift': true }, + ...(this.context_shift === false + ? { 'no-context-shift': true } + : {}), }, timeout: false, signal, diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index 22a747d70..ed7fb669b 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -49,6 +49,7 @@ type ModelProps = { type SearchParams = { repo: string } +const defaultModelQuantizations = ['iq4_xs.gguf', 'q4_k_m.gguf'] export const Route = createFileRoute(route.hub as any)({ component: Hub, @@ -219,7 +220,10 @@ function Hub() { const DownloadButtonPlaceholder = useMemo(() => { return ({ model }: ModelProps) => { - const modelId = model.models[0]?.id + const modelId = + model.models.find((e) => + defaultModelQuantizations.some((m) => e.id.toLowerCase().includes(m)) + )?.id ?? model.models[0]?.id const isDownloading = downloadProcesses.some((e) => e.id === modelId) const downloadProgress = downloadProcesses.find((e) => e.id === modelId)?.progress || 0 @@ -460,7 +464,15 @@ function Hub() {
- {toGigabytes(model.models?.[0]?.size)} + {toGigabytes( + ( + model.models.find((m) => + defaultModelQuantizations.some((e) => + m.id.toLowerCase().includes(e) + ) + ) ?? model.models?.[0] + )?.size + )}
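Note on the two hub.tsx hunks above: both perform the same lookup — take the first entry in model.models whose file name contains one of the recommended quantizations (iQ4_XS or Q4_K_M) and fall back to the first entry otherwise. A minimal standalone sketch of that selection logic, using a hypothetical pickDefaultVariant helper and ModelFile type introduced here purely for illustration (the shipped code does the lookup inline on model.models):

type ModelFile = { id: string; size: number }

const defaultModelQuantizations = ['iq4_xs.gguf', 'q4_k_m.gguf']

// First file whose name matches a recommended quantization, else the first file.
function pickDefaultVariant(files: ModelFile[]): ModelFile | undefined {
  return (
    files.find((f) =>
      defaultModelQuantizations.some((q) => f.id.toLowerCase().includes(q))
    ) ?? files[0]
  )
}

// Example with entries taken from the default.json list above: the iQ4_XS build
// is picked even though Q8_0 is listed first, so the download button and the
// size label refer to the same variant.
const files: ModelFile[] = [
  { id: 'Menlo:Jan-nano:jan-nano-4b-Q8_0.gguf', size: 4280403904 },
  { id: 'Menlo:Jan-nano:jan-nano-4b-iQ4_XS.gguf', size: 2270750400 },
]
console.log(pickDefaultVariant(files)?.id) // Menlo:Jan-nano:jan-nano-4b-iQ4_XS.gguf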
diff --git a/web-app/src/utils/models.ts b/web-app/src/utils/models.ts deleted file mode 100644 index 7f8756f67..000000000 --- a/web-app/src/utils/models.ts +++ /dev/null @@ -1,185 +0,0 @@ -export const hardcodedModel = { - author: 'menlo', - id: 'menlo/jan-nano', - metadata: { - '_id': '68492cd9cada68b1d11ca1bd', - 'author': 'Menlo', - 'cardData': { - license: 'apache-2.0', - pipeline_tag: 'text-generation', - }, - 'createdAt': '2025-06-11T07:14:33.000Z', - 'description': - '---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)', - 'disabled': false, - 'downloads': 1434, - 'gated': false, - 'gguf': { - architecture: 'qwen3', - bos_token: '<|endoftext|>', - chat_template: - "{%- if tools %} {{- '<|im_start|>system\\n' }} {%- if messages[0].role == 'system' %} {{- messages[0].content + '\\n\\n' }} {%- endif %} {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }} {%- for tool in tools %} {{- \"\\n\" }} {{- tool | tojson }} {%- endfor %} {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }} {%- else %} {%- if messages[0].role == 'system' %} {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endfor %} {%- for message in messages %} {%- if message.content is string %} {%- set content = message.content %} {%- else %} {%- set content = '' %} {%- endif %} {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %} {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }} {%- elif message.role == \"assistant\" %} {%- set reasoning_content = '' %} {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} {%- if '' in content %} {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %} {%- set content = content.split('')[-1].lstrip('\\n') %} {%- endif %} {%- endif %} {%- if loop.index0 > ns.last_query_index %} {%- if loop.last or (not loop.last and reasoning_content) %} {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + 
content }} {%- endif %} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- if message.tool_calls %} {%- for tool_call in message.tool_calls %} {%- if (loop.first and content) or (not loop.first) %} {{- '\\n' }} {%- endif %} {%- if tool_call.function %} {%- set tool_call = tool_call.function %} {%- endif %} {{- '\\n{\"name\": \"' }} {{- tool_call.name }} {{- '\", \"arguments\": ' }} {%- if tool_call.arguments is string %} {{- tool_call.arguments }} {%- else %} {{- tool_call.arguments | tojson }} {%- endif %} {{- '}\\n' }} {%- endfor %} {%- endif %} {{- '<|im_end|>\\n' }} {%- elif message.role == \"tool\" %} {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %} {{- '<|im_start|>user' }} {%- endif %} {{- '\\n\\n' }} {{- content }} {{- '\\n' }} {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %} {{- '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\\n' }} {{- '\\n\\n\\n\\n' }} {%- endif %}", - context_length: 40960, - eos_token: '<|im_end|>', - quantize_imatrix_file: 'imatrix.dat', - total: 4022468096, - }, - 'id': 'Menlo/Jan-nano', - 'lastModified': '2025-06-13T16:57:55.000Z', - 'likes': 3, - 'model-index': null, - 'modelId': 'Menlo/Jan-nano', - 'pipeline_tag': 'text-generation', - 'private': false, - 'sha': 'a04aab0878648d8f284c63a52664a482ead16f06', - 'siblings': [ - { - rfilename: '.gitattributes', - size: 3460, - }, - { - rfilename: 'README.md', - size: 776, - }, - { - rfilename: 'jan-nano-0.4-iQ4_XS.gguf', - size: 2270750400, - }, - { - rfilename: 'jan-nano-4b-Q3_K_L.gguf', - size: 2239784384, - }, - { - rfilename: 'jan-nano-4b-Q3_K_M.gguf', - size: 2075616704, - }, - { - rfilename: 'jan-nano-4b-Q3_K_S.gguf', - size: 1886995904, - }, - { - rfilename: 'jan-nano-4b-Q4_0.gguf', - size: 2369545664, - }, - { - rfilename: 'jan-nano-4b-Q4_1.gguf', - size: 2596627904, - }, - { - rfilename: 'jan-nano-4b-Q4_K_M.gguf', - size: 2497279424, - }, - { - rfilename: 'jan-nano-4b-Q4_K_S.gguf', - size: 2383308224, - }, - { - rfilename: 'jan-nano-4b-Q5_0.gguf', - size: 2823710144, - }, - { - rfilename: 'jan-nano-4b-Q5_1.gguf', - size: 3050792384, - }, - { - rfilename: 'jan-nano-4b-Q5_K_M.gguf', - size: 2889512384, - }, - { - rfilename: 'jan-nano-4b-Q5_K_S.gguf', - size: 2823710144, - }, - { - rfilename: 'jan-nano-4b-Q6_K.gguf', - size: 3306259904, - }, - { - rfilename: 'jan-nano-4b-Q8_0.gguf', - size: 4280403904, - }, - ], - 'spaces': [], - 'tags': [ - 'gguf', - 'text-generation', - 'license:apache-2.0', - 'endpoints_compatible', - 'region:us', - 'imatrix', - 'conversational', - ], - 'usedStorage': 93538518464, - 'widgetData': [ - { - text: 'Hi, what can you help me with?', - }, - { - text: 'What is 84 * 3 / 2?', - }, - { - text: 'Tell me an interesting fact about the universe!', - }, - { - text: 'Explain quantum computing in simple terms.', - }, - ], - }, - models: [ - { - id: 'menlo:jan-nano:jan-nano-0.4-iQ4_XS.gguf', - size: 2270750400, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q3_K_L.gguf', - size: 2239784384, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q3_K_M.gguf', - size: 2075616704, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q3_K_S.gguf', - size: 1886995904, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_0.gguf', - size: 2369545664, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_1.gguf', - size: 2596627904, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_K_M.gguf', - size: 2497279424, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_K_S.gguf', - size: 2383308224, - }, - { - 
id: 'menlo:jan-nano:jan-nano-4b-Q5_0.gguf', - size: 2823710144, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q5_1.gguf', - size: 3050792384, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q5_K_M.gguf', - size: 2889512384, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q5_K_S.gguf', - size: 2823710144, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q6_K.gguf', - size: 3306259904, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q8_0.gguf', - size: 4280403904, - }, - ], -} From c896398fabb10c8c83a17c6bb8479ed0437ba7db Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 10:45:37 +0800 Subject: [PATCH 08/48] =?UTF-8?q?=F0=9F=94=A7=20config:=20add=20to=20gitig?= =?UTF-8?q?nore?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index df59e4644..f702c6512 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,7 @@ src-tauri/icons !src-tauri/icons/icon.png src-tauri/gen/apple src-tauri/resources/bin + +# Helper tools +.opencode +OpenCode.md From 9ab69b157babb74976ef12269c6f53f1a68f2ce1 Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 15 Jun 2025 11:17:41 +0700 Subject: [PATCH 09/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20Jan-nano=20repo=20name?= =?UTF-8?q?=20changed=20(#5274)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- extensions/model-extension/resources/default.json | 6 +++--- extensions/model-extension/src/index.ts | 4 +++- web-app/src/routes/hub.tsx | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/extensions/model-extension/resources/default.json b/extensions/model-extension/resources/default.json index 8884af70f..32bc278e4 100644 --- a/extensions/model-extension/resources/default.json +++ b/extensions/model-extension/resources/default.json @@ -1,7 +1,7 @@ [ { "author": "Menlo", - "id": "Menlo/Jan-nano", + "id": "Menlo/Jan-nano-gguf", "metadata": { "_id": "68492cd9cada68b1d11ca1bd", "author": "Menlo", @@ -23,11 +23,11 @@ "quantize_imatrix_file": "imatrix.dat", "total": 4022468096 }, - "id": "Menlo/Jan-nano", + "id": "Menlo/Jan-nano-gguf", "lastModified": "2025-06-13T16:57:55.000Z", "likes": 3, "model-index": null, - "modelId": "Menlo/Jan-nano", + "modelId": "Menlo/Jan-nano-gguf", "pipeline_tag": "text-generation", "private": false, "sha": "a04aab0878648d8f284c63a52664a482ead16f06", diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index 9fa6132c5..f0f0589df 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -28,7 +28,7 @@ type Data = { /** * Defaul mode sources */ -const defaultModelSources = ['Menlo/Jan-nano'] +const defaultModelSources = ['Menlo/Jan-nano-gguf'] /** * A extension for models @@ -291,6 +291,8 @@ export default class JanModelExtension extends ModelExtension { const sources = await this.apiInstance() .then((api) => api.get('v1/models/sources').json>()) .then((e) => (typeof e === 'object' ? 
(e.data as ModelSource[]) : [])) + // Deprecated source - filter out from legacy sources + .then((e) => e.filter((x) => x.id.toLowerCase() !== 'menlo/jan-nano')) .catch(() => []) return sources.concat( DEFAULT_MODEL_SOURCES.filter((e) => !sources.some((x) => x.id === e.id)) diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index ed7fb669b..c4327fb04 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -199,7 +199,8 @@ function Hub() { const navigate = useNavigate() const isRecommendedModel = useCallback((modelId: string) => { - return (extractModelName(modelId) === 'Jan-nano') as boolean + return (extractModelName(modelId)?.toLowerCase() === + 'jan-nano-gguf') as boolean }, []) const handleUseModel = useCallback( From 2ff177637970e5a6675e69bcd5a7c8456ef2f88b Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 16:07:08 +0800 Subject: [PATCH 10/48] =?UTF-8?q?=F0=9F=9A=A7=20wip:=20disable=20showSpeed?= =?UTF-8?q?Token=20in=20ChatInput?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ChatInput.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index a83adc59e..de4f9d9ad 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -21,7 +21,7 @@ import { IconTool, IconCodeCircle2, IconPlayerStopFilled, - IconBrandSpeedtest, + // IconBrandSpeedtest, IconX, } from '@tabler/icons-react' import { useTranslation } from 'react-i18next' @@ -48,7 +48,7 @@ type ChatInputProps = { const ChatInput = ({ model, className, - showSpeedToken = true, + showSpeedToken = false, initialMessage, }: ChatInputProps) => { const textareaRef = useRef(null) From 3ae4d12f6079eb68475e2a73d22ad7f3bafb8972 Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 16:08:58 +0800 Subject: [PATCH 11/48] =?UTF-8?q?=F0=9F=90=9B=20fix:=20commented=20out=20t?= =?UTF-8?q?he=20wrong=20import?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ChatInput.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index de4f9d9ad..291642a60 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -21,7 +21,7 @@ import { IconTool, IconCodeCircle2, IconPlayerStopFilled, - // IconBrandSpeedtest, + IconBrandSpeedtest, IconX, } from '@tabler/icons-react' import { useTranslation } from 'react-i18next' From d131752419815832bbd9a629a399d51ea9166e79 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Sun, 15 Jun 2025 16:18:43 +0700 Subject: [PATCH 12/48] fix: masking value MCP env field (#5276) --- web-app/src/routes/settings/mcp-servers.tsx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/web-app/src/routes/settings/mcp-servers.tsx b/web-app/src/routes/settings/mcp-servers.tsx index 02e51dfa4..f545ef5d0 100644 --- a/web-app/src/routes/settings/mcp-servers.tsx +++ b/web-app/src/routes/settings/mcp-servers.tsx @@ -22,6 +22,13 @@ import { useToolApproval } from '@/hooks/useToolApproval' import { toast } from 'sonner' import { invoke } from '@tauri-apps/api/core' +// Function to mask sensitive values +const maskSensitiveValue = (value: string) => { + if (!value) return value + if (value.length <= 8) return '*'.repeat(value.length) + return value.slice(0, 4) + 
'*'.repeat(value.length - 8) + value.slice(-4) +} + // eslint-disable-next-line @typescript-eslint/no-explicit-any export const Route = createFileRoute(route.settings.mcp_servers as any)({ component: MCPServers, @@ -322,7 +329,10 @@ function MCPServers() {
Env:{' '} {Object.entries(config.env) - .map(([key, value]) => `${key}=${value}`) + .map( + ([key, value]) => + `${key}=${maskSensitiveValue(value)}` + ) .join(', ')}
)} From 665de7df5543107527895b537cdbfe3daf686ac1 Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 18:30:39 +0800 Subject: [PATCH 13/48] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20token=20speed?= =?UTF-8?q?=20to=20each=20message=20that=20persist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ChatInput.tsx | 2 +- web-app/src/containers/ThreadContent.tsx | 12 +- .../src/containers/TokenSpeedIndicator.tsx | 22 ++ web-app/src/hooks/useAppState.ts | 77 ++--- web-app/src/hooks/useChat.ts | 282 +++++++++--------- web-app/src/hooks/useMessages.ts | 41 +-- 6 files changed, 236 insertions(+), 200 deletions(-) create mode 100644 web-app/src/containers/TokenSpeedIndicator.tsx diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index 291642a60..a83adc59e 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -48,7 +48,7 @@ type ChatInputProps = { const ChatInput = ({ model, className, - showSpeedToken = false, + showSpeedToken = true, initialMessage, }: ChatInputProps) => { const textareaRef = useRef(null) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 833846db1..40c26993b 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -34,6 +34,9 @@ import { } from '@/components/ui/tooltip' import { formatDate } from '@/utils/formatDate' import { AvatarEmoji } from '@/containers/AvatarEmoji' + +import TokenSpeedIndicator from '@/containers/TokenSpeedIndicator' + import CodeEditor from '@uiw/react-textarea-code-editor' import '@uiw/react-textarea-code-editor/dist.css' @@ -360,8 +363,8 @@ export const ThreadContent = memo( className={cn( 'flex items-center gap-2', item.isLastMessage && - streamingContent && - 'opacity-0 visibility-hidden pointer-events-none' + streamingContent && + 'opacity-0 visibility-hidden pointer-events-none' )} > @@ -445,6 +448,11 @@ export const ThreadContent = memo( )} + +
)} diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx new file mode 100644 index 000000000..b1dfb841c --- /dev/null +++ b/web-app/src/containers/TokenSpeedIndicator.tsx @@ -0,0 +1,22 @@ +import { IconBrandSpeedtest } from '@tabler/icons-react' + +interface TokenSpeedIndicatorProps { + metadata?: Record +} + +export const TokenSpeedIndicator = ({ + metadata +}: TokenSpeedIndicatorProps) => { + const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number })?.tokenSpeed + + return ( +
+ + + {Math.round(persistedTokenSpeed)} tokens/sec + +
+ ) +} + +export default TokenSpeedIndicator diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts index dc29f7f8a..ccf044a4f 100644 --- a/web-app/src/hooks/useAppState.ts +++ b/web-app/src/hooks/useAppState.ts @@ -1,36 +1,36 @@ -import { create } from 'zustand' -import { ThreadMessage } from '@janhq/core' -import { MCPTool } from '@/types/completion' -import { useAssistant } from './useAssistant' -import { ChatCompletionMessageToolCall } from 'openai/resources' +import { create } from "zustand"; +import { ThreadMessage } from "@janhq/core"; +import { MCPTool } from "@/types/completion"; +import { useAssistant } from "./useAssistant"; +import { ChatCompletionMessageToolCall } from "openai/resources"; type AppState = { - streamingContent?: ThreadMessage - loadingModel?: boolean - tools: MCPTool[] - serverStatus: 'running' | 'stopped' | 'pending' - abortControllers: Record - tokenSpeed?: TokenSpeed - currentToolCall?: ChatCompletionMessageToolCall - showOutOfContextDialog?: boolean - setServerStatus: (value: 'running' | 'stopped' | 'pending') => void - updateStreamingContent: (content: ThreadMessage | undefined) => void + streamingContent?: ThreadMessage; + loadingModel?: boolean; + tools: MCPTool[]; + serverStatus: "running" | "stopped" | "pending"; + abortControllers: Record; + tokenSpeed?: TokenSpeed; + currentToolCall?: ChatCompletionMessageToolCall; + showOutOfContextDialog?: boolean; + setServerStatus: (value: "running" | "stopped" | "pending") => void; + updateStreamingContent: (content: ThreadMessage | undefined) => void; updateCurrentToolCall: ( - toolCall: ChatCompletionMessageToolCall | undefined - ) => void - updateLoadingModel: (loading: boolean) => void - updateTools: (tools: MCPTool[]) => void - setAbortController: (threadId: string, controller: AbortController) => void - updateTokenSpeed: (message: ThreadMessage) => void - resetTokenSpeed: () => void - setOutOfContextDialog: (show: boolean) => void -} + toolCall: ChatCompletionMessageToolCall | undefined, + ) => void; + updateLoadingModel: (loading: boolean) => void; + updateTools: (tools: MCPTool[]) => void; + setAbortController: (threadId: string, controller: AbortController) => void; + updateTokenSpeed: (message: ThreadMessage) => void; + resetTokenSpeed: () => void; + setOutOfContextDialog: (show: boolean) => void; +}; export const useAppState = create()((set) => ({ streamingContent: undefined, loadingModel: false, tools: [], - serverStatus: 'stopped', + serverStatus: "stopped", abortControllers: {}, tokenSpeed: undefined, currentToolCall: undefined, @@ -46,18 +46,19 @@ export const useAppState = create()((set) => ({ }, } : undefined, - })) + })); + console.log(useAppState.getState().streamingContent); }, updateCurrentToolCall: (toolCall) => { set(() => ({ currentToolCall: toolCall, - })) + })); }, updateLoadingModel: (loading) => { - set({ loadingModel: loading }) + set({ loadingModel: loading }); }, updateTools: (tools) => { - set({ tools }) + set({ tools }); }, setServerStatus: (value) => set({ serverStatus: value }), setAbortController: (threadId, controller) => { @@ -66,11 +67,11 @@ export const useAppState = create()((set) => ({ ...state.abortControllers, [threadId]: controller, }, - })) + })); }, updateTokenSpeed: (message) => set((state) => { - const currentTimestamp = new Date().getTime() // Get current time in milliseconds + const currentTimestamp = new Date().getTime(); // Get current time in milliseconds if (!state.tokenSpeed) { // If this is the first update, just set the 
lastTimestamp and return return { @@ -80,14 +81,14 @@ export const useAppState = create()((set) => ({ tokenCount: 1, message: message.id, }, - } + }; } const timeDiffInSeconds = - (currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000 // Time difference in seconds - const totalTokenCount = state.tokenSpeed.tokenCount + 1 + (currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000; // Time difference in seconds + const totalTokenCount = state.tokenSpeed.tokenCount + 1; const averageTokenSpeed = - totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1) // Calculate average token speed + totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1); // Calculate average token speed return { tokenSpeed: { ...state.tokenSpeed, @@ -95,7 +96,7 @@ export const useAppState = create()((set) => ({ tokenCount: totalTokenCount, message: message.id, }, - } + }; }), resetTokenSpeed: () => set({ @@ -104,6 +105,6 @@ export const useAppState = create()((set) => ({ setOutOfContextDialog: (show) => { set(() => ({ showOutOfContextDialog: show, - })) + })); }, -})) +})); diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 164555563..0fbfeb5d9 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -1,12 +1,12 @@ -import { useCallback, useEffect, useMemo } from 'react' -import { usePrompt } from './usePrompt' -import { useModelProvider } from './useModelProvider' -import { useThreads } from './useThreads' -import { useAppState } from './useAppState' -import { useMessages } from './useMessages' -import { useRouter } from '@tanstack/react-router' -import { defaultModel } from '@/lib/models' -import { route } from '@/constants/routes' +import { useCallback, useEffect, useMemo } from "react"; +import { usePrompt } from "./usePrompt"; +import { useModelProvider } from "./useModelProvider"; +import { useThreads } from "./useThreads"; +import { useAppState } from "./useAppState"; +import { useMessages } from "./useMessages"; +import { useRouter } from "@tanstack/react-router"; +import { defaultModel } from "@/lib/models"; +import { route } from "@/constants/routes"; import { emptyThreadContent, extractToolCall, @@ -15,23 +15,23 @@ import { newUserThreadContent, postMessageProcessing, sendCompletion, -} from '@/lib/completion' -import { CompletionMessagesBuilder } from '@/lib/messages' -import { ChatCompletionMessageToolCall } from 'openai/resources' -import { useAssistant } from './useAssistant' -import { toast } from 'sonner' -import { getTools } from '@/services/mcp' -import { MCPTool } from '@/types/completion' -import { listen } from '@tauri-apps/api/event' -import { SystemEvent } from '@/types/events' -import { stopModel, startModel, stopAllModels } from '@/services/models' +} from "@/lib/completion"; +import { CompletionMessagesBuilder } from "@/lib/messages"; +import { ChatCompletionMessageToolCall } from "openai/resources"; +import { useAssistant } from "./useAssistant"; +import { toast } from "sonner"; +import { getTools } from "@/services/mcp"; +import { MCPTool } from "@/types/completion"; +import { listen } from "@tauri-apps/api/event"; +import { SystemEvent } from "@/types/events"; +import { stopModel, startModel, stopAllModels } from "@/services/models"; -import { useToolApproval } from '@/hooks/useToolApproval' -import { useToolAvailable } from '@/hooks/useToolAvailable' -import { OUT_OF_CONTEXT_SIZE } from '@/utils/error' +import { useToolApproval } from "@/hooks/useToolApproval"; +import { useToolAvailable } from 
"@/hooks/useToolAvailable"; +import { OUT_OF_CONTEXT_SIZE } from "@/utils/error"; export const useChat = () => { - const { prompt, setPrompt } = usePrompt() + const { prompt, setPrompt } = usePrompt(); const { tools, updateTokenSpeed, @@ -40,51 +40,51 @@ export const useChat = () => { updateStreamingContent, updateLoadingModel, setAbortController, - } = useAppState() - const { currentAssistant } = useAssistant() - const { updateProvider } = useModelProvider() + } = useAppState(); + const { currentAssistant } = useAssistant(); + const { updateProvider } = useModelProvider(); const { approvedTools, showApprovalModal, allowAllMCPPermissions } = - useToolApproval() - const { getDisabledToolsForThread } = useToolAvailable() + useToolApproval(); + const { getDisabledToolsForThread } = useToolAvailable(); const { getProviderByName, selectedModel, selectedProvider } = - useModelProvider() + useModelProvider(); const { getCurrentThread: retrieveThread, createThread, updateThreadTimestamp, - } = useThreads() - const { getMessages, addMessage } = useMessages() - const router = useRouter() + } = useThreads(); + const { getMessages, addMessage } = useMessages(); + const router = useRouter(); const provider = useMemo(() => { - return getProviderByName(selectedProvider) - }, [selectedProvider, getProviderByName]) + return getProviderByName(selectedProvider); + }, [selectedProvider, getProviderByName]); const currentProviderId = useMemo(() => { - return provider?.provider || selectedProvider - }, [provider, selectedProvider]) + return provider?.provider || selectedProvider; + }, [provider, selectedProvider]); useEffect(() => { function setTools() { getTools().then((data: MCPTool[]) => { - updateTools(data) - }) + updateTools(data); + }); } - setTools() + setTools(); - let unsubscribe = () => {} + let unsubscribe = () => {}; listen(SystemEvent.MCP_UPDATE, setTools).then((unsub) => { // Unsubscribe from the event when the component unmounts - unsubscribe = unsub - }) - return unsubscribe - }, [updateTools]) + unsubscribe = unsub; + }); + return unsubscribe; + }, [updateTools]); const getCurrentThread = useCallback(async () => { - let currentThread = retrieveThread() + let currentThread = retrieveThread(); if (!currentThread) { currentThread = await createThread( { @@ -92,14 +92,14 @@ export const useChat = () => { provider: selectedProvider, }, prompt, - currentAssistant - ) + currentAssistant, + ); router.navigate({ to: route.threadsDetail, params: { threadId: currentThread.id }, - }) + }); } - return currentThread + return currentThread; }, [ createThread, prompt, @@ -108,7 +108,7 @@ export const useChat = () => { selectedModel?.id, selectedProvider, currentAssistant, - ]) + ]); const increaseModelContextSize = useCallback( (model: Model, provider: ProviderObject) => { @@ -118,12 +118,12 @@ export const useChat = () => { */ const ctxSize = Math.max( model.settings?.ctx_len?.controller_props.value - ? typeof model.settings.ctx_len.controller_props.value === 'string' + ? typeof model.settings.ctx_len.controller_props.value === "string" ? 
parseInt(model.settings.ctx_len.controller_props.value as string) : (model.settings.ctx_len.controller_props.value as number) : 8192, - 8192 - ) + 8192, + ); const updatedModel = { ...model, settings: { @@ -136,80 +136,80 @@ export const useChat = () => { }, }, }, - } + }; // Find the model index in the provider's models array - const modelIndex = provider.models.findIndex((m) => m.id === model.id) + const modelIndex = provider.models.findIndex((m) => m.id === model.id); if (modelIndex !== -1) { // Create a copy of the provider's models array - const updatedModels = [...provider.models] + const updatedModels = [...provider.models]; // Update the specific model in the array - updatedModels[modelIndex] = updatedModel as Model + updatedModels[modelIndex] = updatedModel as Model; // Update the provider with the new models array updateProvider(provider.provider, { models: updatedModels, - }) + }); } - stopAllModels() + stopAllModels(); }, - [updateProvider] - ) + [updateProvider], + ); const sendMessage = useCallback( async ( message: string, showModal?: () => Promise, - troubleshooting = true + troubleshooting = true, ) => { - const activeThread = await getCurrentThread() + const activeThread = await getCurrentThread(); - resetTokenSpeed() + resetTokenSpeed(); const activeProvider = currentProviderId ? getProviderByName(currentProviderId) - : provider - if (!activeThread || !activeProvider) return - const messages = getMessages(activeThread.id) - const abortController = new AbortController() - setAbortController(activeThread.id, abortController) - updateStreamingContent(emptyThreadContent) + : provider; + if (!activeThread || !activeProvider) return; + const messages = getMessages(activeThread.id); + const abortController = new AbortController(); + setAbortController(activeThread.id, abortController); + updateStreamingContent(emptyThreadContent); // Do not add new message on retry if (troubleshooting) - addMessage(newUserThreadContent(activeThread.id, message)) - updateThreadTimestamp(activeThread.id) - setPrompt('') + addMessage(newUserThreadContent(activeThread.id, message)); + updateThreadTimestamp(activeThread.id); + setPrompt(""); try { if (selectedModel?.id) { - updateLoadingModel(true) + updateLoadingModel(true); await startModel( activeProvider, selectedModel.id, - abortController - ).catch(console.error) - updateLoadingModel(false) + abortController, + ).catch(console.error); + updateLoadingModel(false); } const builder = new CompletionMessagesBuilder( messages, - currentAssistant?.instructions - ) + currentAssistant?.instructions, + ); - builder.addUserMessage(message) + builder.addUserMessage(message); - let isCompleted = false + let isCompleted = false; // Filter tools based on model capabilities and available tools for this thread - let availableTools = selectedModel?.capabilities?.includes('tools') + let availableTools = selectedModel?.capabilities?.includes("tools") ? tools.filter((tool) => { - const disabledTools = getDisabledToolsForThread(activeThread.id) - return !disabledTools.includes(tool.name) + const disabledTools = getDisabledToolsForThread(activeThread.id); + return !disabledTools.includes(tool.name); }) - : [] + : []; // TODO: Later replaced by Agent setup? 
- const followUpWithToolUse = true + const followUpWithToolUse = true; while (!isCompleted && !abortController.signal.aborted) { const completion = await sendCompletion( activeThread, @@ -218,51 +218,51 @@ export const useChat = () => { abortController, availableTools, currentAssistant.parameters?.stream === false ? false : true, - currentAssistant.parameters as unknown as Record + currentAssistant.parameters as unknown as Record, // TODO: replace it with according provider setting later on // selectedProvider === 'llama.cpp' && availableTools.length > 0 // ? false // : true - ) + ); - if (!completion) throw new Error('No completion received') - let accumulatedText = '' - const currentCall: ChatCompletionMessageToolCall | null = null - const toolCalls: ChatCompletionMessageToolCall[] = [] + if (!completion) throw new Error("No completion received"); + let accumulatedText = ""; + const currentCall: ChatCompletionMessageToolCall | null = null; + const toolCalls: ChatCompletionMessageToolCall[] = []; if (isCompletionResponse(completion)) { - accumulatedText = completion.choices[0]?.message?.content || '' + accumulatedText = completion.choices[0]?.message?.content || ""; if (completion.choices[0]?.message?.tool_calls) { - toolCalls.push(...completion.choices[0].message.tool_calls) + toolCalls.push(...completion.choices[0].message.tool_calls); } } else { for await (const part of completion) { // Error message if (!part.choices) { throw new Error( - 'message' in part + "message" in part ? (part.message as string) - : (JSON.stringify(part) ?? '') - ) + : (JSON.stringify(part) ?? ""), + ); } - const delta = part.choices[0]?.delta?.content || '' + const delta = part.choices[0]?.delta?.content || ""; if (part.choices[0]?.delta?.tool_calls) { - const calls = extractToolCall(part, currentCall, toolCalls) + const calls = extractToolCall(part, currentCall, toolCalls); const currentContent = newAssistantThreadContent( activeThread.id, accumulatedText, { tool_calls: calls.map((e) => ({ ...e, - state: 'pending', + state: "pending", })), - } - ) - updateStreamingContent(currentContent) - await new Promise((resolve) => setTimeout(resolve, 0)) + }, + ); + updateStreamingContent(currentContent); + await new Promise((resolve) => setTimeout(resolve, 0)); } if (delta) { - accumulatedText += delta + accumulatedText += delta; // Create a new object each time to avoid reference issues // Use a timeout to prevent React from batching updates too quickly const currentContent = newAssistantThreadContent( @@ -271,13 +271,13 @@ export const useChat = () => { { tool_calls: toolCalls.map((e) => ({ ...e, - state: 'pending', + state: "pending", })), - } - ) - updateStreamingContent(currentContent) - updateTokenSpeed(currentContent) - await new Promise((resolve) => setTimeout(resolve, 0)) + }, + ); + updateStreamingContent(currentContent); + updateTokenSpeed(currentContent); + await new Promise((resolve) => setTimeout(resolve, 0)); } } } @@ -286,18 +286,22 @@ export const useChat = () => { accumulatedText.length === 0 && toolCalls.length === 0 && activeThread.model?.id && - activeProvider.provider === 'llama.cpp' + activeProvider.provider === "llama.cpp" ) { - await stopModel(activeThread.model.id, 'cortex') - throw new Error('No response received from the model') + await stopModel(activeThread.model.id, "cortex"); + throw new Error("No response received from the model"); } // Create a final content object for adding to the thread const finalContent = newAssistantThreadContent( activeThread.id, - accumulatedText - ) - 
builder.addAssistantMessage(accumulatedText, undefined, toolCalls) + accumulatedText, + { + tokenSpeed: useAppState.getState().tokenSpeed, + }, + ); + + builder.addAssistantMessage(accumulatedText, undefined, toolCalls); const updatedMessage = await postMessageProcessing( toolCalls, builder, @@ -305,41 +309,41 @@ export const useChat = () => { abortController, approvedTools, allowAllMCPPermissions ? undefined : showApprovalModal, - allowAllMCPPermissions - ) - addMessage(updatedMessage ?? finalContent) - updateStreamingContent(emptyThreadContent) - updateThreadTimestamp(activeThread.id) + allowAllMCPPermissions, + ); + addMessage(updatedMessage ?? finalContent); + updateStreamingContent(emptyThreadContent); + updateThreadTimestamp(activeThread.id); - isCompleted = !toolCalls.length + isCompleted = !toolCalls.length; // Do not create agent loop if there is no need for it - if (!followUpWithToolUse) availableTools = [] + if (!followUpWithToolUse) availableTools = []; } } catch (error) { const errorMessage = - error && typeof error === 'object' && 'message' in error + error && typeof error === "object" && "message" in error ? error.message - : error + : error; if ( - typeof errorMessage === 'string' && + typeof errorMessage === "string" && errorMessage.includes(OUT_OF_CONTEXT_SIZE) && selectedModel && troubleshooting ) { showModal?.().then((confirmed) => { if (confirmed) { - increaseModelContextSize(selectedModel, activeProvider) + increaseModelContextSize(selectedModel, activeProvider); setTimeout(() => { - sendMessage(message, showModal, false) // Retry sending the message without troubleshooting - }, 1000) + sendMessage(message, showModal, false); // Retry sending the message without troubleshooting + }, 1000); } - }) + }); } - toast.error(`Error sending message: ${errorMessage}`) - console.error('Error sending message:', error) + toast.error(`Error sending message: ${errorMessage}`); + console.error("Error sending message:", error); } finally { - updateLoadingModel(false) - updateStreamingContent(undefined) + updateLoadingModel(false); + updateStreamingContent(undefined); } }, [ @@ -364,8 +368,8 @@ export const useChat = () => { showApprovalModal, updateTokenSpeed, increaseModelContextSize, - ] - ) + ], + ); - return { sendMessage } -} + return { sendMessage }; +}; diff --git a/web-app/src/hooks/useMessages.ts b/web-app/src/hooks/useMessages.ts index 3a83b5a48..251d67438 100644 --- a/web-app/src/hooks/useMessages.ts +++ b/web-app/src/hooks/useMessages.ts @@ -1,23 +1,23 @@ -import { create } from 'zustand' -import { ThreadMessage } from '@janhq/core' +import { create } from "zustand"; +import { ThreadMessage } from "@janhq/core"; import { createMessage, deleteMessage as deleteMessageExt, -} from '@/services/messages' -import { useAssistant } from './useAssistant' +} from "@/services/messages"; +import { useAssistant } from "./useAssistant"; type MessageState = { - messages: Record - getMessages: (threadId: string) => ThreadMessage[] - setMessages: (threadId: string, messages: ThreadMessage[]) => void - addMessage: (message: ThreadMessage) => void - deleteMessage: (threadId: string, messageId: string) => void -} + messages: Record; + getMessages: (threadId: string) => ThreadMessage[]; + setMessages: (threadId: string, messages: ThreadMessage[]) => void; + addMessage: (message: ThreadMessage) => void; + deleteMessage: (threadId: string, messageId: string) => void; +}; export const useMessages = create()((set, get) => ({ messages: {}, getMessages: (threadId) => { - return 
get().messages[threadId] || [] + return get().messages[threadId] || []; }, setMessages: (threadId, messages) => { set((state) => ({ @@ -25,10 +25,11 @@ export const useMessages = create()((set, get) => ({ ...state.messages, [threadId]: messages, }, - })) + })); }, addMessage: (message) => { - const currentAssistant = useAssistant.getState().currentAssistant + console.log("addMessage: ", message); + const currentAssistant = useAssistant.getState().currentAssistant; const newMessage = { ...message, created_at: message.created_at || Date.now(), @@ -36,7 +37,7 @@ export const useMessages = create()((set, get) => ({ ...message.metadata, assistant: currentAssistant, }, - } + }; createMessage(newMessage).then((createdMessage) => { set((state) => ({ messages: { @@ -46,19 +47,19 @@ export const useMessages = create()((set, get) => ({ createdMessage, ], }, - })) - }) + })); + }); }, deleteMessage: (threadId, messageId) => { - deleteMessageExt(threadId, messageId) + deleteMessageExt(threadId, messageId); set((state) => ({ messages: { ...state.messages, [threadId]: state.messages[threadId]?.filter( - (message) => message.id !== messageId + (message) => message.id !== messageId, ) || [], }, - })) + })); }, -})) +})); From 4b3a0918fef87192a495991fed7f35f918c9f241 Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 18:54:22 +0800 Subject: [PATCH 14/48] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20to=20fol?= =?UTF-8?q?low=20prettier=20convention?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/hooks/useAppState.ts | 78 ++++----- web-app/src/hooks/useChat.ts | 278 +++++++++++++++---------------- 2 files changed, 178 insertions(+), 178 deletions(-) diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts index ccf044a4f..e3fa1753b 100644 --- a/web-app/src/hooks/useAppState.ts +++ b/web-app/src/hooks/useAppState.ts @@ -1,36 +1,36 @@ -import { create } from "zustand"; -import { ThreadMessage } from "@janhq/core"; -import { MCPTool } from "@/types/completion"; -import { useAssistant } from "./useAssistant"; -import { ChatCompletionMessageToolCall } from "openai/resources"; +import { create } from 'zustand' +import { ThreadMessage } from '@janhq/core' +import { MCPTool } from '@/types/completion' +import { useAssistant } from './useAssistant' +import { ChatCompletionMessageToolCall } from 'openai/resources' type AppState = { - streamingContent?: ThreadMessage; - loadingModel?: boolean; - tools: MCPTool[]; - serverStatus: "running" | "stopped" | "pending"; - abortControllers: Record; - tokenSpeed?: TokenSpeed; - currentToolCall?: ChatCompletionMessageToolCall; - showOutOfContextDialog?: boolean; - setServerStatus: (value: "running" | "stopped" | "pending") => void; - updateStreamingContent: (content: ThreadMessage | undefined) => void; + streamingContent?: ThreadMessage + loadingModel?: boolean + tools: MCPTool[] + serverStatus: 'running' | 'stopped' | 'pending' + abortControllers: Record + tokenSpeed?: TokenSpeed + currentToolCall?: ChatCompletionMessageToolCall + showOutOfContextDialog?: boolean + setServerStatus: (value: 'running' | 'stopped' | 'pending') => void + updateStreamingContent: (content: ThreadMessage | undefined) => void updateCurrentToolCall: ( - toolCall: ChatCompletionMessageToolCall | undefined, - ) => void; - updateLoadingModel: (loading: boolean) => void; - updateTools: (tools: MCPTool[]) => void; - setAbortController: (threadId: string, controller: AbortController) => void; - updateTokenSpeed: 
(message: ThreadMessage) => void; - resetTokenSpeed: () => void; - setOutOfContextDialog: (show: boolean) => void; -}; + toolCall: ChatCompletionMessageToolCall | undefined + ) => void + updateLoadingModel: (loading: boolean) => void + updateTools: (tools: MCPTool[]) => void + setAbortController: (threadId: string, controller: AbortController) => void + updateTokenSpeed: (message: ThreadMessage) => void + resetTokenSpeed: () => void + setOutOfContextDialog: (show: boolean) => void +} export const useAppState = create()((set) => ({ streamingContent: undefined, loadingModel: false, tools: [], - serverStatus: "stopped", + serverStatus: 'stopped', abortControllers: {}, tokenSpeed: undefined, currentToolCall: undefined, @@ -46,19 +46,19 @@ export const useAppState = create()((set) => ({ }, } : undefined, - })); - console.log(useAppState.getState().streamingContent); + })) + console.log(useAppState.getState().streamingContent) }, updateCurrentToolCall: (toolCall) => { set(() => ({ currentToolCall: toolCall, - })); + })) }, updateLoadingModel: (loading) => { - set({ loadingModel: loading }); + set({ loadingModel: loading }) }, updateTools: (tools) => { - set({ tools }); + set({ tools }) }, setServerStatus: (value) => set({ serverStatus: value }), setAbortController: (threadId, controller) => { @@ -67,11 +67,11 @@ export const useAppState = create()((set) => ({ ...state.abortControllers, [threadId]: controller, }, - })); + })) }, updateTokenSpeed: (message) => set((state) => { - const currentTimestamp = new Date().getTime(); // Get current time in milliseconds + const currentTimestamp = new Date().getTime() // Get current time in milliseconds if (!state.tokenSpeed) { // If this is the first update, just set the lastTimestamp and return return { @@ -81,14 +81,14 @@ export const useAppState = create()((set) => ({ tokenCount: 1, message: message.id, }, - }; + } } const timeDiffInSeconds = - (currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000; // Time difference in seconds - const totalTokenCount = state.tokenSpeed.tokenCount + 1; + (currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000 // Time difference in seconds + const totalTokenCount = state.tokenSpeed.tokenCount + 1 const averageTokenSpeed = - totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1); // Calculate average token speed + totalTokenCount / (timeDiffInSeconds > 0 ? 
timeDiffInSeconds : 1) // Calculate average token speed return { tokenSpeed: { ...state.tokenSpeed, @@ -96,7 +96,7 @@ export const useAppState = create()((set) => ({ tokenCount: totalTokenCount, message: message.id, }, - }; + } }), resetTokenSpeed: () => set({ @@ -105,6 +105,6 @@ export const useAppState = create()((set) => ({ setOutOfContextDialog: (show) => { set(() => ({ showOutOfContextDialog: show, - })); + })) }, -})); +})) diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 0fbfeb5d9..c8e0fe9f1 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -1,12 +1,12 @@ -import { useCallback, useEffect, useMemo } from "react"; -import { usePrompt } from "./usePrompt"; -import { useModelProvider } from "./useModelProvider"; -import { useThreads } from "./useThreads"; -import { useAppState } from "./useAppState"; -import { useMessages } from "./useMessages"; -import { useRouter } from "@tanstack/react-router"; -import { defaultModel } from "@/lib/models"; -import { route } from "@/constants/routes"; +import { useCallback, useEffect, useMemo } from 'react' +import { usePrompt } from './usePrompt' +import { useModelProvider } from './useModelProvider' +import { useThreads } from './useThreads' +import { useAppState } from './useAppState' +import { useMessages } from './useMessages' +import { useRouter } from '@tanstack/react-router' +import { defaultModel } from '@/lib/models' +import { route } from '@/constants/routes' import { emptyThreadContent, extractToolCall, @@ -15,23 +15,23 @@ import { newUserThreadContent, postMessageProcessing, sendCompletion, -} from "@/lib/completion"; -import { CompletionMessagesBuilder } from "@/lib/messages"; -import { ChatCompletionMessageToolCall } from "openai/resources"; -import { useAssistant } from "./useAssistant"; -import { toast } from "sonner"; -import { getTools } from "@/services/mcp"; -import { MCPTool } from "@/types/completion"; -import { listen } from "@tauri-apps/api/event"; -import { SystemEvent } from "@/types/events"; -import { stopModel, startModel, stopAllModels } from "@/services/models"; +} from '@/lib/completion' +import { CompletionMessagesBuilder } from '@/lib/messages' +import { ChatCompletionMessageToolCall } from 'openai/resources' +import { useAssistant } from './useAssistant' +import { toast } from 'sonner' +import { getTools } from '@/services/mcp' +import { MCPTool } from '@/types/completion' +import { listen } from '@tauri-apps/api/event' +import { SystemEvent } from '@/types/events' +import { stopModel, startModel, stopAllModels } from '@/services/models' -import { useToolApproval } from "@/hooks/useToolApproval"; -import { useToolAvailable } from "@/hooks/useToolAvailable"; -import { OUT_OF_CONTEXT_SIZE } from "@/utils/error"; +import { useToolApproval } from '@/hooks/useToolApproval' +import { useToolAvailable } from '@/hooks/useToolAvailable' +import { OUT_OF_CONTEXT_SIZE } from '@/utils/error' export const useChat = () => { - const { prompt, setPrompt } = usePrompt(); + const { prompt, setPrompt } = usePrompt() const { tools, updateTokenSpeed, @@ -40,51 +40,51 @@ export const useChat = () => { updateStreamingContent, updateLoadingModel, setAbortController, - } = useAppState(); - const { currentAssistant } = useAssistant(); - const { updateProvider } = useModelProvider(); + } = useAppState() + const { currentAssistant } = useAssistant() + const { updateProvider } = useModelProvider() const { approvedTools, showApprovalModal, allowAllMCPPermissions } = - useToolApproval(); 
- const { getDisabledToolsForThread } = useToolAvailable(); + useToolApproval() + const { getDisabledToolsForThread } = useToolAvailable() const { getProviderByName, selectedModel, selectedProvider } = - useModelProvider(); + useModelProvider() const { getCurrentThread: retrieveThread, createThread, updateThreadTimestamp, - } = useThreads(); - const { getMessages, addMessage } = useMessages(); - const router = useRouter(); + } = useThreads() + const { getMessages, addMessage } = useMessages() + const router = useRouter() const provider = useMemo(() => { - return getProviderByName(selectedProvider); - }, [selectedProvider, getProviderByName]); + return getProviderByName(selectedProvider) + }, [selectedProvider, getProviderByName]) const currentProviderId = useMemo(() => { - return provider?.provider || selectedProvider; - }, [provider, selectedProvider]); + return provider?.provider || selectedProvider + }, [provider, selectedProvider]) useEffect(() => { function setTools() { getTools().then((data: MCPTool[]) => { - updateTools(data); - }); + updateTools(data) + }) } - setTools(); + setTools() - let unsubscribe = () => {}; + let unsubscribe = () => {} listen(SystemEvent.MCP_UPDATE, setTools).then((unsub) => { // Unsubscribe from the event when the component unmounts - unsubscribe = unsub; - }); - return unsubscribe; - }, [updateTools]); + unsubscribe = unsub + }) + return unsubscribe + }, [updateTools]) const getCurrentThread = useCallback(async () => { - let currentThread = retrieveThread(); + let currentThread = retrieveThread() if (!currentThread) { currentThread = await createThread( { @@ -92,14 +92,14 @@ export const useChat = () => { provider: selectedProvider, }, prompt, - currentAssistant, - ); + currentAssistant + ) router.navigate({ to: route.threadsDetail, params: { threadId: currentThread.id }, - }); + }) } - return currentThread; + return currentThread }, [ createThread, prompt, @@ -108,7 +108,7 @@ export const useChat = () => { selectedModel?.id, selectedProvider, currentAssistant, - ]); + ]) const increaseModelContextSize = useCallback( (model: Model, provider: ProviderObject) => { @@ -118,12 +118,12 @@ export const useChat = () => { */ const ctxSize = Math.max( model.settings?.ctx_len?.controller_props.value - ? typeof model.settings.ctx_len.controller_props.value === "string" + ? typeof model.settings.ctx_len.controller_props.value === 'string' ? 
parseInt(model.settings.ctx_len.controller_props.value as string) : (model.settings.ctx_len.controller_props.value as number) : 8192, - 8192, - ); + 8192 + ) const updatedModel = { ...model, settings: { @@ -136,80 +136,80 @@ export const useChat = () => { }, }, }, - }; + } // Find the model index in the provider's models array - const modelIndex = provider.models.findIndex((m) => m.id === model.id); + const modelIndex = provider.models.findIndex((m) => m.id === model.id) if (modelIndex !== -1) { // Create a copy of the provider's models array - const updatedModels = [...provider.models]; + const updatedModels = [...provider.models] // Update the specific model in the array - updatedModels[modelIndex] = updatedModel as Model; + updatedModels[modelIndex] = updatedModel as Model // Update the provider with the new models array updateProvider(provider.provider, { models: updatedModels, - }); + }) } - stopAllModels(); + stopAllModels() }, - [updateProvider], - ); + [updateProvider] + ) const sendMessage = useCallback( async ( message: string, showModal?: () => Promise, - troubleshooting = true, + troubleshooting = true ) => { - const activeThread = await getCurrentThread(); + const activeThread = await getCurrentThread() - resetTokenSpeed(); + resetTokenSpeed() const activeProvider = currentProviderId ? getProviderByName(currentProviderId) - : provider; - if (!activeThread || !activeProvider) return; - const messages = getMessages(activeThread.id); - const abortController = new AbortController(); - setAbortController(activeThread.id, abortController); - updateStreamingContent(emptyThreadContent); + : provider + if (!activeThread || !activeProvider) return + const messages = getMessages(activeThread.id) + const abortController = new AbortController() + setAbortController(activeThread.id, abortController) + updateStreamingContent(emptyThreadContent) // Do not add new message on retry if (troubleshooting) - addMessage(newUserThreadContent(activeThread.id, message)); - updateThreadTimestamp(activeThread.id); - setPrompt(""); + addMessage(newUserThreadContent(activeThread.id, message)) + updateThreadTimestamp(activeThread.id) + setPrompt('') try { if (selectedModel?.id) { - updateLoadingModel(true); + updateLoadingModel(true) await startModel( activeProvider, selectedModel.id, - abortController, - ).catch(console.error); - updateLoadingModel(false); + abortController + ).catch(console.error) + updateLoadingModel(false) } const builder = new CompletionMessagesBuilder( messages, - currentAssistant?.instructions, - ); + currentAssistant?.instructions + ) - builder.addUserMessage(message); + builder.addUserMessage(message) - let isCompleted = false; + let isCompleted = false // Filter tools based on model capabilities and available tools for this thread - let availableTools = selectedModel?.capabilities?.includes("tools") + let availableTools = selectedModel?.capabilities?.includes('tools') ? tools.filter((tool) => { - const disabledTools = getDisabledToolsForThread(activeThread.id); - return !disabledTools.includes(tool.name); + const disabledTools = getDisabledToolsForThread(activeThread.id) + return !disabledTools.includes(tool.name) }) - : []; + : [] // TODO: Later replaced by Agent setup? 
- const followUpWithToolUse = true; + const followUpWithToolUse = true while (!isCompleted && !abortController.signal.aborted) { const completion = await sendCompletion( activeThread, @@ -218,51 +218,51 @@ export const useChat = () => { abortController, availableTools, currentAssistant.parameters?.stream === false ? false : true, - currentAssistant.parameters as unknown as Record, + currentAssistant.parameters as unknown as Record // TODO: replace it with according provider setting later on // selectedProvider === 'llama.cpp' && availableTools.length > 0 // ? false // : true - ); + ) - if (!completion) throw new Error("No completion received"); - let accumulatedText = ""; - const currentCall: ChatCompletionMessageToolCall | null = null; - const toolCalls: ChatCompletionMessageToolCall[] = []; + if (!completion) throw new Error('No completion received') + let accumulatedText = '' + const currentCall: ChatCompletionMessageToolCall | null = null + const toolCalls: ChatCompletionMessageToolCall[] = [] if (isCompletionResponse(completion)) { - accumulatedText = completion.choices[0]?.message?.content || ""; + accumulatedText = completion.choices[0]?.message?.content || '' if (completion.choices[0]?.message?.tool_calls) { - toolCalls.push(...completion.choices[0].message.tool_calls); + toolCalls.push(...completion.choices[0].message.tool_calls) } } else { for await (const part of completion) { // Error message if (!part.choices) { throw new Error( - "message" in part + 'message' in part ? (part.message as string) - : (JSON.stringify(part) ?? ""), - ); + : (JSON.stringify(part) ?? '') + ) } - const delta = part.choices[0]?.delta?.content || ""; + const delta = part.choices[0]?.delta?.content || '' if (part.choices[0]?.delta?.tool_calls) { - const calls = extractToolCall(part, currentCall, toolCalls); + const calls = extractToolCall(part, currentCall, toolCalls) const currentContent = newAssistantThreadContent( activeThread.id, accumulatedText, { tool_calls: calls.map((e) => ({ ...e, - state: "pending", + state: 'pending', })), - }, - ); - updateStreamingContent(currentContent); - await new Promise((resolve) => setTimeout(resolve, 0)); + } + ) + updateStreamingContent(currentContent) + await new Promise((resolve) => setTimeout(resolve, 0)) } if (delta) { - accumulatedText += delta; + accumulatedText += delta // Create a new object each time to avoid reference issues // Use a timeout to prevent React from batching updates too quickly const currentContent = newAssistantThreadContent( @@ -271,13 +271,13 @@ export const useChat = () => { { tool_calls: toolCalls.map((e) => ({ ...e, - state: "pending", + state: 'pending', })), - }, - ); - updateStreamingContent(currentContent); - updateTokenSpeed(currentContent); - await new Promise((resolve) => setTimeout(resolve, 0)); + } + ) + updateStreamingContent(currentContent) + updateTokenSpeed(currentContent) + await new Promise((resolve) => setTimeout(resolve, 0)) } } } @@ -286,10 +286,10 @@ export const useChat = () => { accumulatedText.length === 0 && toolCalls.length === 0 && activeThread.model?.id && - activeProvider.provider === "llama.cpp" + activeProvider.provider === 'llama.cpp' ) { - await stopModel(activeThread.model.id, "cortex"); - throw new Error("No response received from the model"); + await stopModel(activeThread.model.id, 'cortex') + throw new Error('No response received from the model') } // Create a final content object for adding to the thread @@ -298,10 +298,10 @@ export const useChat = () => { accumulatedText, { tokenSpeed: 
useAppState.getState().tokenSpeed, - }, - ); + } + ) - builder.addAssistantMessage(accumulatedText, undefined, toolCalls); + builder.addAssistantMessage(accumulatedText, undefined, toolCalls) const updatedMessage = await postMessageProcessing( toolCalls, builder, @@ -309,41 +309,41 @@ export const useChat = () => { abortController, approvedTools, allowAllMCPPermissions ? undefined : showApprovalModal, - allowAllMCPPermissions, - ); - addMessage(updatedMessage ?? finalContent); - updateStreamingContent(emptyThreadContent); - updateThreadTimestamp(activeThread.id); + allowAllMCPPermissions + ) + addMessage(updatedMessage ?? finalContent) + updateStreamingContent(emptyThreadContent) + updateThreadTimestamp(activeThread.id) - isCompleted = !toolCalls.length; + isCompleted = !toolCalls.length // Do not create agent loop if there is no need for it - if (!followUpWithToolUse) availableTools = []; + if (!followUpWithToolUse) availableTools = [] } } catch (error) { const errorMessage = - error && typeof error === "object" && "message" in error + error && typeof error === 'object' && 'message' in error ? error.message - : error; + : error if ( - typeof errorMessage === "string" && + typeof errorMessage === 'string' && errorMessage.includes(OUT_OF_CONTEXT_SIZE) && selectedModel && troubleshooting ) { showModal?.().then((confirmed) => { if (confirmed) { - increaseModelContextSize(selectedModel, activeProvider); + increaseModelContextSize(selectedModel, activeProvider) setTimeout(() => { - sendMessage(message, showModal, false); // Retry sending the message without troubleshooting - }, 1000); + sendMessage(message, showModal, false) // Retry sending the message without troubleshooting + }, 1000) } - }); + }) } - toast.error(`Error sending message: ${errorMessage}`); - console.error("Error sending message:", error); + toast.error(`Error sending message: ${errorMessage}`) + console.error('Error sending message:', error) } finally { - updateLoadingModel(false); - updateStreamingContent(undefined); + updateLoadingModel(false) + updateStreamingContent(undefined) } }, [ @@ -368,8 +368,8 @@ export const useChat = () => { showApprovalModal, updateTokenSpeed, increaseModelContextSize, - ], - ); + ] + ) - return { sendMessage }; -}; + return { sendMessage } +} From b93f8afda2205f5b7c8f88cfda1ae9a74cd511c5 Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 19:05:11 +0800 Subject: [PATCH 15/48] =?UTF-8?q?=F0=9F=90=9B=20fix:=20exclude=20deleted?= =?UTF-8?q?=20field?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ThreadContent.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 40c26993b..66205248a 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -450,7 +450,6 @@ export const ThreadContent = memo( )}
From de5ddaac6d8879c7bd57b6ecda98052b0f7da617 Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 19:12:19 +0800 Subject: [PATCH 16/48] =?UTF-8?q?=F0=9F=A7=B9=20clean:=20all=20the=20misse?= =?UTF-8?q?d=20console.log?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/hooks/useAppState.ts | 1 - web-app/src/hooks/useMessages.ts | 41 ++++++++++++++++---------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts index e3fa1753b..dc29f7f8a 100644 --- a/web-app/src/hooks/useAppState.ts +++ b/web-app/src/hooks/useAppState.ts @@ -47,7 +47,6 @@ export const useAppState = create()((set) => ({ } : undefined, })) - console.log(useAppState.getState().streamingContent) }, updateCurrentToolCall: (toolCall) => { set(() => ({ diff --git a/web-app/src/hooks/useMessages.ts b/web-app/src/hooks/useMessages.ts index 251d67438..3a83b5a48 100644 --- a/web-app/src/hooks/useMessages.ts +++ b/web-app/src/hooks/useMessages.ts @@ -1,23 +1,23 @@ -import { create } from "zustand"; -import { ThreadMessage } from "@janhq/core"; +import { create } from 'zustand' +import { ThreadMessage } from '@janhq/core' import { createMessage, deleteMessage as deleteMessageExt, -} from "@/services/messages"; -import { useAssistant } from "./useAssistant"; +} from '@/services/messages' +import { useAssistant } from './useAssistant' type MessageState = { - messages: Record; - getMessages: (threadId: string) => ThreadMessage[]; - setMessages: (threadId: string, messages: ThreadMessage[]) => void; - addMessage: (message: ThreadMessage) => void; - deleteMessage: (threadId: string, messageId: string) => void; -}; + messages: Record + getMessages: (threadId: string) => ThreadMessage[] + setMessages: (threadId: string, messages: ThreadMessage[]) => void + addMessage: (message: ThreadMessage) => void + deleteMessage: (threadId: string, messageId: string) => void +} export const useMessages = create()((set, get) => ({ messages: {}, getMessages: (threadId) => { - return get().messages[threadId] || []; + return get().messages[threadId] || [] }, setMessages: (threadId, messages) => { set((state) => ({ @@ -25,11 +25,10 @@ export const useMessages = create()((set, get) => ({ ...state.messages, [threadId]: messages, }, - })); + })) }, addMessage: (message) => { - console.log("addMessage: ", message); - const currentAssistant = useAssistant.getState().currentAssistant; + const currentAssistant = useAssistant.getState().currentAssistant const newMessage = { ...message, created_at: message.created_at || Date.now(), @@ -37,7 +36,7 @@ export const useMessages = create()((set, get) => ({ ...message.metadata, assistant: currentAssistant, }, - }; + } createMessage(newMessage).then((createdMessage) => { set((state) => ({ messages: { @@ -47,19 +46,19 @@ export const useMessages = create()((set, get) => ({ createdMessage, ], }, - })); - }); + })) + }) }, deleteMessage: (threadId, messageId) => { - deleteMessageExt(threadId, messageId); + deleteMessageExt(threadId, messageId) set((state) => ({ messages: { ...state.messages, [threadId]: state.messages[threadId]?.filter( - (message) => message.id !== messageId, + (message) => message.id !== messageId ) || [], }, - })); + })) }, -})); +})) From e20c801ff0904ea1763a742c8f4f370643d4c374 Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 15 Jun 2025 18:20:17 +0700 Subject: [PATCH 17/48] =?UTF-8?q?=E2=9C=A8enhancement:=20out=20of=20contex?= 
=?UTF-8?q?t=20troubleshooting=20(#5275)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨enhancement: out of context troubleshooting * 🔧refactor: clean up --- .../containers/dialogs/OutOfContextDialog.tsx | 27 +- web-app/src/hooks/useChat.ts | 232 ++++++++++++------ .../settings/providers/$providerName.tsx | 3 + web-app/src/services/models.ts | 4 +- 4 files changed, 188 insertions(+), 78 deletions(-) diff --git a/web-app/src/containers/dialogs/OutOfContextDialog.tsx b/web-app/src/containers/dialogs/OutOfContextDialog.tsx index fb01d7907..92e72950a 100644 --- a/web-app/src/containers/dialogs/OutOfContextDialog.tsx +++ b/web-app/src/containers/dialogs/OutOfContextDialog.tsx @@ -14,7 +14,9 @@ import { Button } from '@/components/ui/button' export function useOutOfContextPromiseModal() { const [isOpen, setIsOpen] = useState(false) const [modalProps, setModalProps] = useState<{ - resolveRef: ((value: unknown) => void) | null + resolveRef: + | ((value: 'ctx_len' | 'context_shift' | undefined) => void) + | null }>({ resolveRef: null, }) @@ -33,17 +35,23 @@ export function useOutOfContextPromiseModal() { return null } - const handleConfirm = () => { + const handleContextLength = () => { setIsOpen(false) if (modalProps.resolveRef) { - modalProps.resolveRef(true) + modalProps.resolveRef('ctx_len') } } + const handleContextShift = () => { + setIsOpen(false) + if (modalProps.resolveRef) { + modalProps.resolveRef('context_shift') + } + } const handleCancel = () => { setIsOpen(false) if (modalProps.resolveRef) { - modalProps.resolveRef(false) + modalProps.resolveRef(undefined) } } @@ -64,7 +72,7 @@ export function useOutOfContextPromiseModal() { {t( 'outOfContextError.description', - 'This chat is reaching the AI’s memory limit, like a whiteboard filling up. We can expand the memory window (called context size) so it remembers more, but it may use more of your computer’s memory.' + 'This chat is reaching the AI’s memory limit, like a whiteboard filling up. We can expand the memory window (called context size) so it remembers more, but it may use more of your computer’s memory. We can also truncate the input, which means it will forget some of the chat history to make room for new messages.' )}

@@ -77,14 +85,17 @@ export function useOutOfContextPromiseModal() {
{streamingContent ? ( diff --git a/web-app/src/containers/ChatWidthSwitcher.tsx b/web-app/src/containers/ChatWidthSwitcher.tsx new file mode 100644 index 000000000..ddaf4d4fe --- /dev/null +++ b/web-app/src/containers/ChatWidthSwitcher.tsx @@ -0,0 +1,61 @@ +import { Skeleton } from '@/components/ui/skeleton' +import { useAppearance } from '@/hooks/useAppearance' +import { cn } from '@/lib/utils' +import { IconCircleCheckFilled } from '@tabler/icons-react' + +export function ChatWidthSwitcher() { + const { chatWidth, setChatWidth } = useAppearance() + + return ( +
+ + +
+ ) +} diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 66205248a..af359d32e 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -359,97 +359,98 @@ export const ThreadContent = memo( {!isToolCalls && (
-
- - - - - - -

Delete

-
-
- - - - -
- -
-
- -

Metadata

-
-
-
- - - Message Metadata -
-
- -
-
- - - - - -
-
-
- - {item.isLastMessage && ( +
+
+ -

Regenerate

+

Delete

- )} + + + + +
+ +
+
+ +

Metadata

+
+
+
+ + + Message Metadata +
+
+ +
+
+ + + + + +
+
+
+ + {item.isLastMessage && ( + + + + + +

Regenerate

+
+
+ )} +
diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx index b1dfb841c..5309d890c 100644 --- a/web-app/src/containers/TokenSpeedIndicator.tsx +++ b/web-app/src/containers/TokenSpeedIndicator.tsx @@ -1,19 +1,28 @@ -import { IconBrandSpeedtest } from '@tabler/icons-react' +import { useAppState } from '@/hooks/useAppState' +import { Gauge } from 'lucide-react' interface TokenSpeedIndicatorProps { metadata?: Record + streaming?: boolean } export const TokenSpeedIndicator = ({ - metadata + metadata, + streaming, }: TokenSpeedIndicatorProps) => { - const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number })?.tokenSpeed + const { tokenSpeed } = useAppState() + const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number }) + ?.tokenSpeed return (
- + + - {Math.round(persistedTokenSpeed)} tokens/sec + {Math.round( + streaming ? Number(tokenSpeed?.tokenSpeed) : persistedTokenSpeed + )} +  tokens/sec
) diff --git a/web-app/src/containers/dialogs/AppUpdater.tsx b/web-app/src/containers/dialogs/AppUpdater.tsx index 02a84accb..a17876a15 100644 --- a/web-app/src/containers/dialogs/AppUpdater.tsx +++ b/web-app/src/containers/dialogs/AppUpdater.tsx @@ -22,8 +22,8 @@ const DialogAppUpdater = () => { setRemindMeLater(true) } - const beta = VERSION.includes('beta') const nightly = VERSION.includes('-') + const beta = VERSION.includes('beta') const { release, fetchLatestRelease } = useReleaseNotes() diff --git a/web-app/src/hooks/useAppearance.ts b/web-app/src/hooks/useAppearance.ts index 60340e542..51a1ce10e 100644 --- a/web-app/src/hooks/useAppearance.ts +++ b/web-app/src/hooks/useAppearance.ts @@ -6,8 +6,10 @@ import { rgb, oklch, formatCss } from 'culori' import { useTheme } from './useTheme' export type FontSize = '14px' | '15px' | '16px' | '18px' +export type ChatWidth = 'full' | 'compact' interface AppearanceState { + chatWidth: ChatWidth fontSize: FontSize appBgColor: RgbaColor appMainViewBgColor: RgbaColor @@ -19,6 +21,7 @@ interface AppearanceState { appAccentTextColor: string appDestructiveTextColor: string appLeftPanelTextColor: string + setChatWidth: (size: ChatWidth) => void setFontSize: (size: FontSize) => void setAppBgColor: (color: RgbaColor) => void setAppMainViewBgColor: (color: RgbaColor) => void @@ -129,6 +132,7 @@ export const useAppearance = create()( persist( (set) => { return { + chatWidth: 'compact', fontSize: defaultFontSize, appBgColor: defaultAppBgColor, appMainViewBgColor: defaultAppMainViewBgColor, @@ -270,6 +274,10 @@ export const useAppearance = create()( }) }, + setChatWidth: (value: ChatWidth) => { + set({ chatWidth: value }) + }, + setFontSize: (size: FontSize) => { // Update CSS variable document.documentElement.style.setProperty('--font-size-base', size) diff --git a/web-app/src/routes/settings/appearance.tsx b/web-app/src/routes/settings/appearance.tsx index d59abb9b3..21b99c73e 100644 --- a/web-app/src/routes/settings/appearance.tsx +++ b/web-app/src/routes/settings/appearance.tsx @@ -18,6 +18,7 @@ import CodeBlockStyleSwitcher from '@/containers/CodeBlockStyleSwitcher' import { LineNumbersSwitcher } from '@/containers/LineNumbersSwitcher' import { CodeBlockExample } from '@/containers/CodeBlockExample' import { toast } from 'sonner' +import { ChatWidthSwitcher } from '@/containers/ChatWidthSwitcher' // eslint-disable-next-line @typescript-eslint/no-explicit-any export const Route = createFileRoute(route.settings.appearance as any)({ @@ -98,6 +99,15 @@ function Appareances() { /> + {/* Chat Message */} + + + + + {/* Codeblock */} ({ @@ -213,7 +213,12 @@ function ThreadDetail() { 'flex flex-col h-full w-full overflow-auto px-4 pt-4 pb-3' )} > -
+
{messages && messages.map((item, index) => { // Only pass isLastMessage to the last message in the array @@ -247,7 +252,12 @@ function ThreadDetail() {
-
+
Date: Mon, 16 Jun 2025 16:28:54 +0700 Subject: [PATCH 19/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20optional=20additional?= =?UTF-8?q?=5Finformation=20gpu=20(#5291)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/routes/settings/hardware.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index 3b35ddc65..dcd301773 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -97,7 +97,7 @@ function SortableGPUItem({ gpu, index }: { gpu: GPU; index: number }) { title="Driver Version" actions={ - {gpu.additional_information.driver_version} + {gpu.additional_information?.driver_version} } /> @@ -105,7 +105,7 @@ function SortableGPUItem({ gpu, index }: { gpu: GPU; index: number }) { title="Compute Capability" actions={ - {gpu.additional_information.compute_cap} + {gpu.additional_information?.compute_cap} } /> From 9b1f206cc63ec8e472661ccf9bcf85a962d8c899 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Mon, 16 Jun 2025 17:14:38 +0700 Subject: [PATCH 20/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20showing=20release=20no?= =?UTF-8?q?tes=20for=20beta=20and=20prod=20(#5292)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: showing release notes for beta and prod * ♻️refactor: make an utils env * ♻️refactor: hide MCP for production * ♻️refactor: simplify the boolean expression fetch release note --- web-app/src/containers/SettingsMenu.tsx | 14 ++++++++++---- web-app/src/containers/dialogs/AppUpdater.tsx | 16 +++++++--------- web-app/src/lib/version.ts | 5 +++++ 3 files changed, 22 insertions(+), 13 deletions(-) create mode 100644 web-app/src/lib/version.ts diff --git a/web-app/src/containers/SettingsMenu.tsx b/web-app/src/containers/SettingsMenu.tsx index 4494aa44c..c23ed6acf 100644 --- a/web-app/src/containers/SettingsMenu.tsx +++ b/web-app/src/containers/SettingsMenu.tsx @@ -2,6 +2,7 @@ import { Link, useMatches } from '@tanstack/react-router' import { route } from '@/constants/routes' import { useTranslation } from 'react-i18next' import { useModelProvider } from '@/hooks/useModelProvider' +import { isProd } from '@/lib/version' const menuSettings = [ { @@ -24,10 +25,15 @@ const menuSettings = [ title: 'Hardware', route: route.settings.hardware, }, - { - title: 'MCP Servers', - route: route.settings.mcp_servers, - }, + // Only show MCP Servers in non-production environment + ...(!isProd + ? [ + { + title: 'MCP Servers', + route: route.settings.mcp_servers, + }, + ] + : []), { title: 'Local API Server', route: route.settings.local_api_server, diff --git a/web-app/src/containers/dialogs/AppUpdater.tsx b/web-app/src/containers/dialogs/AppUpdater.tsx index a17876a15..313937c34 100644 --- a/web-app/src/containers/dialogs/AppUpdater.tsx +++ b/web-app/src/containers/dialogs/AppUpdater.tsx @@ -7,6 +7,7 @@ import { useState, useEffect } from 'react' import { useReleaseNotes } from '@/hooks/useReleaseNotes' import { RenderMarkdown } from '../RenderMarkdown' import { cn, isDev } from '@/lib/utils' +import { isNightly, isBeta } from '@/lib/version' const DialogAppUpdater = () => { const { @@ -22,16 +23,13 @@ const DialogAppUpdater = () => { setRemindMeLater(true) } - const nightly = VERSION.includes('-') - const beta = VERSION.includes('beta') - const { release, fetchLatestRelease } = useReleaseNotes() useEffect(() => { if (!isDev()) { - fetchLatestRelease(beta ? 
true : false) + fetchLatestRelease(isBeta) } - }, [beta, fetchLatestRelease]) + }, [fetchLatestRelease]) // Check for updates when component mounts useEffect(() => { @@ -71,7 +69,7 @@ const DialogAppUpdater = () => {
New Version: Jan {updateState.updateInfo?.version}
-
+
There's a new app update available to download.
@@ -79,9 +77,9 @@ const DialogAppUpdater = () => {
{showReleaseNotes && ( -
- {nightly ? ( -

+

+ {isNightly && !isBeta ? ( +

You are using a nightly build. This version is built from the latest development branch and may not have release notes. diff --git a/web-app/src/lib/version.ts b/web-app/src/lib/version.ts new file mode 100644 index 000000000..f8e7df5b0 --- /dev/null +++ b/web-app/src/lib/version.ts @@ -0,0 +1,5 @@ +import { isDev } from './utils' + +export const isNightly = VERSION.includes('-') +export const isBeta = VERSION.includes('beta') +export const isProd = !isNightly && !isBeta && !isDev From 41814547995c319d069f36ed139e95b79eb1f775 Mon Sep 17 00:00:00 2001 From: Louis Date: Mon, 16 Jun 2025 18:45:26 +0700 Subject: [PATCH 21/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20typo=20in=20build=20ty?= =?UTF-8?q?pe=20check=20(#5297)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/lib/version.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-app/src/lib/version.ts b/web-app/src/lib/version.ts index f8e7df5b0..fbbe217b3 100644 --- a/web-app/src/lib/version.ts +++ b/web-app/src/lib/version.ts @@ -2,4 +2,4 @@ import { isDev } from './utils' export const isNightly = VERSION.includes('-') export const isBeta = VERSION.includes('beta') -export const isProd = !isNightly && !isBeta && !isDev +export const isProd = !isNightly && !isBeta && !isDev() From 4dfc9f604a5a2dc6229d4bede83cee8f72ede6d1 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Mon, 16 Jun 2025 20:53:48 +0700 Subject: [PATCH 22/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20remove=20onboarding=20?= =?UTF-8?q?local=20model=20and=20hide=20the=20edit=20capabilities=20model?= =?UTF-8?q?=20(#5301)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: remove onboarding local model and hide the edit capabilities model * ♻️refactor: conditional search params setup screen --- web-app/src/containers/SetupScreen.tsx | 5 +++-- .../src/routes/settings/providers/$providerName.tsx | 11 +++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/web-app/src/containers/SetupScreen.tsx b/web-app/src/containers/SetupScreen.tsx index 6f3f07873..cf8e32c84 100644 --- a/web-app/src/containers/SetupScreen.tsx +++ b/web-app/src/containers/SetupScreen.tsx @@ -3,6 +3,7 @@ import { useModelProvider } from '@/hooks/useModelProvider' import { Link } from '@tanstack/react-router' import { route } from '@/constants/routes' import HeaderPage from './HeaderPage' +import { isProd } from '@/lib/version' function SetupScreen() { const { providers } = useModelProvider() @@ -19,7 +20,7 @@ function SetupScreen() { Welcome to Jan

- To get started, you’ll need to either download a local AI model or + To get started, you'll need to either download a local AI model or connect to a cloud model using an API key

@@ -29,7 +30,7 @@ function SetupScreen() {
diff --git a/web-app/src/routes/settings/providers/$providerName.tsx b/web-app/src/routes/settings/providers/$providerName.tsx index 7ed4e3969..3d8f67af5 100644 --- a/web-app/src/routes/settings/providers/$providerName.tsx +++ b/web-app/src/routes/settings/providers/$providerName.tsx @@ -39,6 +39,7 @@ import { toast } from 'sonner' import { ActiveModel } from '@/types/models' import { useEffect, useState } from 'react' import { predefinedProviders } from '@/mock/data' +import { isProd } from '@/lib/version' // as route.threadsDetail export const Route = createFileRoute('/settings/providers/$providerName')({ @@ -459,10 +460,12 @@ function ProviderDetail() { } actions={
- + {!isProd && ( + + )} {model.settings && ( Date: Mon, 16 Jun 2025 21:50:23 +0700 Subject: [PATCH 23/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20hide=20token=20speed?= =?UTF-8?q?=20when=20assistant=20params=20stream=20false=20(#5302)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ThreadContent.tsx | 11 ----------- web-app/src/containers/TokenSpeedIndicator.tsx | 11 ++++++++++- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index af359d32e..3079f3833 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -417,17 +417,6 @@ export const ThreadContent = memo( />
- - - - - diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx index 5309d890c..8ca08d707 100644 --- a/web-app/src/containers/TokenSpeedIndicator.tsx +++ b/web-app/src/containers/TokenSpeedIndicator.tsx @@ -14,10 +14,19 @@ export const TokenSpeedIndicator = ({ const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number }) ?.tokenSpeed + const nonStreamingAssistantParam = + typeof metadata?.assistant === 'object' && + metadata?.assistant !== null && + 'parameters' in metadata.assistant + ? (metadata.assistant as { parameters?: { stream?: boolean } }).parameters + ?.stream === false + : undefined + + if (nonStreamingAssistantParam) return + return (
- {Math.round( streaming ? Number(tokenSpeed?.tokenSpeed) : persistedTokenSpeed From a745d24fbe7d42aaba87c407f05288567de3db90 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Tue, 17 Jun 2025 10:00:55 +0700 Subject: [PATCH 24/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20glitch=20padding=20spe?= =?UTF-8?q?ed=20token=20(#5307)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ThreadContent.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 3079f3833..9e491bf55 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -358,7 +358,7 @@ export const ThreadContent = memo( ) : null} {!isToolCalls && ( -
+
Date: Tue, 17 Jun 2025 12:46:23 +0700 Subject: [PATCH 25/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20immediately=20show=20d?= =?UTF-8?q?ownload=20progress=20(#5308)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/DownloadManegement.tsx | 69 +++++++++++++------ web-app/src/hooks/useDownloadStore.ts | 18 +++++ web-app/src/routes/hub.tsx | 30 ++++++-- 3 files changed, 90 insertions(+), 27 deletions(-) diff --git a/web-app/src/containers/DownloadManegement.tsx b/web-app/src/containers/DownloadManegement.tsx index 47b448485..db78181c6 100644 --- a/web-app/src/containers/DownloadManegement.tsx +++ b/web-app/src/containers/DownloadManegement.tsx @@ -19,7 +19,13 @@ export function DownloadManagement() { const { setProviders } = useModelProvider() const { open: isLeftPanelOpen } = useLeftPanel() const [isPopoverOpen, setIsPopoverOpen] = useState(false) - const { downloads, updateProgress, removeDownload } = useDownloadStore() + const { + downloads, + updateProgress, + localDownloadingModels, + removeDownload, + removeLocalDownloadingModel, + } = useDownloadStore() const { updateState } = useAppUpdater() const [appUpdateState, setAppUpdateState] = useState({ @@ -76,23 +82,36 @@ export function DownloadManagement() { }) }, []) + const downloadProcesses = useMemo(() => { + // Get downloads with progress data + const downloadsWithProgress = Object.values(downloads).map((download) => ({ + id: download.name, + name: download.name, + progress: download.progress, + current: download.current, + total: download.total, + })) + + // Add local downloading models that don't have progress data yet + const localDownloadsWithoutProgress = Array.from(localDownloadingModels) + .filter((modelId) => !downloads[modelId]) // Only include models not in downloads + .map((modelId) => ({ + id: modelId, + name: modelId, + progress: 0, + current: 0, + total: 0, + })) + + return [...downloadsWithProgress, ...localDownloadsWithoutProgress] + }, [downloads, localDownloadingModels]) + const downloadCount = useMemo(() => { - const modelDownloads = Object.keys(downloads).length + const modelDownloads = downloadProcesses.length const appUpdateDownload = appUpdateState.isDownloading ? 
1 : 0 const total = modelDownloads + appUpdateDownload return total - }, [downloads, appUpdateState.isDownloading]) - const downloadProcesses = useMemo( - () => - Object.values(downloads).map((download) => ({ - id: download.name, - name: download.name, - progress: download.progress, - current: download.current, - total: download.total, - })), - [downloads] - ) + }, [downloadProcesses, appUpdateState.isDownloading]) const overallProgress = useMemo(() => { const modelTotal = downloadProcesses.reduce((acc, download) => { @@ -139,29 +158,32 @@ export function DownloadManagement() { (state: DownloadState) => { console.debug('onFileDownloadError', state) removeDownload(state.modelId) + removeLocalDownloadingModel(state.modelId) }, - [removeDownload] + [removeDownload, removeLocalDownloadingModel] ) const onFileDownloadStopped = useCallback( (state: DownloadState) => { console.debug('onFileDownloadError', state) removeDownload(state.modelId) + removeLocalDownloadingModel(state.modelId) }, - [removeDownload] + [removeDownload, removeLocalDownloadingModel] ) const onFileDownloadSuccess = useCallback( async (state: DownloadState) => { console.debug('onFileDownloadSuccess', state) removeDownload(state.modelId) + removeLocalDownloadingModel(state.modelId) getProviders().then(setProviders) toast.success('Download Complete', { id: 'download-complete', description: `The model ${state.modelId} has been downloaded`, }) }, - [removeDownload, setProviders] + [removeDownload, removeLocalDownloadingModel, setProviders] ) useEffect(() => { @@ -264,12 +286,16 @@ export function DownloadManagement() { />

{`${renderGB(appUpdateState.downloadedBytes)} / ${renderGB(appUpdateState.totalBytes)}`}{' '} - GB ({Math.round(appUpdateState.downloadProgress * 100)}%) + GB ({Math.round(appUpdateState.downloadProgress * 100)} + %)

)} {downloadProcesses.map((download) => ( -
+

{download.name} @@ -299,8 +325,9 @@ export function DownloadManagement() { className="my-2" />

- {`${renderGB(download.current)} / ${renderGB(download.total)}`}{' '} - GB ({Math.round(download.progress * 100)}%) + {download.total > 0 + ? `${renderGB(download.current)} / ${renderGB(download.total)} GB (${Math.round(download.progress * 100)}%)` + : 'Initializing download...'}

))} diff --git a/web-app/src/hooks/useDownloadStore.ts b/web-app/src/hooks/useDownloadStore.ts index 8a0e6ac19..48a5a347c 100644 --- a/web-app/src/hooks/useDownloadStore.ts +++ b/web-app/src/hooks/useDownloadStore.ts @@ -11,6 +11,7 @@ export interface DownloadProgressProps { // Zustand store for thinking block state export type DownloadState = { downloads: { [id: string]: DownloadProgressProps } + localDownloadingModels: Set removeDownload: (id: string) => void updateProgress: ( id: string, @@ -19,6 +20,8 @@ export type DownloadState = { current?: number, total?: number ) => void + addLocalDownloadingModel: (modelId: string) => void + removeLocalDownloadingModel: (modelId: string) => void } /** @@ -26,6 +29,7 @@ export type DownloadState = { */ export const useDownloadStore = create((set) => ({ downloads: {}, + localDownloadingModels: new Set(), removeDownload: (id: string) => set((state) => { // eslint-disable-next-line @typescript-eslint/no-unused-vars @@ -46,4 +50,18 @@ export const useDownloadStore = create((set) => ({ }, }, })), + + addLocalDownloadingModel: (modelId: string) => + set((state) => ({ + localDownloadingModels: new Set(state.localDownloadingModels).add( + modelId + ), + })), + + removeLocalDownloadingModel: (modelId: string) => + set((state) => { + const newSet = new Set(state.localDownloadingModels) + newSet.delete(modelId) + return { localDownloadingModels: newSet } + }), })) diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index c4327fb04..42147d0f8 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -182,7 +182,8 @@ function Hub() { } } - const { downloads } = useDownloadStore() + const { downloads, localDownloadingModels, addLocalDownloadingModel } = + useDownloadStore() const downloadProcesses = useMemo( () => @@ -225,7 +226,9 @@ function Hub() { model.models.find((e) => defaultModelQuantizations.some((m) => e.id.toLowerCase().includes(m)) )?.id ?? model.models[0]?.id - const isDownloading = downloadProcesses.some((e) => e.id === modelId) + const isDownloading = + localDownloadingModels.has(modelId) || + downloadProcesses.some((e) => e.id === modelId) const downloadProgress = downloadProcesses.find((e) => e.id === modelId)?.progress || 0 const isDownloaded = llamaProvider?.models.some( @@ -233,6 +236,12 @@ function Hub() { ) const isRecommended = isRecommendedModel(model.metadata?.id) + const handleDownload = () => { + // Immediately set local downloading state + addLocalDownloadingModel(modelId) + downloadModel(modelId) + } + return (
downloadModel(modelId)} + onClick={handleDownload} className={cn(isDownloading && 'hidden')} ref={isRecommended ? downloadButtonRef : undefined} > @@ -271,6 +280,8 @@ function Hub() { handleUseModel, isRecommendedModel, downloadButtonRef, + localDownloadingModels, + addLocalDownloadingModel, ]) const { step } = useSearch({ from: Route.id }) @@ -320,7 +331,8 @@ function Hub() { } // Check if any model is currently downloading - const isDownloading = downloadProcesses.length > 0 + const isDownloading = + localDownloadingModels.size > 0 || downloadProcesses.length > 0 const steps = [ { @@ -553,6 +565,9 @@ function Hub() {

{(() => { const isDownloading = + localDownloadingModels.has( + variant.id + ) || downloadProcesses.some( (e) => e.id === variant.id ) @@ -607,9 +622,12 @@ function Hub() {
+ onClick={() => { + addLocalDownloadingModel( + variant.id + ) downloadModel(variant.id) - } + }} > Date: Tue, 17 Jun 2025 13:15:40 +0700 Subject: [PATCH 26/48] =?UTF-8?q?=F0=9F=90=9Bfix:safely=20convert=20values?= =?UTF-8?q?=20to=20numbers=20and=20handle=20NaN=20cases=20(#5309)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/TokenSpeedIndicator.tsx | 3 ++- web-app/src/utils/number.ts | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 web-app/src/utils/number.ts diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx index 8ca08d707..74f277f70 100644 --- a/web-app/src/containers/TokenSpeedIndicator.tsx +++ b/web-app/src/containers/TokenSpeedIndicator.tsx @@ -1,4 +1,5 @@ import { useAppState } from '@/hooks/useAppState' +import { toNumber } from '@/utils/number' import { Gauge } from 'lucide-react' interface TokenSpeedIndicatorProps { @@ -29,7 +30,7 @@ export const TokenSpeedIndicator = ({ {Math.round( - streaming ? Number(tokenSpeed?.tokenSpeed) : persistedTokenSpeed + streaming ? toNumber(tokenSpeed?.tokenSpeed) : persistedTokenSpeed )}  tokens/sec diff --git a/web-app/src/utils/number.ts b/web-app/src/utils/number.ts new file mode 100644 index 000000000..866755ed6 --- /dev/null +++ b/web-app/src/utils/number.ts @@ -0,0 +1,4 @@ +export const toNumber = (value: unknown): number => { + const num = Number(value) + return isNaN(num) ? 0 : num +} From 50c25b65b66ebb2020e880c8999f847ce368243a Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 13:35:53 +0700 Subject: [PATCH 27/48] chore: correct binary name for stable version (#5303) (#5311) Co-authored-by: hiento09 <136591877+hiento09@users.noreply.github.com> --- .github/workflows/template-tauri-build-windows-x64.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/template-tauri-build-windows-x64.yml b/.github/workflows/template-tauri-build-windows-x64.yml index 47b5663cb..d17546955 100644 --- a/.github/workflows/template-tauri-build-windows-x64.yml +++ b/.github/workflows/template-tauri-build-windows-x64.yml @@ -160,6 +160,9 @@ jobs: sed -i "s/jan_productname/Jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template sed -i "s/jan_mainbinaryname/jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template + else + sed -i "s/jan_productname/Jan/g" ./src-tauri/tauri.bundle.windows.nsis.template + sed -i "s/jan_mainbinaryname/jan/g" ./src-tauri/tauri.bundle.windows.nsis.template fi echo "---------nsis.template---------" cat ./src-tauri/tauri.bundle.windows.nsis.template From 2899d58ad788d787cd6dcbb8970ab522757cacd1 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 15:30:07 +0700 Subject: [PATCH 28/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20llama.cpp=20default=20?= =?UTF-8?q?NGL=20setting=20does=20not=20offload=20all=20layers=20to=20GPU?= =?UTF-8?q?=20(#5310)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: llama.cpp default NGL setting does not offload all layers to GPU * chore: cover more cases * chore: clean up * fix: should not show GPU section on Mac --- core/src/browser/models/utils.ts | 2 +- .../inference-cortex-extension/src/index.ts | 6 ++- web-app/src/routes/settings/hardware.tsx | 50 ++++++++++--------- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/core/src/browser/models/utils.ts b/core/src/browser/models/utils.ts index 0e52441b2..2ac243b6a 100644 --- 
a/core/src/browser/models/utils.ts +++ b/core/src/browser/models/utils.ts @@ -17,7 +17,7 @@ export const validationRules: { [key: string]: (value: any) => boolean } = { presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, ctx_len: (value: any) => Number.isInteger(value) && value >= 0, - ngl: (value: any) => Number.isInteger(value) && value >= 0, + ngl: (value: any) => Number.isInteger(value), embedding: (value: any) => typeof value === 'boolean', n_parallel: (value: any) => Number.isInteger(value) && value >= 0, cpu_threads: (value: any) => Number.isInteger(value) && value >= 0, diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 3e8b60ebe..d80bad3d4 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -253,11 +253,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { } } } + const modelSettings = extractModelLoadParams(model.settings) return await this.apiInstance().then((api) => api .post('v1/models/start', { json: { - ...extractModelLoadParams(model.settings), + ...modelSettings, model: model.id, engine: model.engine === 'nitro' // Legacy model cache @@ -282,6 +283,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ...(this.context_shift === false ? { 'no-context-shift': true } : {}), + ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined + ? { ngl: 100 } + : {}), }, timeout: false, signal, diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index dcd301773..53de1f9b3 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -371,30 +371,34 @@ function Hardware() { )} {/* GPU Information */} - - {hardwareData.gpus.length > 0 ? ( - - gpu.id)} - strategy={verticalListSortingStrategy} + {!IS_MACOS ? ( + + {hardwareData.gpus.length > 0 ? ( + - {hardwareData.gpus.map((gpu, index) => ( - - ))} - - - ) : ( - } /> - )} - + gpu.id)} + strategy={verticalListSortingStrategy} + > + {hardwareData.gpus.map((gpu, index) => ( + + ))} + + + ) : ( + } /> + )} + + ) : ( + <> + )}
From 86079074d232ef832d134f5b3aad95112c4fe623 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 17:37:31 +0700 Subject: [PATCH 29/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20update=20default=20ext?= =?UTF-8?q?ension=20settings=20(#5315)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: update default extension settings * chore: hide language setting on Prod --- .../resources/default_settings.json | 2 +- extensions/inference-cortex-extension/src/index.ts | 4 ++-- web-app/src/hooks/useChat.ts | 4 ---- web-app/src/routes/settings/general.tsx | 11 +++++++---- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json index d27624639..54d578293 100644 --- a/extensions/inference-cortex-extension/resources/default_settings.json +++ b/extensions/inference-cortex-extension/resources/default_settings.json @@ -14,7 +14,7 @@ "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.", "controllerType": "checkbox", "controllerProps": { - "value": true + "value": false } }, { diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index d80bad3d4..dd78e2d62 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -64,7 +64,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { cpu_threads?: number auto_unload_models: boolean = true reasoning_budget = -1 // Default reasoning budget in seconds - context_shift = true + context_shift = false /** * The URL for making inference requests. 
*/ @@ -132,7 +132,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { this.flash_attn = await this.getSetting(Settings.flash_attn, true) this.context_shift = await this.getSetting( Settings.context_shift, - true + false ) this.use_mmap = await this.getSetting(Settings.use_mmap, true) if (this.caching_enabled) diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 4b0e3e6e8..2c8f9fd2a 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -180,10 +180,6 @@ export const useChat = () => { if (updatedProvider) await restartModel(updatedProvider, model.id, controller) - console.log( - updatedProvider?.models.find((e) => e.id === model.id)?.settings - ?.ctx_len?.controller_props.value - ) return updatedProvider }, [getProviderByName, restartModel, updateProvider] diff --git a/web-app/src/routes/settings/general.tsx b/web-app/src/routes/settings/general.tsx index 572d7f916..33eafc341 100644 --- a/web-app/src/routes/settings/general.tsx +++ b/web-app/src/routes/settings/general.tsx @@ -45,6 +45,7 @@ import { isDev } from '@/lib/utils' import { emit } from '@tauri-apps/api/event' import { stopAllModels } from '@/services/models' import { SystemEvent } from '@/types/events' +import { isProd } from '@/lib/version' // eslint-disable-next-line @typescript-eslint/no-explicit-any export const Route = createFileRoute(route.settings.general as any)({ @@ -234,10 +235,12 @@ function General() { } /> - } - /> + {!isProd && ( + } + /> + )} {/* Data folder */} From 11d767633e1340c040499930d9ba07218237771a Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Tue, 17 Jun 2025 18:35:02 +0700 Subject: [PATCH 30/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20allow=20script=20posth?= =?UTF-8?q?og=20(#5316)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-tauri/tauri.conf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 1810c0661..04191e842 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -45,7 +45,7 @@ ], "img-src": "'self' asset: http://asset.localhost blob: data: https://cdn.jsdelivr.net", "style-src": "'unsafe-inline' 'self' https://fonts.googleapis.com", - "script-src": "'self' asset: $APPDATA/**.* http://asset.localhost" + "script-src": "'self' asset: $APPDATA/**.* http://asset.localhost https://eu-assets.i.posthog.com https://posthog.com" }, "assetProtocol": { "enable": true, From ebc268018bdf457408eefcfbf0e50c6ab3c5df1a Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 21:08:56 +0700 Subject: [PATCH 31/48] Sync 0.5.18 to 0.6.0 (#5320) * chore: correct binary name for stable version (#5303) * ci: enable devtool on prod build (#5317) * ci: enable devtool on prod build --------- Co-authored-by: hiento09 <136591877+hiento09@users.noreply.github.com> Co-authored-by: Nguyen Ngoc Minh <91668012+Minh141120@users.noreply.github.com> --- .github/workflows/template-tauri-build-linux-x64.yml | 4 ++++ .github/workflows/template-tauri-build-macos.yml | 4 ++++ .github/workflows/template-tauri-build-windows-x64.yml | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/.github/workflows/template-tauri-build-linux-x64.yml b/.github/workflows/template-tauri-build-linux-x64.yml index 6c47c79f2..9356c3f28 100644 --- a/.github/workflows/template-tauri-build-linux-x64.yml +++ b/.github/workflows/template-tauri-build-linux-x64.yml @@ -122,6 +122,10 @@ jobs: jq --arg version "${{ inputs.new_version }}" 
'.version = $version' web-app/package.json > /tmp/package.json mv /tmp/package.json web-app/package.json + # Temporarily enable devtool on prod build + ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools" + cat ./src-tauri/Cargo.toml + ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}" cat ./src-tauri/Cargo.toml diff --git a/.github/workflows/template-tauri-build-macos.yml b/.github/workflows/template-tauri-build-macos.yml index 6999ff77e..086e14ad2 100644 --- a/.github/workflows/template-tauri-build-macos.yml +++ b/.github/workflows/template-tauri-build-macos.yml @@ -126,6 +126,10 @@ jobs: ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}" cat ./src-tauri/Cargo.toml + # Temporarily enable devtool on prod build + ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools" + cat ./src-tauri/Cargo.toml + # Change app name for beta and nightly builds if [ "${{ inputs.channel }}" != "stable" ]; then jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json diff --git a/.github/workflows/template-tauri-build-windows-x64.yml b/.github/workflows/template-tauri-build-windows-x64.yml index d17546955..2ab6d7ad9 100644 --- a/.github/workflows/template-tauri-build-windows-x64.yml +++ b/.github/workflows/template-tauri-build-windows-x64.yml @@ -137,6 +137,10 @@ jobs: sed -i "s/jan_version/$new_base_version/g" ./src-tauri/tauri.bundle.windows.nsis.template sed -i "s/jan_build/$new_build_version/g" ./src-tauri/tauri.bundle.windows.nsis.template + # Temporarily enable devtool on prod build + ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools" + cat ./src-tauri/Cargo.toml + # Change app name for beta and nightly builds if [ "${{ inputs.channel }}" != "stable" ]; then jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json From 52bb54d2d50ceee87e72d6d30cd085bd729c5f04 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 23:01:22 +0700 Subject: [PATCH 32/48] fix: glitch model download issue (#5322) --- .../model-extension/resources/default.json | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/extensions/model-extension/resources/default.json b/extensions/model-extension/resources/default.json index 32bc278e4..bd7c7e63b 100644 --- a/extensions/model-extension/resources/default.json +++ b/extensions/model-extension/resources/default.json @@ -125,59 +125,59 @@ }, "models": [ { - "id": "Menlo:Jan-nano:jan-nano-4b-iQ4_XS.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-iQ4_XS.gguf", "size": 2270750400 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_L.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_L.gguf", "size": 2239784384 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_M.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_M.gguf", "size": 2075616704 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_S.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_S.gguf", "size": 1886995904 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q4_0.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_0.gguf", "size": 2369545664 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q4_1.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_1.gguf", "size": 2596627904 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q4_K_M.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_K_M.gguf", "size": 2497279424 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q4_K_S.gguf", + 
"id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_K_S.gguf", "size": 2383308224 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q5_0.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_0.gguf", "size": 2823710144 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q5_1.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_1.gguf", "size": 3050792384 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q5_K_M.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_K_M.gguf", "size": 2889512384 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q5_K_S.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_K_S.gguf", "size": 2823710144 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q6_K.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q6_K.gguf", "size": 3306259904 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q8_0.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q8_0.gguf", "size": 4280403904 } ] From 3f07358125457a95e3f8cd04d4dee2326a0b7770 Mon Sep 17 00:00:00 2001 From: Nguyen Ngoc Minh <91668012+Minh141120@users.noreply.github.com> Date: Tue, 17 Jun 2025 23:43:15 +0700 Subject: [PATCH 33/48] =?UTF-8?q?=F0=9F=90=9B=20fix(updater):=20terminate?= =?UTF-8?q?=20sidecar=20processes=20before=20update=20to=20avoid=20file=20?= =?UTF-8?q?access=20errors=20(#5325)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-tauri/src/core/setup.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src-tauri/src/core/setup.rs b/src-tauri/src/core/setup.rs index 6883b2a3b..602080378 100644 --- a/src-tauri/src/core/setup.rs +++ b/src-tauri/src/core/setup.rs @@ -291,6 +291,7 @@ pub fn setup_sidecar(app: &App) -> Result<(), String> { } else { log::warn!("Kill event received, but no active sidecar process found to kill."); } + clean_up() }); }); From 1160ea140b4fe6774da849bf24e574b0af69820f Mon Sep 17 00:00:00 2001 From: Sam Hoang Van Date: Wed, 18 Jun 2025 00:23:53 +0700 Subject: [PATCH 34/48] =?UTF-8?q?=F0=9F=90=9B=20fix:=20disable=20sorting?= =?UTF-8?q?=20for=20threads=20in=20SortableItem=20and=20clean=20up=20threa?= =?UTF-8?q?d=20order=20handling=20(#5326)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ThreadList.tsx | 44 ++------------------------- web-app/src/hooks/useThreads.ts | 35 +-------------------- 2 files changed, 3 insertions(+), 76 deletions(-) diff --git a/web-app/src/containers/ThreadList.tsx b/web-app/src/containers/ThreadList.tsx index 44f8bbfc4..0702c1bf9 100644 --- a/web-app/src/containers/ThreadList.tsx +++ b/web-app/src/containers/ThreadList.tsx @@ -9,7 +9,6 @@ import { import { SortableContext, verticalListSortingStrategy, - arrayMove, useSortable, } from '@dnd-kit/sortable' import { CSS } from '@dnd-kit/utilities' @@ -54,7 +53,7 @@ const SortableItem = memo(({ thread }: { thread: Thread }) => { transform, transition, isDragging, - } = useSortable({ id: thread.id }) + } = useSortable({ id: thread.id, disabled: true }) const style = { transform: CSS.Transform.toString(transform), @@ -263,18 +262,8 @@ type ThreadListProps = { } function ThreadList({ threads }: ThreadListProps) { - const { setThreads } = useThreads() - const sortedThreads = useMemo(() => { return threads.sort((a, b) => { - // If both have order, sort by order (ascending, so lower order comes first) - if (a.order != null && b.order != null) { - return a.order - b.order - } - // If only one has order, prioritize the one with order (order comes first) - if (a.order != null) return -1 - if (b.order != null) return 1 - // If neither has order, sort by updated time (newer threads first) 
return (b.updated || 0) - (a.updated || 0) }) }, [threads]) @@ -290,36 +279,7 @@ function ThreadList({ threads }: ThreadListProps) { ) return ( - { - const { active, over } = event - if (active.id !== over?.id && over) { - // Access Global State - const allThreadsMap = useThreads.getState().threads - const allThreadsArray = Object.values(allThreadsMap) - - // Calculate Global Indices - const oldIndexInGlobal = allThreadsArray.findIndex( - (t) => t.id === active.id - ) - const newIndexInGlobal = allThreadsArray.findIndex( - (t) => t.id === over.id - ) - - // Reorder Globally and Update State - if (oldIndexInGlobal !== -1 && newIndexInGlobal !== -1) { - const reorderedGlobalThreads = arrayMove( - allThreadsArray, - oldIndexInGlobal, - newIndexInGlobal - ) - setThreads(reorderedGlobalThreads) - } - } - }} - > + t.id)} strategy={verticalListSortingStrategy} diff --git a/web-app/src/hooks/useThreads.ts b/web-app/src/hooks/useThreads.ts index 806749b73..7bc46675e 100644 --- a/web-app/src/hooks/useThreads.ts +++ b/web-app/src/hooks/useThreads.ts @@ -32,11 +32,9 @@ export const useThreads = create()((set, get) => ({ threads: {}, searchIndex: null, setThreads: (threads) => { - threads.forEach((thread, index) => { - thread.order = index + 1 + threads.forEach((thread) => { updateThread({ ...thread, - order: index + 1, }) }) const threadMap = threads.reduce( @@ -159,7 +157,6 @@ export const useThreads = create()((set, get) => ({ id: ulid(), title: title ?? 'New Thread', model, - // order: 1, // Will be set properly by setThreads updated: Date.now() / 1000, assistants: assistant ? [assistant] : [], } @@ -244,44 +241,14 @@ export const useThreads = create()((set, get) => ({ const thread = state.threads[threadId] if (!thread) return state - // If the thread is already at order 1, just update the timestamp - if (thread.order === 1) { - const updatedThread = { - ...thread, - updated: Date.now() / 1000, - } - updateThread(updatedThread) - - return { - threads: { - ...state.threads, - [threadId]: updatedThread, - }, - } - } - // Update the thread with new timestamp and set it to order 1 (top) const updatedThread = { ...thread, updated: Date.now() / 1000, - order: 1, } // Update all other threads to increment their order by 1 const updatedThreads = { ...state.threads } - Object.keys(updatedThreads).forEach((id) => { - if (id !== threadId) { - const otherThread = updatedThreads[id] - updatedThreads[id] = { - ...otherThread, - order: (otherThread.order || 1) + 1, - } - // Update the backend for other threads - updateThread(updatedThreads[id]) - } - }) - - // Set the updated thread updatedThreads[threadId] = updatedThread // Update the backend for the main thread From 06be778e9819e7e76c5310759832686c3feb2f9c Mon Sep 17 00:00:00 2001 From: Ramon Perez Date: Wed, 18 Jun 2025 02:17:15 +1000 Subject: [PATCH 35/48] improved wording in UI elements (#5323) --- web-app/src/locales/en/settings.json | 8 ++--- web-app/src/routes/settings/appearance.tsx | 24 +++++++-------- web-app/src/routes/settings/general.tsx | 30 ++++++++++--------- web-app/src/routes/settings/hardware.tsx | 2 +- web-app/src/routes/settings/https-proxy.tsx | 16 +++++----- .../src/routes/settings/local-api-server.tsx | 18 +++++------ web-app/src/routes/settings/privacy.tsx | 18 +++++------ web-app/src/routes/settings/shortcuts.tsx | 14 ++++----- 8 files changed, 65 insertions(+), 65 deletions(-) diff --git a/web-app/src/locales/en/settings.json b/web-app/src/locales/en/settings.json index 67edc9c8b..0422c28ca 100644 --- 
a/web-app/src/locales/en/settings.json +++ b/web-app/src/locales/en/settings.json @@ -5,15 +5,15 @@ }, "dataFolder": { "appData": "App Data", - "appDataDesc": "Default location for messages and other user data", + "appDataDesc": "Default location for messages and other user data.", "appLogs": "App Logs", - "appLogsDesc": "Default location App Logs" + "appLogsDesc": "Default location App Logs." }, "others": { "spellCheck": "Spell Check", - "spellCheckDesc": "Turn on to enable spell check chat input.", + "spellCheckDesc": "Enable spell check for your threads.", "resetFactory": "Reset To Factory Settings", - "resetFactoryDesc": "Restore application to its initial state, erasing all models and chat history. This action is irreversible and recommended only if the application is corrupted" + "resetFactoryDesc": "Restore application to its initial state, erasing all models and chat history. This action is irreversible and recommended only if the application is corrupted." } } } diff --git a/web-app/src/routes/settings/appearance.tsx b/web-app/src/routes/settings/appearance.tsx index 21b99c73e..cd3306080 100644 --- a/web-app/src/routes/settings/appearance.tsx +++ b/web-app/src/routes/settings/appearance.tsx @@ -43,43 +43,43 @@ function Appareances() { } /> } /> } /> } /> } /> } /> } /> @@ -112,18 +112,18 @@ function Appareances() { } /> } />
} diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index 53de1f9b3..ab42a15f9 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -352,7 +352,7 @@ function Hardware() { -

URL and port of your proxy server.

+

The URL and port of your proxy server.

-

Credentials for your proxy server (if required).

+

Credentials for the proxy server, if required.

-

List of hosts that should bypass the proxy.

+

A comma-separated list of hosts to bypass the proxy.

Local API Server

- Start an OpenAI-compatible local HTTP server. + Run an OpenAI-compatible server locally.

@@ -165,7 +165,7 @@ function LocalAPIServer() { > - By opting in, you help us make Jan better by sharing - anonymous data, like feature usage and user counts. Your - chats and personal information are never collected. + To help us improve Jan, you can share anonymous data like + feature usage and user counts. We never collect your chats + or personal information.

} align="start" @@ -63,17 +63,15 @@ function Privacy() { description={

- We prioritize your control over your data. Learn more - about our Privacy Policy. + You have full control over your data. Learn more in our + Privacy Policy.

- To make Jan better, we need to understand how it’s used - - but only if you choose to help. You can change your Jan - Analytics settings anytime. + To improve Jan, we need to understand how it’s used—but + only with your help. You can change this setting anytime.

- Your choice to opt-in or out doesn't change our core - privacy promises: + Your choice here won’t change our core privacy promises:

  • Your chats are never read
  • diff --git a/web-app/src/routes/settings/shortcuts.tsx b/web-app/src/routes/settings/shortcuts.tsx index 2d46ad7c2..2aac82447 100644 --- a/web-app/src/routes/settings/shortcuts.tsx +++ b/web-app/src/routes/settings/shortcuts.tsx @@ -27,7 +27,7 @@ function Shortcuts() { @@ -38,7 +38,7 @@ function Shortcuts() { /> @@ -49,7 +49,7 @@ function Shortcuts() { /> @@ -60,7 +60,7 @@ function Shortcuts() { /> @@ -75,7 +75,7 @@ function Shortcuts() { Enter @@ -84,7 +84,7 @@ function Shortcuts() { /> Shift + Enter @@ -97,7 +97,7 @@ function Shortcuts() { From b30de3d1ae98b1e05a5db3ed2a24b307de155252 Mon Sep 17 00:00:00 2001 From: Sam Hoang Van Date: Wed, 18 Jun 2025 09:39:18 +0700 Subject: [PATCH 36/48] fix: sorted-thread-not-stable (#5336) --- web-app/src/hooks/useThreads.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/web-app/src/hooks/useThreads.ts b/web-app/src/hooks/useThreads.ts index 7bc46675e..f8f990b44 100644 --- a/web-app/src/hooks/useThreads.ts +++ b/web-app/src/hooks/useThreads.ts @@ -32,11 +32,6 @@ export const useThreads = create()((set, get) => ({ threads: {}, searchIndex: null, setThreads: (threads) => { - threads.forEach((thread) => { - updateThread({ - ...thread, - }) - }) const threadMap = threads.reduce( (acc: Record, thread) => { acc[thread.id] = thread From 417b789b5ee7a30a995e36ca0125b6bbe0e10f97 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Wed, 18 Jun 2025 12:35:37 +0700 Subject: [PATCH 37/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20update=20wording=20des?= =?UTF-8?q?c=20vulkan=20(#5338)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: update wording desc vulkan * ✨enhancement: update copy --- web-app/src/routes/settings/hardware.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index ab42a15f9..beab425fe 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -352,7 +352,7 @@ function Hardware() { Date: Wed, 18 Jun 2025 12:35:42 +0700 Subject: [PATCH 38/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20handle=20NaN=20value?= =?UTF-8?q?=20tokenspeed=20(#5339)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/TokenSpeedIndicator.tsx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx index 74f277f70..ea9f91be0 100644 --- a/web-app/src/containers/TokenSpeedIndicator.tsx +++ b/web-app/src/containers/TokenSpeedIndicator.tsx @@ -12,8 +12,8 @@ export const TokenSpeedIndicator = ({ streaming, }: TokenSpeedIndicatorProps) => { const { tokenSpeed } = useAppState() - const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number }) - ?.tokenSpeed + const persistedTokenSpeed = + (metadata?.tokenSpeed as { tokenSpeed: number })?.tokenSpeed || 0 const nonStreamingAssistantParam = typeof metadata?.assistant === 'object' && @@ -30,7 +30,9 @@ export const TokenSpeedIndicator = ({ {Math.round( - streaming ? toNumber(tokenSpeed?.tokenSpeed) : persistedTokenSpeed + streaming + ? 
toNumber(tokenSpeed?.tokenSpeed) + : toNumber(persistedTokenSpeed) )}  tokens/sec From a075debb0538e8319377d3ba32961773f67e985e Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Wed, 18 Jun 2025 14:02:36 +0800 Subject: [PATCH 39/48] =?UTF-8?q?=F0=9F=90=9B=20fix:=20window=20path=20pro?= =?UTF-8?q?blem?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-tauri/src/core/setup.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src-tauri/src/core/setup.rs b/src-tauri/src/core/setup.rs index 602080378..c2d3499f3 100644 --- a/src-tauri/src/core/setup.rs +++ b/src-tauri/src/core/setup.rs @@ -247,7 +247,10 @@ pub fn setup_sidecar(app: &App) -> Result<(), String> { ]); #[cfg(target_os = "windows")] { - cmd = cmd.current_dir(app_handle_for_spawn.path().resource_dir().unwrap()); + let resource_dir = app_handle_for_spawn.path().resource_dir().unwrap(); + let normalized_path = resource_dir.to_string_lossy().replace(r"\\?\", ""); + let normalized_pathbuf = PathBuf::from(normalized_path); + cmd = cmd.current_dir(normalized_pathbuf); } #[cfg(not(target_os = "windows"))] From 369ba5ac75669d30599c04fdb383013e8bad2db9 Mon Sep 17 00:00:00 2001 From: Sam Hoang Van Date: Wed, 18 Jun 2025 14:11:53 +0700 Subject: [PATCH 40/48] feat(server): filter /models endpoint to show only downloaded models (#5343) - Add filtering logic to proxy server for GET /models requests - Keep only models with status "downloaded" in response - Remove Content-Length header to prevent mismatch after filtering - Support both ListModelsResponseDto and direct array formats - Add comprehensive tests for filtering functionality - Fix Content-Length header conflict causing empty responses Fixes issue where all models were returned regardless of download status. 
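A minimal TypeScript sketch of the filtering rule described above, for illustration only; the shipped change is the Rust `filter_models_response` added in the diff below. The type and helper names here are assumptions, not part of the patch; only the `status === 'downloaded'` check and the two response shapes come from the commit message.

```typescript
// Illustration of the /models filtering rule (not the shipped Rust code).
type ModelEntry = { id: string; status?: string }
type ModelsResponse = { data: ModelEntry[] } | ModelEntry[]

// Keep only entries whose status is exactly "downloaded";
// entries without a status field are dropped as well.
const isDownloaded = (m: ModelEntry): boolean => m.status === 'downloaded'

// Hypothetical helper; handles both shapes the commit message mentions.
function keepDownloadedModels(response: ModelsResponse): ModelsResponse {
  if (Array.isArray(response)) {
    // Direct array format: [ { id, status }, ... ]
    return response.filter(isDownloaded)
  }
  // ListModelsResponseDto format: { data: [ { id, status }, ... ] }
  return { ...response, data: response.data.filter(isDownloaded) }
}

// keepDownloadedModels({ data: [{ id: 'a', status: 'downloaded' }, { id: 'b' }] })
// -> { data: [{ id: 'a', status: 'downloaded' }] }
```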
--- src-tauri/src/core/server.rs | 202 ++++++++++++++++++++++++++++++++++- 1 file changed, 199 insertions(+), 3 deletions(-) diff --git a/src-tauri/src/core/server.rs b/src-tauri/src/core/server.rs index e5a784670..f4f270106 100644 --- a/src-tauri/src/core/server.rs +++ b/src-tauri/src/core/server.rs @@ -1,6 +1,7 @@ use hyper::service::{make_service_fn, service_fn}; use hyper::{Body, Request, Response, Server, StatusCode}; use reqwest::Client; +use serde_json::Value; use std::convert::Infallible; use std::net::SocketAddr; use std::sync::LazyLock; @@ -263,6 +264,7 @@ async fn proxy_request( let original_path = req.uri().path(); let path = get_destination_path(original_path, &config.prefix); + let method = req.method().clone(); // Verify Host header (check target), but bypass for whitelisted paths let whitelisted_paths = ["/", "/openapi.json", "/favicon.ico"]; @@ -368,10 +370,11 @@ async fn proxy_request( let mut builder = Response::builder().status(status); - // Copy response headers, excluding CORS headers to avoid conflicts + // Copy response headers, excluding CORS headers and Content-Length to avoid conflicts for (name, value) in response.headers() { // Skip CORS headers from upstream to avoid duplicates - if !is_cors_header(name.as_str()) { + // Skip Content-Length header when filtering models response to avoid mismatch + if !is_cors_header(name.as_str()) && name != hyper::header::CONTENT_LENGTH { builder = builder.header(name, value); } } @@ -386,7 +389,20 @@ async fn proxy_request( // Read response body match response.bytes().await { - Ok(bytes) => Ok(builder.body(Body::from(bytes)).unwrap()), + Ok(bytes) => { + // Check if this is a /models endpoint request and filter the response + if path.contains("/models") && method == hyper::Method::GET { + match filter_models_response(&bytes) { + Ok(filtered_bytes) => Ok(builder.body(Body::from(filtered_bytes)).unwrap()), + Err(e) => { + log::warn!("Failed to filter models response: {}, returning original", e); + Ok(builder.body(Body::from(bytes)).unwrap()) + } + } + } else { + Ok(builder.body(Body::from(bytes)).unwrap()) + } + }, Err(e) => { log::error!("Failed to read response body: {}", e); let mut error_response = @@ -419,6 +435,50 @@ async fn proxy_request( } } +/// Filters models response to keep only models with status "downloaded" +fn filter_models_response(bytes: &[u8]) -> Result, Box> { + let response_text = std::str::from_utf8(bytes)?; + let mut response_json: Value = serde_json::from_str(response_text)?; + + // Check if this is a ListModelsResponseDto format with data array + if let Some(data_array) = response_json.get_mut("data") { + if let Some(models) = data_array.as_array_mut() { + // Keep only models where status == "downloaded" + models.retain(|model| { + if let Some(status) = model.get("status") { + if let Some(status_str) = status.as_str() { + status_str == "downloaded" + } else { + false // Remove models without string status + } + } else { + false // Remove models without status field + } + }); + log::debug!("Filtered models response: {} downloaded models remaining", models.len()); + } + } else if response_json.is_array() { + // Handle direct array format + if let Some(models) = response_json.as_array_mut() { + models.retain(|model| { + if let Some(status) = model.get("status") { + if let Some(status_str) = status.as_str() { + status_str == "downloaded" + } else { + false // Remove models without string status + } + } else { + false // Remove models without status field + } + }); + log::debug!("Filtered models response: 
{} downloaded models remaining", models.len()); + } + } + + let filtered_response = serde_json::to_vec(&response_json)?; + Ok(filtered_response) +} + /// Checks if a header is a CORS-related header that should be filtered out from upstream responses fn is_cors_header(header_name: &str) -> bool { let header_lower = header_name.to_lowercase(); @@ -585,3 +645,139 @@ pub async fn stop_server() -> Result<(), Box Date: Wed, 18 Jun 2025 14:54:56 +0700 Subject: [PATCH 41/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20render=20streaming=20t?= =?UTF-8?q?oken=20speed=20based=20on=20thread=20ID=20&=20assistant=20metad?= =?UTF-8?q?ata=20(#5346)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ThreadContent.tsx | 12 ++++++++++-- web-app/src/hooks/useMessages.ts | 8 +++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 9e491bf55..076327ea6 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -363,7 +363,10 @@ export const ThreadContent = memo(
    @@ -439,7 +442,11 @@ export const ThreadContent = memo(
@@ -447,6 +454,7 @@ export const ThreadContent = memo( )} )} + {item.type === 'image_url' && image && (
()((set, get) => ({ created_at: message.created_at || Date.now(), metadata: { ...message.metadata, - assistant: currentAssistant, + assistant: { + id: currentAssistant?.id || '', + name: currentAssistant?.name || '', + avatar: currentAssistant?.avatar || '', + instructions: currentAssistant?.instructions || '', + parameters: currentAssistant?.parameters || '', + }, }, } createMessage(newMessage).then((createdMessage) => { From 6cee466f52a95be45a34bbedcc1eea457c38872e Mon Sep 17 00:00:00 2001 From: Sam Hoang Van Date: Wed, 18 Jun 2025 15:30:31 +0700 Subject: [PATCH 42/48] fix(server): add gzip decompression support for /models endpoint filtering (#5349) - Add gzip detection using magic number check (0x1f 0x8b) - Implement gzip decompression before JSON parsing - Add gzip re-compression for filtered responses - Fix "invalid utf-8 sequence" error when upstream returns gzipped content - Maintain Content-Encoding consistency for compressed responses - Add comprehensive gzip handling with flate2 library Resolves issue where filtering failed on gzip-compressed model responses. --- src-tauri/src/core/server.rs | 48 +++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/src-tauri/src/core/server.rs b/src-tauri/src/core/server.rs index f4f270106..627ec6a7c 100644 --- a/src-tauri/src/core/server.rs +++ b/src-tauri/src/core/server.rs @@ -7,6 +7,8 @@ use std::net::SocketAddr; use std::sync::LazyLock; use tokio::sync::Mutex; use tokio::task::JoinHandle; +use flate2::read::GzDecoder; +use std::io::Read; /// Server handle type for managing the proxy server lifecycle type ServerHandle = JoinHandle>>; @@ -435,9 +437,42 @@ async fn proxy_request( } } +/// Checks if the byte array starts with gzip magic number +fn is_gzip_encoded(bytes: &[u8]) -> bool { + bytes.len() >= 2 && bytes[0] == 0x1f && bytes[1] == 0x8b +} + +/// Decompresses gzip-encoded bytes +fn decompress_gzip(bytes: &[u8]) -> Result, Box> { + let mut decoder = GzDecoder::new(bytes); + let mut decompressed = Vec::new(); + decoder.read_to_end(&mut decompressed)?; + Ok(decompressed) +} + +/// Compresses bytes using gzip +fn compress_gzip(bytes: &[u8]) -> Result, Box> { + use flate2::write::GzEncoder; + use flate2::Compression; + use std::io::Write; + + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(bytes)?; + let compressed = encoder.finish()?; + Ok(compressed) +} + /// Filters models response to keep only models with status "downloaded" fn filter_models_response(bytes: &[u8]) -> Result, Box> { - let response_text = std::str::from_utf8(bytes)?; + // Try to decompress if it's gzip-encoded + let decompressed_bytes = if is_gzip_encoded(bytes) { + log::debug!("Response is gzip-encoded, decompressing..."); + decompress_gzip(bytes)? 
+ } else { + bytes.to_vec() + }; + + let response_text = std::str::from_utf8(&decompressed_bytes)?; let mut response_json: Value = serde_json::from_str(response_text)?; // Check if this is a ListModelsResponseDto format with data array @@ -475,8 +510,15 @@ fn filter_models_response(bytes: &[u8]) -> Result, Box Date: Wed, 18 Jun 2025 16:19:48 +0700 Subject: [PATCH 43/48] fix(proxy): implement true HTTP streaming for chat completions API (#5350) --- src-tauri/src/core/server.rs | 71 ++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/src-tauri/src/core/server.rs b/src-tauri/src/core/server.rs index 627ec6a7c..ee8b1cbb1 100644 --- a/src-tauri/src/core/server.rs +++ b/src-tauri/src/core/server.rs @@ -7,6 +7,7 @@ use std::net::SocketAddr; use std::sync::LazyLock; use tokio::sync::Mutex; use tokio::task::JoinHandle; +use futures_util::StreamExt; use flate2::read::GzDecoder; use std::io::Read; @@ -389,11 +390,11 @@ async fn proxy_request( &config.trusted_hosts, ); - // Read response body - match response.bytes().await { - Ok(bytes) => { - // Check if this is a /models endpoint request and filter the response - if path.contains("/models") && method == hyper::Method::GET { + // Handle streaming vs non-streaming responses + if path.contains("/models") && method == hyper::Method::GET { + // For /models endpoint, we need to buffer and filter the response + match response.bytes().await { + Ok(bytes) => { match filter_models_response(&bytes) { Ok(filtered_bytes) => Ok(builder.body(Body::from(filtered_bytes)).unwrap()), Err(e) => { @@ -401,24 +402,46 @@ async fn proxy_request( Ok(builder.body(Body::from(bytes)).unwrap()) } } - } else { - Ok(builder.body(Body::from(bytes)).unwrap()) + }, + Err(e) => { + log::error!("Failed to read response body: {}", e); + let mut error_response = + Response::builder().status(StatusCode::INTERNAL_SERVER_ERROR); + error_response = add_cors_headers_with_host_and_origin( + error_response, + &host_header, + &origin_header, + &config.trusted_hosts, + ); + Ok(error_response + .body(Body::from("Error reading upstream response")) + .unwrap()) } - }, - Err(e) => { - log::error!("Failed to read response body: {}", e); - let mut error_response = - Response::builder().status(StatusCode::INTERNAL_SERVER_ERROR); - error_response = add_cors_headers_with_host_and_origin( - error_response, - &host_header, - &origin_header, - &config.trusted_hosts, - ); - Ok(error_response - .body(Body::from("Error reading upstream response")) - .unwrap()) } + } else { + // For streaming endpoints (like chat completions), we need to collect and forward the stream + let mut stream = response.bytes_stream(); + let (mut sender, body) = hyper::Body::channel(); + + // Spawn a task to forward the stream + tokio::spawn(async move { + while let Some(chunk_result) = stream.next().await { + match chunk_result { + Ok(chunk) => { + if sender.send_data(chunk).await.is_err() { + log::debug!("Client disconnected during streaming"); + break; + } + } + Err(e) => { + log::error!("Stream error: {}", e); + break; + } + } + } + }); + + Ok(builder.body(body).unwrap()) } } Err(e) => { @@ -640,9 +663,11 @@ pub async fn start_server( trusted_hosts, }; - // Create HTTP client + // Create HTTP client with longer timeout for streaming let client = Client::builder() - .timeout(std::time::Duration::from_secs(30)) + .timeout(std::time::Duration::from_secs(300)) // 5 minutes for streaming + .pool_max_idle_per_host(10) + .pool_idle_timeout(std::time::Duration::from_secs(30)) .build()?; // 
Create service handler From 0681c6bb9ef2a72c2ab07f49add7bb366c31e22b Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Wed, 18 Jun 2025 19:58:24 +0700 Subject: [PATCH 44/48] fix: glitch toggle gpus (#5353) * fix: glitch toogle gpu * fix: Using the GPU's array index as a key for gpuLoading * enhancement: added try-finally --- web-app/src/hooks/useHardware.ts | 68 ++++++++++++++++-------- web-app/src/routes/settings/hardware.tsx | 8 +-- 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/web-app/src/hooks/useHardware.ts b/web-app/src/hooks/useHardware.ts index 7ad067cc8..16e83a7a5 100644 --- a/web-app/src/hooks/useHardware.ts +++ b/web-app/src/hooks/useHardware.ts @@ -87,8 +87,17 @@ interface HardwareStore { // Update RAM available updateRAMAvailable: (available: number) => void - // Toggle GPU activation - toggleGPUActivation: (index: number) => void + // Toggle GPU activation (async, with loading) + toggleGPUActivation: (index: number) => Promise + + // GPU loading state + gpuLoading: { [index: number]: boolean } + setGpuLoading: (index: number, loading: boolean) => void + + // Polling control + pollingPaused: boolean + pausePolling: () => void + resumePolling: () => void // Reorder GPUs reorderGPUs: (oldIndex: number, newIndex: number) => void @@ -96,8 +105,16 @@ interface HardwareStore { export const useHardware = create()( persist( - (set) => ({ + (set, get) => ({ hardwareData: defaultHardwareData, + gpuLoading: {}, + pollingPaused: false, + setGpuLoading: (index, loading) => + set((state) => ({ + gpuLoading: { ...state.gpuLoading, [state.hardwareData.gpus[index].uuid]: loading }, + })), + pausePolling: () => set({ pollingPaused: true }), + resumePolling: () => set({ pollingPaused: false }), setCPU: (cpu) => set((state) => ({ @@ -172,25 +189,34 @@ export const useHardware = create()( }, })), - toggleGPUActivation: (index) => { - set((state) => { - const newGPUs = [...state.hardwareData.gpus] - if (index >= 0 && index < newGPUs.length) { - newGPUs[index] = { - ...newGPUs[index], - activated: !newGPUs[index].activated, + toggleGPUActivation: async (index) => { + const { pausePolling, setGpuLoading, resumePolling } = get(); + pausePolling(); + setGpuLoading(index, true); + try { + await new Promise((resolve) => setTimeout(resolve, 200)); // Simulate async, replace with real API if needed + set((state) => { + const newGPUs = [...state.hardwareData.gpus]; + if (index >= 0 && index < newGPUs.length) { + newGPUs[index] = { + ...newGPUs[index], + activated: !newGPUs[index].activated, + }; } - } - setActiveGpus({ - gpus: newGPUs.filter((e) => e.activated).map((e) => parseInt(e.id)), - }) - return { - hardwareData: { - ...state.hardwareData, - gpus: newGPUs, - }, - } - }) + setActiveGpus({ + gpus: newGPUs.filter((e) => e.activated).map((e) => parseInt(e.id)), + }); + return { + hardwareData: { + ...state.hardwareData, + gpus: newGPUs, + }, + }; + }); + } finally { + setGpuLoading(index, false); + setTimeout(resumePolling, 1000); // Resume polling after 1s + } }, reorderGPUs: (oldIndex, newIndex) => diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index beab425fe..23f4eafef 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -49,7 +49,7 @@ function SortableGPUItem({ gpu, index }: { gpu: GPU; index: number }) { isDragging, } = useSortable({ id: gpu.id || index }) - const { toggleGPUActivation } = useHardware() + const { toggleGPUActivation, gpuLoading } = useHardware() const style = { 
transform: CSS.Transform.toString(transform), @@ -78,6 +78,7 @@ function SortableGPUItem({ gpu, index }: { gpu: GPU; index: number }) {
toggleGPUActivation(index)} />
@@ -122,6 +123,7 @@ function Hardware() { updateCPUUsage, updateRAMAvailable, reorderGPUs, + pollingPaused, } = useHardware() const { vulkanEnabled, setVulkanEnabled } = useVulkan() @@ -155,16 +157,16 @@ function Hardware() { } useEffect(() => { + if (pollingPaused) return; const intervalId = setInterval(() => { getHardwareInfo().then((data) => { - setHardwareData(data as unknown as HardwareData) updateCPUUsage(data.cpu.usage) updateRAMAvailable(data.ram.available) }) }, 5000) return () => clearInterval(intervalId) - }, [setHardwareData, updateCPUUsage, updateRAMAvailable]) + }, [setHardwareData, updateCPUUsage, updateRAMAvailable, pollingPaused]) const handleClickSystemMonitor = async () => { try { From c6cd37d91631a006e1073744a8f1fedaa3ee13ca Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Wed, 18 Jun 2025 20:28:30 +0700 Subject: [PATCH 45/48] fix: built in models capabilities (#5354) --- web-app/src/services/providers.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web-app/src/services/providers.ts b/web-app/src/services/providers.ts index 6bd2b63f0..517d2c5f0 100644 --- a/web-app/src/services/providers.ts +++ b/web-app/src/services/providers.ts @@ -13,6 +13,7 @@ import { import { modelSettings } from '@/lib/predefined' import { fetchModels } from './models' import { ExtensionManager } from '@/lib/extension' +import { isProd } from '@/lib/version' export const getProviders = async (): Promise => { const engines = !localStorage.getItem('migration_completed') @@ -65,7 +66,7 @@ export const getProviders = async (): Promise => { ].filter(Boolean) as string[] return { ...(modelManifest ?? { id: model, name: model }), - capabilities, + ...(!isProd ? { capabilities } : {}), } as Model }) } From 5b60116d218c15a49b6a5ee6eab3dcc1a16ee727 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Wed, 18 Jun 2025 21:48:18 +0700 Subject: [PATCH 46/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20setting=20provider=20h?= =?UTF-8?q?ide=20model=20capabilities=20(#5355)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: setting provider hide model capabilities * 🐛fix: hide tools icon on dropdown model providers * fix: stop server on app close or reload * ✨enhancement: reset heading class --------- Co-authored-by: Louis --- web-app/src/containers/DropdownModelProvider.tsx | 3 ++- web-app/src/index.css | 9 +++++++++ web-app/src/routes/__root.tsx | 8 ++++++++ web-app/src/routes/hub.tsx | 2 +- web-app/src/routes/settings/providers/$providerName.tsx | 4 +++- web-app/src/services/providers.ts | 3 +-- 6 files changed, 24 insertions(+), 5 deletions(-) diff --git a/web-app/src/containers/DropdownModelProvider.tsx b/web-app/src/containers/DropdownModelProvider.tsx index 6d5d24155..5b125282a 100644 --- a/web-app/src/containers/DropdownModelProvider.tsx +++ b/web-app/src/containers/DropdownModelProvider.tsx @@ -16,6 +16,7 @@ import { ModelSetting } from '@/containers/ModelSetting' import ProvidersAvatar from '@/containers/ProvidersAvatar' import { Fzf } from 'fzf' import { localStorageKey } from '@/constants/localStorage' +import { isProd } from '@/lib/version' type DropdownModelProviderProps = { model?: ThreadModel @@ -400,7 +401,7 @@ const DropdownModelProvider = ({ />
-                    {capabilities.length > 0 && (
+                    {!isProd && capabilities.length > 0 && (
diff --git a/web-app/src/index.css b/web-app/src/index.css index 185cb0612..d8ae284e9 100644 --- a/web-app/src/index.css +++ b/web-app/src/index.css @@ -79,6 +79,15 @@ ::-ms-reveal { display: none; } + + .reset-heading { + :is(h1, h2, h3, h4, h5, h6) { + font-weight: 600; + font-size: 14px !important; + margin-top: 0 !important; + margin-bottom: 0.5em; + } + } } @layer utilities { diff --git a/web-app/src/routes/__root.tsx b/web-app/src/routes/__root.tsx index 67e88ed90..6f6099cbb 100644 --- a/web-app/src/routes/__root.tsx +++ b/web-app/src/routes/__root.tsx @@ -18,6 +18,7 @@ import { AnalyticProvider } from '@/providers/AnalyticProvider' import { useLeftPanel } from '@/hooks/useLeftPanel' import { cn } from '@/lib/utils' import ToolApproval from '@/containers/dialogs/ToolApproval' +import { useEffect } from 'react' export const Route = createRootRoute({ component: RootLayout, @@ -82,6 +83,13 @@ function RootLayout() { router.location.pathname === route.systemMonitor || router.location.pathname === route.appLogs + useEffect(() => { + return () => { + // This is to attempt to stop the local API server when the app is closed or reloaded. + window.core?.api?.stopServer() + } + }, []) + return ( diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index 42147d0f8..3bca8649f 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -495,7 +495,7 @@ function Hub() { } actions={ diff --git a/web-app/src/services/providers.ts b/web-app/src/services/providers.ts index 517d2c5f0..6bd2b63f0 100644 --- a/web-app/src/services/providers.ts +++ b/web-app/src/services/providers.ts @@ -13,7 +13,6 @@ import { import { modelSettings } from '@/lib/predefined' import { fetchModels } from './models' import { ExtensionManager } from '@/lib/extension' -import { isProd } from '@/lib/version' export const getProviders = async (): Promise => { const engines = !localStorage.getItem('migration_completed') @@ -66,7 +65,7 @@ export const getProviders = async (): Promise => { ].filter(Boolean) as string[] return { ...(modelManifest ?? { id: model, name: model }), - ...(!isProd ? 
{ capabilities } : {}), + capabilities, } as Model }) } From 22396111be16228092e133c34407e00f8d83c2e8 Mon Sep 17 00:00:00 2001 From: Louis Date: Thu, 19 Jun 2025 00:12:03 +0700 Subject: [PATCH 47/48] fix: stop api server on page unload (#5356) * fix: stop api server on page unload * fix: check api server status on reload * refactor: api server state * fix: should not pop the guard --- src-tauri/src/core/cmd.rs | 38 +++++-- src-tauri/src/core/server.rs | 100 +++++++++++------- src-tauri/src/core/state.rs | 5 + src-tauri/src/lib.rs | 2 + web-app/src/routes/__root.tsx | 8 -- .../src/routes/settings/local-api-server.tsx | 14 ++- 6 files changed, 109 insertions(+), 58 deletions(-) diff --git a/src-tauri/src/core/cmd.rs b/src-tauri/src/core/cmd.rs index 4a48e63d3..4b4463d12 100644 --- a/src-tauri/src/core/cmd.rs +++ b/src-tauri/src/core/cmd.rs @@ -348,23 +348,41 @@ pub async fn start_server( api_key: String, trusted_hosts: Vec, ) -> Result { - let auth_token = app - .state::() - .app_token - .clone() - .unwrap_or_default(); - server::start_server(host, port, prefix, auth_token, api_key, trusted_hosts) - .await - .map_err(|e| e.to_string())?; + let state = app.state::(); + let auth_token = state.app_token.clone().unwrap_or_default(); + let server_handle = state.server_handle.clone(); + + server::start_server( + server_handle, + host, + port, + prefix, + auth_token, + api_key, + trusted_hosts, + ) + .await + .map_err(|e| e.to_string())?; Ok(true) } #[tauri::command] -pub async fn stop_server() -> Result<(), String> { - server::stop_server().await.map_err(|e| e.to_string())?; +pub async fn stop_server(state: State<'_, AppState>) -> Result<(), String> { + let server_handle = state.server_handle.clone(); + + server::stop_server(server_handle) + .await + .map_err(|e| e.to_string())?; Ok(()) } +#[tauri::command] +pub async fn get_server_status(state: State<'_, AppState>) -> Result { + let server_handle = state.server_handle.clone(); + + Ok(server::is_server_running(server_handle).await) +} + #[tauri::command] pub async fn read_logs(app: AppHandle) -> Result { let log_path = get_jan_data_folder_path(app).join("logs").join("app.log"); diff --git a/src-tauri/src/core/server.rs b/src-tauri/src/core/server.rs index ee8b1cbb1..6da4ebf9b 100644 --- a/src-tauri/src/core/server.rs +++ b/src-tauri/src/core/server.rs @@ -1,21 +1,16 @@ +use flate2::read::GzDecoder; +use futures_util::StreamExt; use hyper::service::{make_service_fn, service_fn}; use hyper::{Body, Request, Response, Server, StatusCode}; use reqwest::Client; use serde_json::Value; use std::convert::Infallible; -use std::net::SocketAddr; -use std::sync::LazyLock; -use tokio::sync::Mutex; -use tokio::task::JoinHandle; -use futures_util::StreamExt; -use flate2::read::GzDecoder; use std::io::Read; +use std::net::SocketAddr; +use std::sync::Arc; +use tokio::sync::Mutex; -/// Server handle type for managing the proxy server lifecycle -type ServerHandle = JoinHandle>>; - -/// Global singleton for the current server instance -static SERVER_HANDLE: LazyLock>> = LazyLock::new(|| Mutex::new(None)); +use crate::core::state::ServerHandle; /// Configuration for the proxy server #[derive(Clone)] @@ -272,7 +267,7 @@ async fn proxy_request( // Verify Host header (check target), but bypass for whitelisted paths let whitelisted_paths = ["/", "/openapi.json", "/favicon.ico"]; let is_whitelisted_path = whitelisted_paths.contains(&path.as_str()); - + if !is_whitelisted_path { if !host_header.is_empty() { if !is_valid_host(&host_header, &config.trusted_hosts) { @@ -333,7 
+328,10 @@ async fn proxy_request( .unwrap()); } } else if is_whitelisted_path { - log::debug!("Bypassing authorization check for whitelisted path: {}", path); + log::debug!( + "Bypassing authorization check for whitelisted path: {}", + path + ); } // Block access to /configs endpoint @@ -394,13 +392,14 @@ async fn proxy_request( if path.contains("/models") && method == hyper::Method::GET { // For /models endpoint, we need to buffer and filter the response match response.bytes().await { - Ok(bytes) => { - match filter_models_response(&bytes) { - Ok(filtered_bytes) => Ok(builder.body(Body::from(filtered_bytes)).unwrap()), - Err(e) => { - log::warn!("Failed to filter models response: {}, returning original", e); - Ok(builder.body(Body::from(bytes)).unwrap()) - } + Ok(bytes) => match filter_models_response(&bytes) { + Ok(filtered_bytes) => Ok(builder.body(Body::from(filtered_bytes)).unwrap()), + Err(e) => { + log::warn!( + "Failed to filter models response: {}, returning original", + e + ); + Ok(builder.body(Body::from(bytes)).unwrap()) } }, Err(e) => { @@ -422,7 +421,7 @@ async fn proxy_request( // For streaming endpoints (like chat completions), we need to collect and forward the stream let mut stream = response.bytes_stream(); let (mut sender, body) = hyper::Body::channel(); - + // Spawn a task to forward the stream tokio::spawn(async move { while let Some(chunk_result) = stream.next().await { @@ -440,7 +439,7 @@ async fn proxy_request( } } }); - + Ok(builder.body(body).unwrap()) } } @@ -478,7 +477,7 @@ fn compress_gzip(bytes: &[u8]) -> Result, Box Result, Box Result, Box> { +fn filter_models_response( + bytes: &[u8], +) -> Result, Box> { // Try to decompress if it's gzip-encoded let decompressed_bytes = if is_gzip_encoded(bytes) { log::debug!("Response is gzip-encoded, decompressing..."); @@ -494,10 +495,10 @@ fn filter_models_response(bytes: &[u8]) -> Result, Box Result, Box Result, Box bool { }) } +pub async fn is_server_running(server_handle: Arc>>) -> bool { + let handle_guard = server_handle.lock().await; + + if handle_guard.is_some() { + true + } else { + false + } +} + /// Starts the proxy server pub async fn start_server( + server_handle: Arc>>, host: String, port: u16, prefix: String, @@ -644,7 +662,7 @@ pub async fn start_server( trusted_hosts: Vec, ) -> Result> { // Check if server is already running - let mut handle_guard = SERVER_HANDLE.lock().await; + let mut handle_guard = server_handle.lock().await; if handle_guard.is_some() { return Err("Server is already running".into()); } @@ -687,7 +705,7 @@ pub async fn start_server( log::info!("Proxy server started on http://{}", addr); // Spawn server task - let server_handle = tokio::spawn(async move { + let server_task = tokio::spawn(async move { if let Err(e) = server.await { log::error!("Server error: {}", e); return Err(Box::new(e) as Box); @@ -695,16 +713,20 @@ pub async fn start_server( Ok(()) }); - *handle_guard = Some(server_handle); + *handle_guard = Some(server_task); Ok(true) } /// Stops the currently running proxy server -pub async fn stop_server() -> Result<(), Box> { - let mut handle_guard = SERVER_HANDLE.lock().await; +pub async fn stop_server( + server_handle: Arc>>, +) -> Result<(), Box> { + let mut handle_guard = server_handle.lock().await; if let Some(handle) = handle_guard.take() { handle.abort(); + // remove the handle to prevent future use + *handle_guard = None; log::info!("Proxy server stopped"); } else { log::debug!("No server was running"); @@ -746,10 +768,10 @@ mod tests { let data = 
filtered_response["data"].as_array().unwrap(); assert_eq!(data.len(), 1); // Should have 1 model (only model1 with "downloaded" status) - + // Verify only model1 (with "downloaded" status) is kept assert!(data.iter().any(|model| model["id"] == "model1")); - + // Verify model2 and model3 are filtered out assert!(!data.iter().any(|model| model["id"] == "model2")); assert!(!data.iter().any(|model| model["id"] == "model3")); @@ -838,11 +860,11 @@ mod tests { let data = filtered_response["data"].as_array().unwrap(); assert_eq!(data.len(), 2); // Should have 2 models (model1 and model3 with "downloaded" status) - + // Verify only models with "downloaded" status are kept assert!(data.iter().any(|model| model["id"] == "model1")); assert!(data.iter().any(|model| model["id"] == "model3")); - + // Verify other models are filtered out assert!(!data.iter().any(|model| model["id"] == "model2")); assert!(!data.iter().any(|model| model["id"] == "model4")); diff --git a/src-tauri/src/core/state.rs b/src-tauri/src/core/state.rs index cb6a5d3fa..9957ba92e 100644 --- a/src-tauri/src/core/state.rs +++ b/src-tauri/src/core/state.rs @@ -4,6 +4,10 @@ use crate::core::utils::download::DownloadManagerState; use rand::{distributions::Alphanumeric, Rng}; use rmcp::{service::RunningService, RoleClient}; use tokio::sync::Mutex; +use tokio::task::JoinHandle; + +/// Server handle type for managing the proxy server lifecycle +pub type ServerHandle = JoinHandle>>; #[derive(Default)] pub struct AppState { @@ -12,6 +16,7 @@ pub struct AppState { pub download_manager: Arc>, pub cortex_restart_count: Arc>, pub cortex_killed_intentionally: Arc>, + pub server_handle: Arc>>, } pub fn generate_app_token() -> String { rand::thread_rng() diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 076984106..4ed6ecee7 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -55,6 +55,7 @@ pub fn run() { core::cmd::app_token, core::cmd::start_server, core::cmd::stop_server, + core::cmd::get_server_status, core::cmd::read_logs, core::cmd::change_app_data_folder, core::cmd::reset_cortex_restart_count, @@ -92,6 +93,7 @@ pub fn run() { download_manager: Arc::new(Mutex::new(DownloadManagerState::default())), cortex_restart_count: Arc::new(Mutex::new(0)), cortex_killed_intentionally: Arc::new(Mutex::new(false)), + server_handle: Arc::new(Mutex::new(None)), }) .setup(|app| { app.handle().plugin( diff --git a/web-app/src/routes/__root.tsx b/web-app/src/routes/__root.tsx index 6f6099cbb..67e88ed90 100644 --- a/web-app/src/routes/__root.tsx +++ b/web-app/src/routes/__root.tsx @@ -18,7 +18,6 @@ import { AnalyticProvider } from '@/providers/AnalyticProvider' import { useLeftPanel } from '@/hooks/useLeftPanel' import { cn } from '@/lib/utils' import ToolApproval from '@/containers/dialogs/ToolApproval' -import { useEffect } from 'react' export const Route = createRootRoute({ component: RootLayout, @@ -83,13 +82,6 @@ function RootLayout() { router.location.pathname === route.systemMonitor || router.location.pathname === route.appLogs - useEffect(() => { - return () => { - // This is to attempt to stop the local API server when the app is closed or reloaded. 
- window.core?.api?.stopServer() - } - }, []) - return ( diff --git a/web-app/src/routes/settings/local-api-server.tsx b/web-app/src/routes/settings/local-api-server.tsx index dd7561be5..94f577074 100644 --- a/web-app/src/routes/settings/local-api-server.tsx +++ b/web-app/src/routes/settings/local-api-server.tsx @@ -17,7 +17,8 @@ import { windowKey } from '@/constants/windows' import { IconLogs } from '@tabler/icons-react' import { cn } from '@/lib/utils' import { ApiKeyInput } from '@/containers/ApiKeyInput' -import { useState } from 'react' +import { useEffect, useState } from 'react' +import { invoke } from '@tauri-apps/api/core' // eslint-disable-next-line @typescript-eslint/no-explicit-any export const Route = createFileRoute(route.settings.local_api_server as any)({ @@ -44,6 +45,17 @@ function LocalAPIServer() { !apiKey || apiKey.toString().trim().length === 0 ) + useEffect(() => { + const checkServerStatus = async () => { + invoke('get_server_status').then((running) => { + if (running) { + setServerStatus('running') + } + }) + } + checkServerStatus() + }, [setServerStatus]) + const handleApiKeyValidation = (isValid: boolean) => { setIsApiKeyEmpty(!isValid) } From 67592f3f45ccd863d704bde1b60826a451b6fbd3 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Thu, 19 Jun 2025 23:08:45 +0700 Subject: [PATCH 48/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20avoid=20render=20html?= =?UTF-8?q?=20title=20thread=20(#5375)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: avoid render html title thread * chore: minor bump - tokenjs for manual adding models --------- Co-authored-by: Louis --- web-app/package.json | 2 +- web-app/src/containers/DropdownModelProvider.tsx | 11 +++-------- web-app/src/containers/ThreadList.tsx | 4 +--- web-app/src/hooks/useThreads.ts | 8 +++----- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/web-app/package.json b/web-app/package.json index ad82e5688..8b3193817 100644 --- a/web-app/package.json +++ b/web-app/package.json @@ -65,7 +65,7 @@ "remark-math": "^6.0.0", "sonner": "^2.0.3", "tailwindcss": "^4.1.4", - "token.js": "npm:token.js-fork@0.7.5", + "token.js": "npm:token.js-fork@0.7.9", "tw-animate-css": "^1.2.7", "ulidx": "^2.4.1", "unified": "^11.0.5", diff --git a/web-app/src/containers/DropdownModelProvider.tsx b/web-app/src/containers/DropdownModelProvider.tsx index 5b125282a..0747a1ad1 100644 --- a/web-app/src/containers/DropdownModelProvider.tsx +++ b/web-app/src/containers/DropdownModelProvider.tsx @@ -391,14 +391,9 @@ const DropdownModelProvider = ({ )} >
- + + {searchableModel.model.id} +
{!isProd && capabilities.length > 0 && ( diff --git a/web-app/src/containers/ThreadList.tsx b/web-app/src/containers/ThreadList.tsx index 0702c1bf9..9e3df65c1 100644 --- a/web-app/src/containers/ThreadList.tsx +++ b/web-app/src/containers/ThreadList.tsx @@ -101,9 +101,7 @@ const SortableItem = memo(({ thread }: { thread: Thread }) => { )} >
- + {thread.title || 'New Thread'}
   currentThreadId?: string
@@ -68,12 +68,10 @@ export const useThreads = create()((set, get) => ({
     return fzfResults.map(
       (result: { item: Thread; positions: Set<number> }) => {
         const thread = result.item // Fzf stores the original item here
-        // Ensure result.positions is an array, default to empty if undefined
-        const positions = Array.from(result.positions) || []
-        const highlightedTitle = highlightFzfMatch(thread.title, positions)
+
         return {
           ...thread,
-          title: highlightedTitle, // Override title with highlighted version
+          title: thread.title, // Override title with highlighted version
         }
       }
     )
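The hunk above stops storing highlight markup in `thread.title`, so search results keep the plain title and nothing HTML-like is rendered from it. If match highlighting is still wanted, it can be derived at render time from the fzf positions instead; the sketch below is one way to do that, with hypothetical names, and is not part of this patch.

```typescript
// Sketch: derive highlight segments from the plain title and fzf match
// positions, so no markup ever needs to be stored in thread.title.
function toSegments(title: string, positions: Set<number>) {
  return Array.from(title).map((char, index) => ({
    char,
    highlighted: positions.has(index),
  }))
}

// A renderer can wrap the highlighted segments itself (e.g. in a styled span),
// keeping stored titles free of HTML.
```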