From 2b8c448be49e0002fb97795c2c88035003026460 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Fri, 13 Jun 2025 11:37:34 +0700 Subject: [PATCH 01/48] chore: enable shortcut zoom (#5261) * chore: enable shortcut zoom * chore: update shortcut setting --- src-tauri/capabilities/default.json | 15 +++++---------- src-tauri/tauri.conf.json | 1 + web-app/src/routes/settings/shortcuts.tsx | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src-tauri/capabilities/default.json b/src-tauri/capabilities/default.json index 2dd5925c3..76c9bc567 100644 --- a/src-tauri/capabilities/default.json +++ b/src-tauri/capabilities/default.json @@ -2,16 +2,13 @@ "$schema": "../gen/schemas/desktop-schema.json", "identifier": "default", "description": "enables the default permissions", - "windows": [ - "main" - ], + "windows": ["main"], "remote": { - "urls": [ - "http://*" - ] + "urls": ["http://*"] }, "permissions": [ "core:default", + "core:webview:allow-set-webview-zoom", "core:window:allow-start-dragging", "core:window:allow-set-theme", "shell:allow-spawn", @@ -81,9 +78,7 @@ { "identifier": "opener:allow-open-url", "description": "opens the default permissions for the core module", - "windows": [ - "*" - ], + "windows": ["*"], "allow": [ { "url": "https://*" @@ -98,4 +93,4 @@ }, "store:default" ] -} \ No newline at end of file +} diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 545873172..1810c0661 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -13,6 +13,7 @@ "macOSPrivateApi": true, "windows": [ { + "zoomHotkeysEnabled": true, "label": "main", "title": "Jan", "width": 1024, diff --git a/web-app/src/routes/settings/shortcuts.tsx b/web-app/src/routes/settings/shortcuts.tsx index ae28acae0..2d46ad7c2 100644 --- a/web-app/src/routes/settings/shortcuts.tsx +++ b/web-app/src/routes/settings/shortcuts.tsx @@ -47,6 +47,28 @@ function Shortcuts() { } /> + + + + + + + } + /> + + + - + + + } + /> {/* Chat */} From e04dfc73c7ebf4703bf5b4543da3c82cd06fb2c3 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Fri, 13 Jun 2025 15:22:55 +0700 Subject: [PATCH 02/48] fix: thinking block (#5263) --- web-app/src/containers/ThreadContent.tsx | 2 +- web-app/src/routes/hub.tsx | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 2ad4477e6..f067a4431 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -184,7 +184,7 @@ export const ThreadContent = memo( | undefined return ( - + {item.content?.[0]?.text && item.role === 'user' && (
diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx
index 9ae3d151e..8c299bd5f 100644
--- a/web-app/src/routes/hub.tsx
+++ b/web-app/src/routes/hub.tsx
@@ -236,7 +236,8 @@ function Hub() {
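Note on the zoom shortcut change in PATCH 01/48 above: `zoomHotkeysEnabled` in `tauri.conf.json` turns on the webview's built-in page-zoom hotkeys, while the `core:webview:allow-set-webview-zoom` capability allows the frontend to set the zoom factor itself. The sketch below illustrates what such a call could look like from the web app; it assumes the Tauri v2 JavaScript API (`getCurrentWebview().setZoom`), and the step size, clamp range, and function names are illustrative rather than taken from the patch.

```ts
// Illustrative sketch only; assumes the Tauri v2 JS API and the
// `core:webview:allow-set-webview-zoom` capability granted in the patch above.
import { getCurrentWebview } from '@tauri-apps/api/webview'

let zoomFactor = 1.0

// Clamp the requested factor and apply it to the current webview.
async function applyZoom(next: number): Promise<void> {
  zoomFactor = Math.min(Math.max(next, 0.5), 3.0)
  await getCurrentWebview().setZoom(zoomFactor)
}

export const zoomIn = () => applyZoom(zoomFactor + 0.1)
export const zoomOut = () => applyZoom(zoomFactor - 0.1)
export const resetZoom = () => applyZoom(1.0)
```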
From 23240f958e3a6bde152167fd2de041cd40398278 Mon Sep 17 00:00:00 2001 From: Louis Date: Fri, 13 Jun 2025 17:05:49 +0700 Subject: [PATCH 03/48] Merge pull request #5262 from menloresearch/chore/sync-new-hub-data chore: sync new hub data --- extensions/assistant-extension/src/index.ts | 2 +- .../model-extension/resources/default.json | 6113 +++++++++++++++-- web-app/src/hooks/useAssistant.ts | 2 +- web-app/src/utils/models.ts | 36 +- 4 files changed, 5672 insertions(+), 481 deletions(-) diff --git a/extensions/assistant-extension/src/index.ts b/extensions/assistant-extension/src/index.ts index 6706e5ff3..a0bbe71f1 100644 --- a/extensions/assistant-extension/src/index.ts +++ b/extensions/assistant-extension/src/index.ts @@ -75,7 +75,7 @@ export default class JanAssistantExtension extends AssistantExtension { 'Jan is a helpful desktop assistant that can reason through complex tasks and use tools to complete them on the user’s behalf.', model: '*', instructions: - 'Jan is a helpful desktop assistant that can reason through complex tasks and use tools to complete them on the user’s behalf. Respond naturally and concisely, take actions when needed, and guide the user toward their goals.', + 'You have access to a set of tools to help you answer the user’s question. You can use only one tool per message, and you’ll receive the result of that tool in the user’s next response. To complete a task, use tools step by step—each step should be guided by the outcome of the previous one.\nTool Usage Rules:\n1. Always provide the correct values as arguments when using tools. Do not pass variable names—use actual values instead.\n2. You may perform multiple tool steps to complete a task.\n3. Avoid repeating a tool call with exactly the same parameters to prevent infinite loops.', tools: [ { type: 'retrieval', diff --git a/extensions/model-extension/resources/default.json b/extensions/model-extension/resources/default.json index ce17c9616..dbd9d906e 100644 --- a/extensions/model-extension/resources/default.json +++ b/extensions/model-extension/resources/default.json @@ -1,25 +1,146 @@ [ { - "author": "CohereForAI", - "id": "cortexso/command-r", + "author": "PrimeIntellect", + "id": "cortexso/intellect-2", "metadata": { - "_id": "66751b98585f2bf57092b2ae", + "_id": "6821ac2482ae7d76d34abdb8", "author": "cortexso", "cardData": { - "license": "cc-by-nc-4.0" + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] }, - "createdAt": "2024-06-21T06:20:08.000Z", - "description": "---\nlicense: cc-by-nc-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nC4AI Command-R is a research release of a 35 billion parameter highly performant generative model. Command-R is a large language model with open weights optimized for a variety of use cases including reasoning, summarization, and question answering. Command-R has the capability for multilingual generation evaluated in 10 languages and highly performant RAG capabilities.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Command-r-32b](https://huggingface.co/cortexhub/command-r/tree/32b) | `cortex run command-r:32b` |\n| 1 | [Command-r-35b](https://huggingface.co/cortexhub/command-r/tree/35b) | `cortex run command-r:35b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/command-r\n ```\n \n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run command-r\n ```\n \n## Credits\n\n- **Author:** Cohere For AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://cohere.com/c4ai-cc-by-nc-license)", + "createdAt": "2025-05-12T08:07:00.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**Prime Intellect** released **INTELLECT-2**, a 32 billion parameter large language model (LLM) trained through distributed reinforcement learning on globally donated GPU resources. Built on the **Qwen2** architecture and fine-tuned with the **prime-rl** framework, INTELLECT-2 demonstrates strong performance in math, coding, and logical reasoning.\n\nThis model leverages GRPO (Generalized Reinforcement Policy Optimization) over verifiable rewards, introducing asynchronous distributed RL training with enhanced stability techniques. While its primary focus was on verifiable mathematical and coding tasks, it remains compatible with general-purpose text generation tasks.\n\n## Variants\n\n### INTELLECT-2\n\n| No | Variant | Branch | Cortex CLI command |\n|----|----------------------------------------------------------------------------------|--------|-----------------------------------|\n| 1 | [INTELLECT-2 (32B)](https://huggingface.co/cortexso/intellect-2/tree/32b) | 32b | `cortex run intellect-2:32b` |\n\nEach branch includes multiple GGUF quantized versions, optimized for various hardware configurations:\n- **INTELLECT-2-32B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/intellect-2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run intellect-2\n ```\n\n## Credits\n\n- **Author:** Prime Intellect\n- **Converter:** [Menlo Research](https://menlo.ai/)\n- **Original License:** [Apache-2.0](https://choosealicense.com/licenses/apache-2.0/)\n- **Paper:** [Intellect 2 Technical Report](https://storage.googleapis.com/public-technical-paper/INTELLECT_2_Technical_Report.pdf)", "disabled": false, - "downloads": 14, + "downloads": 1436, "gated": false, - "id": "cortexso/command-r", - "lastModified": "2024-11-12T20:13:19.000Z", - "likes": 1, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content %}\n {%- if not loop.last %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- if not loop.last %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n' }}\n{%- endif %}\n", + "context_length": 40960, + "eos_token": "<|im_end|>", + "total": 32763876352 + }, + "id": "cortexso/intellect-2", + "lastModified": "2025-05-12T14:18:35.000Z", + "likes": 0, "model-index": null, - "modelId": "cortexso/command-r", + "modelId": "cortexso/intellect-2", + "pipeline_tag": "text-generation", "private": false, - "sha": "ca1564f6a6d4d03181b01e87e6c3e3fc959c7103", + "sha": "9d237b26053af28e0119331e0dfbc75b45a0317b", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "intellect-2-q2_k.gguf" + }, + 
{ + "rfilename": "intellect-2-q3_k_l.gguf" + }, + { + "rfilename": "intellect-2-q3_k_m.gguf" + }, + { + "rfilename": "intellect-2-q3_k_s.gguf" + }, + { + "rfilename": "intellect-2-q4_k_m.gguf" + }, + { + "rfilename": "intellect-2-q4_k_s.gguf" + }, + { + "rfilename": "intellect-2-q5_k_m.gguf" + }, + { + "rfilename": "intellect-2-q5_k_s.gguf" + }, + { + "rfilename": "intellect-2-q6_k.gguf" + }, + { + "rfilename": "intellect-2-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 206130755200, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "intellect-2:32b", + "size": 19851336256 + } + ] + }, + { + "author": "Microsoft", + "id": "cortexso/phi-4-reasoning", + "metadata": { + "_id": "681857cda178d73748a1295f", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2025-05-05T06:16:45.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**Microsoft Research** developed and released the **Phi-4-reasoning** series, a cutting-edge family of reasoning-focused language models optimized for chain-of-thought (CoT), step-by-step problem solving, and high-efficiency inference. These models excel in advanced mathematical reasoning, scientific Q&A, and instruction-following scenarios.\n\nThe Phi-4 models introduce extended context lengths, ChatML reasoning templates, and strong performance on benchmark datasets, while maintaining compact sizes that are ideal for memory- and latency-constrained environments.\n\n## Variants\n\n### Phi-4-reasoning\n\n| No | Variant | Branch | Cortex CLI command |\n|----|-------------------------------------------------------------------------------------|------------|-------------------------------------|\n| 1 | [phi-4-mini-reasoning](https://huggingface.co/microsoft/phi-4-mini-reasoning) | 4b | `cortex run phi4:4b` |\n| 2 | [phi-4-reasoning](https://huggingface.co/microsoft/phi-4-reasoning-plus) | 14b | `cortex run phi4:14b` |\n| 3 | [phi-4-reasoning-plus](https://huggingface.co/microsoft/phi-4-reasoning-plus) | 14b-plus | `cortex run phi4:14b-plus` |\n\nEach branch supports multiple quantized GGUF versions:\n- **phi-4-mini-reasoning:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **phi-4-reasoning:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **phi-4-reasoning-plus:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/phi4\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run phi4\n ```\n\n## Credits\n\n- **Author:** Microsoft Research\n- **Converter:** [Menlo Research](https://menlo.ai/)\n- **Original License:** [MIT License](https://opensource.org/license/mit/)\n- **Blogs:** [Phi-4 Reasoning Blog](https://www.microsoft.com/en-us/research/blog/)\n", + "disabled": false, + "downloads": 2894, + "gated": false, + "gguf": { + "architecture": "phi3", + "bos_token": "<|endoftext|>", + "chat_template": "{{ '<|system|>Your name is Phi, an AI math expert developed by Microsoft.' }}{% for message in messages %}{% if message['role'] == 'system' %} {{ message['content'] }}{% if 'tools' in message and message['tools'] is not none %}{{ '<|tool|>' + message['tools'] + '<|/tool|>' }}{% endif %}{% endif %}{% endfor %}{{ '<|end|>' }}{% for message in messages %}{% if message['role'] != 'system' %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}", + "context_length": 131072, + "eos_token": "<|endoftext|>", + "total": 3836021856 + }, + "id": "cortexso/phi-4-reasoning", + "lastModified": "2025-05-05T09:36:18.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/phi-4-reasoning", + "pipeline_tag": "text-generation", + "private": false, + "sha": "218f08078412d1bcd46e7ce48c4442b14b98164d", "siblings": [ { "rfilename": ".gitattributes" @@ -32,60 +153,2169 @@ }, { "rfilename": "model.yml" + }, + { + "rfilename": "phi-4-mini-reasoning-q2_k.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q3_k_l.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q3_k_m.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q3_k_s.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q4_k_m.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q4_k_s.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q5_k_m.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q5_k_s.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q6_k.gguf" + }, + { + "rfilename": "phi-4-mini-reasoning-q8_0.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q2_k.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q3_k_l.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q3_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q3_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q4_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q4_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q5_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q5_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q6_k.gguf" + }, + { + "rfilename": "phi-4-reasoning-plus-q8_0.gguf" + }, + { + "rfilename": "phi-4-reasoning-q2_k.gguf" + }, + { + "rfilename": "phi-4-reasoning-q3_k_l.gguf" + }, + { + "rfilename": "phi-4-reasoning-q3_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-q3_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-q4_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-q4_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-q5_k_m.gguf" + }, + { + "rfilename": "phi-4-reasoning-q5_k_s.gguf" + }, + { + "rfilename": "phi-4-reasoning-q6_k.gguf" + }, + { + "rfilename": "phi-4-reasoning-q8_0.gguf" } ], "spaces": [], - "tags": ["license:cc-by-nc-4.0", "region:us"], - "usedStorage": 227869888992 + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 212004788352, + "widgetData": [ + { + "text": "Hi, what can you help me 
with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] }, "models": [ { - "id": "command-r:32b-gguf-q8-0", - "size": 34326891584 + "id": "phi-4-reasoning:14b", + "size": 9053115968 }, { - "id": "command-r:35b-gguf", - "size": 21527041888 + "id": "phi-4-reasoning:4b", + "size": 2491874464 }, { - "id": "command-r:32b-gguf-q6-k", - "size": 26505169984 + "id": "phi-4-reasoning:14b-plus", + "size": 9053116000 + } + ] + }, + { + "author": "Internlm", + "id": "cortexso/internlm3-8b-it", + "metadata": { + "_id": "678dcf22fbe4dceca4562d1f", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-20T04:20:50.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**InternLM** developed and released the [InternLM3-8B-Instruct](https://huggingface.co/internlm/InternLM3-8B-Instruct), an 8-billion parameter instruction-tuned language model designed for general-purpose usage and advanced reasoning tasks. The model delivers state-of-the-art performance on reasoning and knowledge-intensive tasks, outperforming other models like Llama3.1-8B and Qwen2.5-7B. Trained on 4 trillion high-quality tokens, InternLM3 achieves exceptional efficiency, reducing training costs by over 75% compared to other models of similar scale. \n\nThe model features dual operational modes: a deep thinking mode for solving complex reasoning tasks through long chain-of-thought processes and a normal response mode for fluent and interactive user experiences. These capabilities make InternLM3-8B-Instruct ideal for applications in conversational AI, advanced reasoning, and general-purpose language understanding.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Internlm3-8b-it](https://huggingface.co/cortexso/internlm3-8b-it/tree/8b) | `cortex run internlm3-8b-it:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/internlm3-8b-it\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run internlm3-8b-it\n ```\n\n## Credits\n\n- **Author:** InternLM\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/internlm/internlm3-8b-instruct/blob/main/LICENSE.txt)\n- **Papers:** [InternLM2 Technical Report](https://arxiv.org/abs/2403.17297)", + "disabled": false, + "downloads": 229, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 8804241408 + }, + "id": "cortexso/internlm3-8b-it", + "lastModified": "2025-03-03T05:57:41.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/internlm3-8b-it", + "pipeline_tag": "text-generation", + "private": false, + "sha": "957eb6aa16a10eda3ce1a87dcacfd99bda5c469a", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "internlm3-8b-instruct-q2_k.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q6_k.gguf" + }, + { + "rfilename": "internlm3-8b-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2403.17297", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 56027406208, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "internlm3-8b-it:8b", + "size": 5358623936 + } + ] + }, + { + "author": "Google", + "id": "cortexso/gemma3", + "metadata": { + "_id": "67d14a4c2e461dfe226bd1be", + "author": "cortexso", + "cardData": { + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2025-03-12T08:48:12.000Z", + "description": "---\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n## Overview\n**Google** developed and released the **Gemma 3** series, featuring multiple model sizes with both pre-trained and instruction-tuned variants. These multimodal models handle both text and image inputs while generating text outputs, making them versatile for various applications. 
Gemma 3 models are built from the same research and technology used to create the Gemini models, offering state-of-the-art capabilities in a lightweight and accessible format.\n\nThe Gemma 3 models include four different sizes with open weights, providing excellent performance across tasks like question answering, summarization, and reasoning while maintaining efficiency for deployment in resource-constrained environments such as laptops, desktops, or custom cloud infrastructure.\n\n## Variants\n\n### Gemma 3\n| No | Variant | Branch | Cortex CLI command |\n| -- | ------------------------------------------------------ | ------ | ----------------------------- |\n| 1 | [Gemma-3-1B](https://huggingface.co/cortexso/gemma3/tree/1b) | 1b | `cortex run gemma3:1b` |\n| 2 | [Gemma-3-4B](https://huggingface.co/cortexso/gemma3/tree/4b) | 4b | `cortex run gemma3:4b` |\n| 3 | [Gemma-3-12B](https://huggingface.co/cortexso/gemma3/tree/12b) | 12b | `cortex run gemma3:12b` |\n| 4 | [Gemma-3-27B](https://huggingface.co/cortexso/gemma3/tree/27b) | 27b | `cortex run gemma3:27b` |\n\nEach branch contains a default quantized version.\n\n### Key Features\n- **Multimodal capabilities**: Handles both text and image inputs\n- **Large context window**: 128K tokens\n- **Multilingual support**: Over 140 languages\n- **Available in multiple sizes**: From 1B to 27B parameters\n- **Open weights**: For both pre-trained and instruction-tuned variants\n\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma3\n ```\n\n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run gemma3\n ```\n\n## Credits\n- **Author:** Google\n- **Original License:** [Gemma License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma 3 Technical Report](https://storage.googleapis.com/deepmind-media/gemma/Gemma3Report.pdf)", + "disabled": false, + "downloads": 5425, + "gated": false, + "gguf": { + "architecture": "gemma3", + "bos_token": "", + "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'model\n'}}\n{%- endif -%}\n", + "context_length": 131072, + "eos_token": "", + "total": 
11765788416 + }, + "id": "cortexso/gemma3", + "lastModified": "2025-05-13T12:45:28.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/gemma3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "289bd96e0dbb2f82e77c56c9c09d66ff76769895", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "gemma-3-12b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-3-12b-it-q8_0.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-3-1b-it-q8_0.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-3-27b-it-q8_0.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-3-4b-it-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 280561347040, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "gemma3:4b", + "size": 2489757760 }, { - "id": "command-r:32b-gguf-q5-km", - "size": 23051422784 + "id": "gemma3:27b", + "size": 16546404640 }, { - "id": "command-r:32b-gguf-q4-km", - "size": 19800837184 + "id": "gemma3:12b", + "size": 7300574912 }, { - "id": "command-r:32b-gguf-q5-ks", - "size": 22494366784 + "id": "gemma3:1b", + "size": 806058144 + } + ] + }, + { + "author": "Qwen", + "id": "cortexso/qwen-qwq", + "metadata": { + "_id": "67c909487c87605263db5352", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2025-03-06T02:32:40.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview \n\n**QwQ** is the reasoning model of the **Qwen** series. Unlike conventional instruction-tuned models, **QwQ** is designed to think and reason, achieving significantly enhanced performance in downstream tasks, especially challenging problem-solving scenarios. \n\n**QwQ-32B** is the **medium-sized** reasoning model in the QwQ family, capable of **competitive performance** against state-of-the-art reasoning models, such as **DeepSeek-R1** and **o1-mini**. It is optimized for tasks requiring logical deduction, multi-step reasoning, and advanced comprehension. \n\nThe model is well-suited for **AI research, automated theorem proving, advanced dialogue systems, and high-level decision-making applications**. \n\n## Variants \n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [QwQ-32B](https://huggingface.co/cortexso/qwen-qwq/tree/main) | `cortex run qwen-qwq:32b` | \n\n## Use it with Jan (UI) \n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart) \n2. Use in Jan model Hub: \n ```bash\n cortexso/qwen-qwq\n ``` \n\n## Use it with Cortex (CLI) \n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart) \n2. 
Run the model with command: \n ```bash\n cortex run qwen-qwq\n ``` \n\n## Credits \n\n- **Author:** Qwen Team \n- **Converter:** [Homebrew](https://www.homebrew.ltd/) \n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/) \n- **Paper:** [Introducing QwQ-32B: The Medium-Sized Reasoning Model](https://qwenlm.github.io/blog/qwq-32b/)", + "disabled": false, + "downloads": 582, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n' }}\n{%- endif %}\n", + "context_length": 131072, + "eos_token": "<|im_end|>", + "total": 32763876352 + }, + "id": "cortexso/qwen-qwq", + "lastModified": "2025-03-13T02:39:51.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/qwen-qwq", + "pipeline_tag": "text-generation", + "private": false, + "sha": "17e393edf64f5ecca3089b4b5822d05a165882bd", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "qwq-32b-q2_k.gguf" + }, + { + "rfilename": "qwq-32b-q3_k_l.gguf" + }, + { + "rfilename": "qwq-32b-q3_k_m.gguf" + }, + { + "rfilename": "qwq-32b-q3_k_s.gguf" + }, + { + "rfilename": "qwq-32b-q4_k_m.gguf" + }, + { + "rfilename": "qwq-32b-q4_k_s.gguf" + }, + { + "rfilename": "qwq-32b-q5_k_m.gguf" + }, + { + "rfilename": 
"qwq-32b-q5_k_s.gguf" + }, + { + "rfilename": "qwq-32b-q6_k.gguf" + }, + { + "rfilename": "qwq-32b-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 206130754880, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "qwen-qwq:32b", + "size": 19851336224 + } + ] + }, + { + "author": "DeepCogito", + "id": "cortexso/cogito-v1", + "metadata": { + "_id": "67f67ca2c68bea1f264edc11", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2025-04-09T13:56:50.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**DeepCogito** introduces the **Cogito-v1 Preview** series, a powerful suite of hybrid reasoning models trained with Iterated Distillation and Amplification (IDA). These models are designed to push the boundaries of open-weight LLMs through scalable alignment and self-improvement strategies, offering unmatched performance across coding, STEM, multilingual, and agentic use cases.\n\nEach model in this series operates in both **standard** (direct answer) and **reasoning** (self-reflective) modes, significantly outperforming size-equivalent open models such as LLaMA, DeepSeek, and Qwen. The 70B variant notably surpasses the newly released LLaMA 4 109B MoE model in benchmarks.\n\n## Variants\n\n### Cogito-v1 Preview\n\n| No | Variant | Branch | Cortex CLI command |\n|----|--------------------------------------------------------------------------------------------------|--------|-----------------------------------------------|\n| 1 | [Cogito-v1-Preview-LLaMA-3B](https://huggingface.co/cortexso/cogito-v1/tree/3b) | 3b | `cortex run cognito-v1:3b` |\n| 2 | [Cogito-v1-Preview-LLaMA-8B](https://huggingface.co/cortexso/cogito-v1/tree/8b) | 8b | `cortex run cognito-v1:8b` |\n| 3 | [Cogito-v1-Preview-Qwen-14B](https://huggingface.co/cortexso/cogito-v1/tree/14b) | 14b | `cortex run cognito-v1:14b` |\n| 4 | [Cogito-v1-Preview-Qwen-32B](https://huggingface.co/cortexso/cogito-v1/tree/32b) | 32b | `cortex run cognito-v1:32b` |\n| 5 | [Cogito-v1-Preview-LLaMA-70B](https://huggingface.co/cortexso/cogito-v1/tree/70b) | 70b | `cortex run cognito-v1:70b` |\n\nEach branch contains a default quantized version:\n- **LLaMA-3B:** q4-km \n- **LLaMA-8B:** q4-km \n- **Qwen-14B:** q4-km \n- **Qwen-32B:** q4-km \n- **LLaMA-70B:** q4-km \n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart) \n2. Use in Jan model Hub: \n ```bash\n deepcogito/cognito-v1\n ```\n## Use it with Cortex (CLI)\n\n1. Install Cortex using [Quickstart](https://cortex.so/)\n2. 
Run the model with command:\n ```bash\n cortex run cognito-v1\n ```\n\n## Credits\n\n- **Author:** DeepCogito\n- **Original License:** [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n- **Papers:** [Cognito v1 Preview](https://www.deepcogito.com/research/cogito-v1-preview)", + "disabled": false, + "downloads": 4045, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- if not enable_thinking is defined %}\n {%- set enable_thinking = false %}\n{%- endif %}\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n{#- Set the system message. If enable_thinking is true, add the \"Enable deep thinking subroutine.\" #}\n{%- if enable_thinking %}\n {%- if system_message != \"\" %}\n {%- set system_message = \"Enable deep thinking subroutine.\n\n\" ~ system_message %}\n {%- else %}\n {%- set system_message = \"Enable deep thinking subroutine.\" %}\n {%- endif %}\n{%- endif %}\n{#- Set the system message. In case there are tools present, add them to the system message. #}\n{%- if tools is not none or system_message != '' %}\n {{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n {{- system_message }}\n {%- if tools is not none %}\n {%- if system_message != \"\" %}\n {{- \"\n\n\" }}\n {%- endif %}\n {{- \"Available Tools:\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n{%- endif %}\n\n{#- Rest of the messages #}\n{%- for message in messages %}\n {#- The special cases are when the message is from a tool (via role ipython/tool/tool_results) or when the message is from the assistant, but has \"tool_calls\". If not, we add the message directly as usual. #}\n {#- Case 1 - Usual, non tool related message. #}\n {%- if not (message.role == \"ipython\" or message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}\n {%- if message['content'] is string %}\n {{- message['content'] | trim }}\n {%- else %}\n {%- for item in message['content'] %}\n {%- if item.type == 'text' %}\n {{- item.text | trim }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|eot_id|>' }}\n \n {#- Case 2 - the response is from the assistant, but has a tool call returned. The assistant may also have returned some content along with the tool call. 
#}\n {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n {{- \"<|start_header_id|>assistant<|end_header_id|>\n\n\" }}\n {%- if message['content'] is string %}\n {{- message['content'] | trim }}\n {%- else %}\n {%- for item in message['content'] %}\n {%- if item.type == 'text' %}\n {{- item.text | trim }}\n {%- if item.text | trim != \"\" %}\n {{- \"\n\n\" }}\n {%- endif %}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- \"[\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {%- if not tool_call.id is defined %}\n {{- out }}\n {%- else %}\n {{- out[:-1] }}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]<|eot_id|>\" }}\n {%- endif %}\n {%- endfor %}\n \n {#- Case 3 - the response is from a tool call. The tool call may have an id associated with it as well. If it does, we add it to the prompt. #}\n {%- elif message.role == \"ipython\" or message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.tool_call_id is defined and message.tool_call_id != '' %}\n {{- '{\"content\": ' + (message.content | tojson) + ', \"call_id\": \"' + message.tool_call_id + '\"}' }}\n {%- else %}\n {{- '{\"content\": ' + (message.content | tojson) + '}' }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}", + "context_length": 131072, + "eos_token": "<|eot_id|>", + "total": 3606752320 + }, + "id": "cortexso/cogito-v1", + "lastModified": "2025-04-10T03:02:13.000Z", + "likes": 3, + "model-index": null, + "modelId": "cortexso/cogito-v1", + "pipeline_tag": "text-generation", + "private": false, + "sha": "7e55c8c2946b9b48c606431e7a2eaf299c15b80d", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q2_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q3_k_l.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q3_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q3_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q4_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q4_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q5_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q5_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q6_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-3b-q8_0.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-70b-q4_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q2_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q3_k_l.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q3_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q3_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q4_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q4_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q5_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q5_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q6_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-llama-8b-q8_0.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q2_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q3_k_l.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q3_k_m.gguf" + 
}, + { + "rfilename": "cogito-v1-preview-qwen-14b-q3_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q4_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q4_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q5_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q5_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q6_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-14b-q8_0.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q2_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q3_k_l.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q3_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q3_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q4_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q4_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q5_k_m.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q5_k_s.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q6_k.gguf" + }, + { + "rfilename": "cogito-v1-preview-qwen-32b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 417094614784, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "cogito-v1:8b", + "size": 4920738752 }, { - "id": "command-r:32b-gguf-q2-k", - "size": 12810767424 + "id": "cogito-v1:70b", + "size": 42520398016 }, { - "id": "command-r:32b-gguf-q3-kl", - "size": 17563438144 + "id": "cogito-v1:3b", + "size": 2241004384 }, { - "id": "command-r:gguf", - "size": 21527041888 + "id": "cogito-v1:32b", + "size": 19848503488 }, { - "id": "command-r:32b-gguf-q3-ks", - "size": 14708689984 + "id": "cogito-v1:14b", + "size": 8985277888 + } + ] + }, + { + "author": "ibm-granite", + "id": "cortexso/granite-3.2-it", + "metadata": { + "_id": "67ab23c8e77c0a1c32f62879", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-02-11T10:17:44.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nGranite-3.2-it is an advanced AI language model derived from the IBM Granite framework, specifically designed for instruction-following tasks in Italian. Its primary purpose is to facilitate human-like interactions by understanding and generating responses that are contextually relevant and coherent. This model can be effectively utilized in various applications, including customer support, content creation, and language translation, enhancing communication efficiency across diverse sectors. Its performance demonstrates a strong ability to comprehend nuanced instructions and generate accurate outputs, making it suitable for professional and creative environments alike. Overall, Granite-3.2-it stands out for its adaptability, responsiveness, and proficiency in Italian language tasks.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Granite-3.2-it-8b](https://huggingface.co/cortexso/granite-3.2-it/tree/8b) | cortex run granite-3.2-it:8b|\n## Use it with Jan (UI)\n1. 
Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/granite-3.2-it\n ```\n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run granite-3.2-it\n ```\n## Credits\n- **Author:** ibm-granite\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://www.apache.org/licenses/LICENSE-2.0)\n- **Paper:** [IBM Granite 3.2 Blog](https://www.ibm.com/new/announcements/ibm-granite-3-2-open-source-reasoning-and-vision)", + "disabled": false, + "downloads": 352, + "gated": false, + "gguf": { + "architecture": "granite", + "bos_token": "<|end_of_text|>", + "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"Knowledge Cutoff Date: April 2024.\nToday's Date: \" + strftime_now('%B %d, %Y') + \".\nYou are Granite, developed by IBM.\" %}\n {%- if tools and documents %}\n {%- set system_message = system_message + \" You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.\n\nWrite the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.\" %}\n {%- elif tools %}\n {%- set system_message = system_message + \" You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.\" %}\n {%- elif documents %}\n {%- set system_message = system_message + \" Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.\" %}\n {%- elif thinking %}\n {%- set system_message = system_message + \" You are a helpful AI assistant.\nRespond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. 
Write your thoughts after 'Here is my thought process:' and write your response after 'Here is my response:' for each user query.\" %}\n {%- else %}\n {%- set system_message = system_message + \" You are a helpful AI assistant.\" %} \n {%- endif %}\n {%- if 'citations' in controls and documents %}\n {%- set system_message = system_message + '\n\nIn your response, use the symbols and to indicate when a fact comes from a document in the search result, e.g 0 for a fact from document 0. Afterwards, list all the citations with their corresponding documents in an ordered list.' %}\n {%- endif %}\n {%- if 'hallucinations' in controls and documents %}\n {%- set system_message = system_message + '\n\nFinally, after the response is written, include a numbered list of sentences from the response that are potentially hallucinated and not based in the documents.' %}\n {%- endif %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>\n' }}\n{%- if tools %}\n {{- '<|start_of_role|>tools<|end_of_role|>' }}\n {{- tools | tojson(indent=4) }}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- if documents %}\n {{- '<|start_of_role|>documents<|end_of_role|>' }}\n {%- for document in documents %}\n {{- 'Document ' + loop.index0 | string + '\n' }}\n {{- document['text'] }}\n {%- if not loop.last %}\n {{- '\n\n'}}\n {%- endif%}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in loop_messages %}\n {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|start_of_role|>assistant' }}\n {%- if controls %}\n {{- ' ' + controls | tojson()}}\n {%- endif %}\n {{- '<|end_of_role|>' }}\n {%- endif %}\n{%- endfor %}", + "context_length": 131072, + "eos_token": "<|end_of_text|>", + "total": 8170848256 + }, + "id": "cortexso/granite-3.2-it", + "lastModified": "2025-03-03T02:11:18.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/granite-3.2-it", + "pipeline_tag": "text-generation", + "private": false, + "sha": "2fb3d81e43760500c0ad28f9b7d047c75abc16dd", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "granite-3.2-8b-instruct-q2_k.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q6_k.gguf" + }, + { + "rfilename": "granite-3.2-8b-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 56447768704, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "granite-3.2-it:8b", + "size": 4942859456 + } + ] + }, + { + "author": "allenai", + "id": "cortexso/olmo-2", + "metadata": { + "_id": "6746c45ca0de7ab99efe78d5", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-27T07:03:56.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\nOLMo-2 is a series of Open Language Models designed to enable the science of language models. These models are trained on the Dolma dataset, with all code, checkpoints, logs (coming soon), and associated training details made openly available.\n\nThe OLMo-2 13B Instruct November 2024 is a post-trained variant of the OLMo-2 13B model, which has undergone supervised fine-tuning on an OLMo-specific variant of the Tülu 3 dataset. Additional training techniques include Direct Preference Optimization (DPO) and Reinforcement Learning from Virtual Rewards (RLVR), optimizing it for state-of-the-art performance across various tasks, including chat, MATH, GSM8K, and IFEval.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Olmo-2-7b](https://huggingface.co/cortexso/olmo-2/tree/7b) | `cortex run olmo-2:7b` |\n| 2 | [Olmo-2-13b](https://huggingface.co/cortexso/olmo-2/tree/13b) | `cortex run olmo-2:13b` |\n| 3 | [Olmo-2-32b](https://huggingface.co/cortexso/olmo-2/tree/32b) | `cortex run olmo-2:32b` |\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/olmo-2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run olmo-2\n ```\n \n## Credits\n\n- **Author:** allenai\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Paper](https://arxiv.org/abs/2501.00656)", + "disabled": false, + "downloads": 352, + "gated": false, + "gguf": { + "architecture": "olmo2", + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}{% else %}{{ '<|assistant|>\n' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}", + "context_length": 4096, + "eos_token": "<|endoftext|>", + "total": 32234279936 + }, + "id": "cortexso/olmo-2", + "lastModified": "2025-03-14T03:06:15.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/olmo-2", + "pipeline_tag": "text-generation", + "private": false, + "sha": "b76f7629d2da0ccc9535845bab99291e317de088", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q2_k.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q6_k.gguf" + }, + { + "rfilename": "olmo-2-0325-32b-instruct-q8_0.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q2_k.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q6_k.gguf" + }, + { + "rfilename": "olmo-2-1124-13b-instruct-q8_0.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q2_k.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q6_k.gguf" + }, + { + "rfilename": "olmo-2-1124-7b-instruct-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2501.00656", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 335683989120, + "widgetData": [ + 
{ + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "olmo-2:32b", + "size": 19482558496 }, { - "id": "command-r:32b-gguf-q3-km", - "size": 16231746624 + "id": "olmo-2:13b", + "size": 8354349408 }, { - "id": "command-r:32b-gguf-q4-ks", - "size": 18849516608 + "id": "olmo-2:7b", + "size": 4472020160 + } + ] + }, + { + "author": "Microsoft", + "id": "cortexso/phi-4", + "metadata": { + "_id": "677f682eb2e41c2f45dbee73", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-09T06:09:50.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\nPhi-4 model, a state-of-the-art 14B parameter Transformer designed for advanced reasoning, conversational AI, and high-quality text generation. Built on a mix of synthetic datasets, filtered public domain content, academic books, and Q&A datasets, Phi-4 ensures exceptional performance through data quality and alignment. It features a 16K token context length, trained on 9.8T tokens over 21 days using 1920 H100-80G GPUs. Phi-4 underwent rigorous fine-tuning and preference optimization to enhance instruction adherence and safety. Released on December 12, 2024, it represents a static model with data cutoff as of June 2024, suitable for diverse applications in research and dialogue systems.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Phi-4-14b](https://huggingface.co/cortexso/phi-4/tree/14b) | `cortex run phi-4:14b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```text\n cortexso/phi-4\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run phi-4\n ```\n\n## Credits\n\n- **Author:** Microsoft Research\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/microsoft/phi-4/blob/main/LICENSE)\n- **Papers:** [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)", + "disabled": false, + "downloads": 463, + "gated": false, + "gguf": { + "architecture": "phi3", + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}", + "context_length": 16384, + "eos_token": "<|im_end|>", + "total": 14659507200 + }, + "id": "cortexso/phi-4", + "lastModified": "2025-03-02T15:30:47.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/phi-4", + "pipeline_tag": "text-generation", + "private": false, + "sha": "cc1f8271734a2ac438a1a7c60a62f111b9476524", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "phi-4-q2_k.gguf" + }, + { + "rfilename": "phi-4-q3_k_l.gguf" + }, + { + "rfilename": "phi-4-q3_k_m.gguf" + }, + { + "rfilename": "phi-4-q3_k_s.gguf" + }, + { + "rfilename": "phi-4-q4_k_m.gguf" + }, + { + "rfilename": "phi-4-q4_k_s.gguf" + }, + { + "rfilename": "phi-4-q5_k_m.gguf" + }, + { + "rfilename": "phi-4-q5_k_s.gguf" + }, + { + "rfilename": "phi-4-q6_k.gguf" + }, + { + "rfilename": "phi-4-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2412.08905", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 93205915520, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "phi-4:14b", + "size": 9053114560 + } + ] + }, + { + "author": "MistralAI", + "id": "cortexso/mistral-small-24b", + "metadata": { + "_id": "679c3a8f4061a1ab60e703b7", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-31T02:50:55.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nThe 'mistral-small-24b' model is an advanced AI language model optimized for a variety of natural language processing tasks. It is particularly well-suited for applications such as text generation, chatbots, content summarization, and language translation. Built on the foundation of 'mistralai/Mistral-Small-24B-Base-2501', it leverages state-of-the-art techniques for understanding and generating human-like text. Users can expect significant improvements in fluency and contextual relevance, making it effective for both professional and creative use cases. 
Its efficiency allows for deployment in resource-constrained environments, catering to a diverse range of industries and applications.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Mistral-Small-24b](https://huggingface.co/cortexso/mistral-small-24b/tree/24b) | cortex run mistral-small-24b:24b |\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n \n ```bash\n cortexso/mistral-small-24b\n ```\n \n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n \n ```bash\n cortex run mistral-small-24b\n ```\n \n## Credits\n- **Author:** mistralai\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Paper:** [Mistral Small 3 Blog](https://mistral.ai/news/mistral-small-3)", + "disabled": false, + "downloads": 683, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "", + "context_length": 32768, + "eos_token": "", + "total": 23572403200 + }, + "id": "cortexso/mistral-small-24b", + "lastModified": "2025-03-03T06:09:47.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/mistral-small-24b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "5a28cb4b0f1aa4e0b55f527b71c88eb5b56ebd71", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "mistral-small-24b-base-2501-q2_k.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q3_k_l.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q3_k_m.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q3_k_s.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q4_k_m.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q4_k_s.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q5_k_m.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q5_k_s.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q6_k.gguf" + }, + { + "rfilename": "mistral-small-24b-base-2501-q8_0.gguf" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us" + ], + "usedStorage": 148517729600, + "widgetData": [ + { + "text": "My name is Julien and I like to" + }, + { + "text": "I like traveling by train because" + }, + { + "text": "Paris is an amazing place to visit," + }, + { + "text": "Once upon a time," + } + ] + }, + "models": [ + { + "id": "mistral-small-24b:24b", + "size": 14333907488 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-qwen-7b", + "metadata": { + "_id": "6790a5b2044aeb2bd5922877", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-22T08:00:50.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) model, a distilled version of the Qwen 7B language model. 
This version is fine-tuned for high-performance text generation and optimized for dialogue and information-seeking tasks, providing even greater capabilities with its larger size compared to the 7B variant.\n\nThe model is designed for applications in customer support, conversational AI, and research, focusing on delivering accurate, helpful, and safe outputs while maintaining efficiency.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-7b-7b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-7b/tree/7b) | `cortex run deepseek-r1-distill-qwen-7b:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-7b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-7b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 1008, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool 
%}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 7615616512 + }, + "id": "cortexso/deepseek-r1-distill-qwen-7b", + "lastModified": "2025-03-03T06:27:42.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-qwen-7b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "8e256fee6ed3616f3f90b0eb453083a115f1fe40", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q2_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_l.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q4_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q4_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q5_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q5_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q6_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-7b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 53341802656, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-qwen-7b:7b", + "size": 4683073184 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-qwen-14b", + "metadata": { + "_id": "678fdf2be186002cc0ba006e", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-21T17:53:47.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) model, a distilled version of the Qwen 14B language model. This variant represents the largest and most powerful model in the DeepSeek R1 Distill series, fine-tuned for high-performance text generation, dialogue optimization, and advanced reasoning tasks. \n\nThe model is designed for applications that require extensive understanding, such as conversational AI, research, large-scale knowledge systems, and customer service, providing superior performance in accuracy, efficiency, and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-14b-14b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-14b/tree/14b) | `cortex run deepseek-r1-distill-qwen-14b:14b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-14b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-14b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 1261, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 14770033664 + }, + "id": "cortexso/deepseek-r1-distill-qwen-14b", + "lastModified": "2025-03-03T06:40:22.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-qwen-14b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "ca42c63b1c148ac7be176ef0ed8384d3775bed5b", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q2_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_l.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q4_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q4_k_s.gguf" + }, + 
{ + "rfilename": "deepseek-r1-distill-qwen-14b-q5_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q5_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q6_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-14b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 102845421536, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-qwen-14b:14b", + "size": 8988109920 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-qwen-32b", + "metadata": { + "_id": "678fe132df84bd3d94f37e58", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-21T18:02:26.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) model, a distilled version of the Qwen 32B language model. This is the most advanced and largest model in the DeepSeek R1 Distill family, offering unparalleled performance in text generation, dialogue optimization, and reasoning tasks. \n\nThe model is tailored for large-scale applications in conversational AI, research, enterprise solutions, and knowledge systems, delivering exceptional accuracy, efficiency, and safety at scale.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-32b-32b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-32b/tree/32b) | `cortex run deepseek-r1-distill-qwen-32b:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-32b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-32b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 597, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 32763876352 + }, + "id": "cortexso/deepseek-r1-distill-qwen-32b", + "lastModified": "2025-03-03T06:41:05.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-qwen-32b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "0ec9981b2b5ad5c04a5357a3c328f10735efc79a", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q2_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_l.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q4_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q4_k_s.gguf" + }, + { 
+ "rfilename": "deepseek-r1-distill-qwen-32b-q5_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q5_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q6_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-32b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 225982083296, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-qwen-32b:32b", + "size": 19851335520 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-llama-70b", + "metadata": { + "_id": "678fe1673b0a6384a4e1f887", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-21T18:03:19.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Llama 70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) model, a distilled version of the Llama 70B language model. This model represents the pinnacle of the DeepSeek R1 Distill series, designed for exceptional performance in text generation, dialogue tasks, and advanced reasoning, offering unparalleled capabilities for large-scale AI applications.\n\nThe model is ideal for enterprise-grade applications, research, conversational AI, and large-scale knowledge systems, providing top-tier accuracy, safety, and efficiency.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-llama-70b-70b](https://huggingface.co/cortexso/deepseek-r1-distill-llama-70b/tree/70b) | `cortex run deepseek-r1-distill-llama-70b:70b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-llama-70b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-llama-70b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 580, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 70553706560 + }, + "id": "cortexso/deepseek-r1-distill-llama-70b", + "lastModified": "2025-03-03T06:42:21.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-llama-70b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "d03fa1c83966573864075845a4b493af9aa8ed53", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-llama-70b-q4_k_m.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 85040791136, + "widgetData": [ + { + "text": "Hi, what can you help me with?" 
+ }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-llama-70b:70b", + "size": 42520395584 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-llama-8b", + "metadata": { + "_id": "678f4b5625a9b93997f1f666", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-21T07:23:02.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Llama 8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) model, a distilled version of the Llama 8B language model. This variant is fine-tuned for high-performance text generation, optimized for dialogue, and tailored for information-seeking tasks. It offers a robust balance between model size and performance, making it suitable for demanding conversational AI and research use cases.\n\nThe model is designed to deliver accurate, efficient, and safe responses in applications such as customer support, knowledge systems, and research environments.\n\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-llama-8b-8b](https://huggingface.co/cortexso/deepseek-r1-distill-llama-8b/tree/8b) | `cortex run deepseek-r1-distill-llama-8b:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-llama-8b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-llama-8b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 933, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 8030261312 + }, + "id": "cortexso/deepseek-r1-distill-llama-8b", + "lastModified": "2025-03-03T06:33:03.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-llama-8b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "b3321ad8a700b3aa2c3fc44ac84a167bd11ecdb8", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q2_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q3_k_l.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q3_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q3_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q4_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q4_k_s.gguf" + }, + { 
+ "rfilename": "deepseek-r1-distill-llama-8b-q5_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q5_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q6_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-llama-8b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 56187723232, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-llama-8b:8b", + "size": 4920736256 + } + ] + }, + { + "author": "NovaSky-AI", + "id": "cortexso/sky-t1", + "metadata": { + "_id": "6782f82c860ee02fe01dbd60", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-11T23:01:00.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**NovaSky Team** developed and released the [Sky-T1](https://huggingface.co/novasky-ai/Sky-T1-32B-Preview), a 32-billion parameter reasoning model adapted from Qwen2.5-32B-Instruct. This model is designed for advanced reasoning, coding, and mathematical tasks, achieving performance comparable to state-of-the-art models like o1-preview while being cost-efficient. Sky-T1 was trained on 17K verified responses from Qwen/QwQ-32B-Preview, with additional science data from the Still-2 dataset, ensuring high-quality and diverse learning sources.\n\nThe model supports complex reasoning via long chain-of-thought processes and excels in both coding and mathematical challenges. Utilizing Llama-Factory with DeepSpeed Zero-3 Offload, Sky-T1 training was completed in just 19 hours on 8 H100 GPUs, demonstrating efficient resource utilization. These capabilities make Sky-T1 an exceptional tool for applications in programming, academic research, and reasoning-intensive tasks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Sky-t1-32b](https://huggingface.co/cortexso/sky-t1/tree/32b) | `cortex run sky-t1:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/sky-t1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run sky-t1\n ```\n\n## Credits\n\n- **Author:** NovaSky Team\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Sky-T1: Fully Open-Source Reasoning Model](https://novasky-ai.github.io/posts/sky-t1/)", + "disabled": false, + "downloads": 116, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 32763876352 + }, + "id": "cortexso/sky-t1", + "lastModified": "2025-03-03T05:51:45.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/sky-t1", + "pipeline_tag": "text-generation", + "private": false, + "sha": "05f06ab0191808f8eb21fa3c60c9ec4a6bef4978", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "sky-t1-32b-preview-q2_k.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q3_k_l.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q3_k_m.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q3_k_s.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q4_k_m.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q4_k_s.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q5_k_m.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q5_k_s.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q6_k.gguf" + }, + { + "rfilename": "sky-t1-32b-preview-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 225982094944, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "sky-t1:32b", + "size": 19851336576 + } + ] + }, + { + "author": "CohereForAI", + "id": "cortexso/aya", + "metadata": { + "_id": "672aa4167f36760042e632ed", + "author": "cortexso", + "cardData": { + "license": "cc-by-nc-4.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-05T23:02:46.000Z", + "description": "---\nlicense: cc-by-nc-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Cohere For AI** developed and released the [Aya 23](https://huggingface.co/CohereForAI/aya-23-35B), an open weights instruction fine-tuned model with advanced multilingual capabilities. Aya 23 is built upon the highly performant Command family of models and fine-tuned using the Aya Collection to deliver state-of-the-art performance across 23 languages. This multilingual large language model is designed to support a wide range of use cases, including multilingual text generation, understanding, and translation tasks.\n\nAya 23, balancing efficiency and performance. It offers robust multilingual support for languages such as Arabic, Chinese, English, Spanish, Hindi, Vietnamese, and more, making it a versatile tool for global applications. A 35-billion parameter version is also available [here](https://huggingface.co/CohereForAI/aya-23-35b).\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Aya-8b](https://huggingface.co/cortexso/aya/tree/8b) | `cortex run aya:8b` |\n| 2 | [Aya-35b](https://huggingface.co/cortexso/aya/tree/35b) | `cortex run aya:35b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/aya\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run aya\n ```\n\n## Credits\n\n- **Author:** Cohere For AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://spdx.org/licenses/CC-BY-NC-4.0)", + "disabled": false, + "downloads": 168, + "gated": false, + "gguf": { + "architecture": "command-r", + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Aya, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", + "context_length": 8192, + "eos_token": "<|END_OF_TURN_TOKEN|>", + "total": 34980831232 + }, + "id": "cortexso/aya", + "lastModified": "2025-03-02T14:58:34.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/aya", + "pipeline_tag": "text-generation", + "private": false, + "sha": "d97fef50adc54a22ec1e3133771f7cb17528742b", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "aya-23-35b-q2_k.gguf" + }, + { + "rfilename": "aya-23-35b-q3_k_l.gguf" + }, + { + "rfilename": "aya-23-35b-q3_k_m.gguf" + }, + { + "rfilename": "aya-23-35b-q3_k_s.gguf" + }, + { + "rfilename": "aya-23-35b-q4_k_m.gguf" + }, + { + "rfilename": "aya-23-35b-q4_k_s.gguf" + }, + { + "rfilename": "aya-23-35b-q5_k_m.gguf" + }, + { + "rfilename": "aya-23-35b-q5_k_s.gguf" + }, + { + "rfilename": "aya-23-35b-q6_k.gguf" + }, + { + "rfilename": "aya-23-35b-q8_0.gguf" + }, + { + "rfilename": "aya-23-8b-q2_k.gguf" + }, + { + "rfilename": "aya-23-8b-q3_k_l.gguf" + }, + { + "rfilename": "aya-23-8b-q3_k_m.gguf" + }, + { + "rfilename": "aya-23-8b-q3_k_s.gguf" + }, + { + "rfilename": "aya-23-8b-q4_k_m.gguf" + }, + { + "rfilename": "aya-23-8b-q4_k_s.gguf" + }, + { + "rfilename": "aya-23-8b-q5_k_m.gguf" + }, + { + "rfilename": "aya-23-8b-q5_k_s.gguf" + }, + { + "rfilename": "aya-23-8b-q6_k.gguf" + }, + { + "rfilename": "aya-23-8b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:cc-by-nc-4.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 302730192928, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "aya:35b", + "size": 21527043520 + }, + { + "id": "aya:8b", + "size": 5056974496 + } + ] + }, + { + "author": "PowerInfer", + "id": "cortexso/small-thinker", + "metadata": { + "_id": "6777192582e1ec3ecb79d1a4", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-02T22:54:29.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**PowerInfer** developed and released the [SmallThinker-3B-preview](https://huggingface.co/PowerInfer/SmallThinker-3B-Preview), a fine-tuned version of the Qwen2.5-3B-Instruct model. 
SmallThinker is optimized for efficient deployment on resource-constrained devices while maintaining high performance in reasoning, coding, and general text generation tasks. It outperforms its base model on key benchmarks, including AIME24, AMC23, and GAOKAO2024, making it a robust tool for both edge deployment and as a draft model for larger systems like QwQ-32B-Preview.\n\nSmallThinker was fine-tuned in two phases using high-quality datasets, including PowerInfer/QWQ-LONGCOT-500K and PowerInfer/LONGCOT-Refine-500K. Its small size allows for up to 70% faster inference speeds compared to larger models, making it ideal for applications requiring quick responses and efficient computation.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Small-thinker-3b](https://huggingface.co/cortexso/small-thinker/tree/3b) | `cortex run small-thinker:3b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/small-thinker\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run small-thinker\n ```\n\n## Credits\n\n- **Author:** PowerInfer\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/PowerInfer/SmallThinker-3B-Preview/blob/main/LICENSE)", + "disabled": false, + "downloads": 273, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 3397103616 + }, + "id": "cortexso/small-thinker", + "lastModified": "2025-03-03T06:05:50.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/small-thinker", + "pipeline_tag": "text-generation", + "private": false, + "sha": "f2746c69548d6ff92db6ec663400ad9a0dc51bbc", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "smallthinker-3b-preview-q2_k.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q3_k_l.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q3_k_m.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q3_k_s.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q4_k_m.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q4_k_s.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q5_k_m.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q5_k_s.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q6_k.gguf" + }, + { + "rfilename": "smallthinker-3b-preview-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 
23981289568, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "small-thinker:3b", + "size": 2104931616 } ] }, @@ -96,20 +2326,31 @@ "_id": "66b06c37491b555fefe0a0bf", "author": "cortexso", "cardData": { - "license": "gemma" + "license": "gemma", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] }, "createdAt": "2024-08-05T06:07:51.000Z", - "description": "---\nlicense: gemma\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [Gemma](https://huggingface.co/google/gemma-2-2b-it), state-of-the-art open model trained with the Gemma datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Gemma family with the 4B, 7B version in two variants 8K and 128K which is the context length (in tokens) that it can support.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Gemma2-2b](https://huggingface.co/cortexso/gemma2/tree/2b) | `cortex run gemma2:2b` |\n| 2 | [Gemma2-9b](https://huggingface.co/cortexso/gemma2/tree/9b) | `cortex run gemma2:9b` |\n| 3 | [Gemma2-27b](https://huggingface.co/cortexso/gemma2/tree/27b) | `cortex run gemma2:27b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma2\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run gemma2\n ```\n \n## Credits\n\n- **Author:** Go\u200cogle\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma Technical Report](https://arxiv.org/abs/2403.08295)", + "description": "---\nlicense: gemma\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [Gemma](https://huggingface.co/google/gemma-2-2b-it), state-of-the-art open model trained with the Gemma datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Gemma family with the 4B, 7B version in two variants 8K and 128K which is the context length (in tokens) that it can support.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Gemma2-2b](https://huggingface.co/cortexso/gemma2/tree/2b) | `cortex run gemma2:2b` |\n| 2 | [Gemma2-9b](https://huggingface.co/cortexso/gemma2/tree/9b) | `cortex run gemma2:9b` |\n| 3 | [Gemma2-27b](https://huggingface.co/cortexso/gemma2/tree/27b) | `cortex run gemma2:27b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma2\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run gemma2\n ```\n \n## Credits\n\n- **Author:** Go‌ogle\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma Technical Report](https://arxiv.org/abs/2403.08295)", "disabled": false, - "downloads": 190, + "downloads": 796, "gated": false, + "gguf": { + "architecture": "gemma2", + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "context_length": 8192, + "eos_token": "", + "total": 27227128320 + }, "id": "cortexso/gemma2", - "lastModified": "2024-11-12T20:13:02.000Z", + "lastModified": "2025-03-03T06:25:38.000Z", "likes": 0, "model-index": null, "modelId": "cortexso/gemma2", + "pipeline_tag": "text-generation", "private": false, - "sha": "5fe1c79fabadcd2cb59cd05f76019d0a5fd71ce0", + "sha": "36fdfde32513f2a0be9e1b166952d4cee227aaf6", "siblings": [ { "rfilename": ".gitattributes" @@ -117,6 +2358,96 @@ { "rfilename": "README.md" }, + { + "rfilename": "gemma-2-27b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-2-27b-it-q8_0.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-2-2b-it-q8_0.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-2-9b-it-q8_0.gguf" + }, { "rfilename": "metadata.yml" }, @@ -125,165 +2456,300 @@ } ], "spaces": [], - "tags": ["arxiv:2403.08295", "license:gemma", "region:us"], - "usedStorage": 265964141287 + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2403.08295", + "license:gemma", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 280987360512, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" 
+ }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] }, "models": [ { - "id": "gemma2:2b-gguf-q6-k", - "size": 2151393024 - }, - { - "id": "gemma2:2b-gguf-q3-km", - "size": 1461667584 - }, - { - "id": "gemma2:gguf", - "size": 1708582496 - }, - { - "id": "gemma2:9b-gguf-q4-km", + "id": "gemma2:9b", "size": 5761057888 }, { - "id": "gemma2:9b-gguf-q3-ks", - "size": 4337665120 - }, - { - "id": "gemma2:2b-gguf-q4-ks", - "size": 1638651648 - }, - { - "id": "gemma2:9b-gguf-q5-ks", - "size": 6483592288 - }, - { - "id": "gemma2:9b-gguf-q3-km", - "size": 4761781344 - }, - { - "id": "gemma2:9b-gguf-q3-kl", - "size": 5132452960 - }, - { - "id": "gemma2:27b-gguf-q5-ks", - "size": 18884206240 - }, - { - "id": "gemma2:9b-gguf-q2-k", - "size": 3805398112 - }, - { - "id": "gemma2:27b-gguf-q3-km", - "size": 13424647840 - }, - { - "id": "gemma2:2b-gguf", - "size": 1708582496 - }, - { - "id": "gemma2:onnx", - "size": 1708582496 - }, - { - "id": "gemma2:27b-gguf-q4-km", + "id": "gemma2:27b", "size": 16645381792 }, { - "id": "gemma2:9b-gguf-q5-km", - "size": 6647366752 - }, - { - "id": "gemma2:27b-gguf-q2-k", - "size": 10449575584 - }, - { - "id": "gemma2:9b-gguf-q4-ks", - "size": 5478925408 - }, - { - "id": "gemma2:27b-gguf-q3-ks", - "size": 12169060000 - }, - { - "id": "gemma2:2b-gguf-q2-k", - "size": 1229829888 - }, - { - "id": "gemma2:2b-gguf-q4-km", + "id": "gemma2:2b", "size": 1708582656 - }, - { - "id": "gemma2:27b-gguf-q4-ks", - "size": 15739264672 - }, - { - "id": "gemma2:9b-gguf-q8-0", - "size": 9827148896 - }, - { - "id": "gemma2:27b-gguf-q8-0", - "size": 28937387680 - }, - { - "id": "gemma2:9b-gguf-q6-k", - "size": 7589069920 - }, - { - "id": "gemma2:2b-gguf-q8-0", - "size": 2784495360 - }, - { - "id": "gemma2:27b-gguf-q5-km", - "size": 19408117408 - }, - { - "id": "gemma2:2b-gguf-q3-kl", - "size": 1550436096 - }, - { - "id": "gemma2:27b-gguf-q6-k", - "size": 22343524000 - }, - { - "id": "gemma2:2b-gguf-q3-ks", - "size": 1360660224 - }, - { - "id": "gemma2:27b-gguf-q3-kl", - "size": 14519361184 - }, - { - "id": "gemma2:2b-gguf-q5-ks", - "size": 1882543872 - }, - { - "id": "gemma2:2b-gguf-q5-km", - "size": 1923278592 } ] }, { - "author": "CohereForAI", - "id": "cortexso/aya", + "author": "agentica-org", + "id": "cortexso/deepscaler", "metadata": { - "_id": "66790e21db26e8589ccd3816", + "_id": "67aaa7a5a6e6b3d852e347b2", "author": "cortexso", "cardData": { - "license": "apache-2.0" + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] }, - "createdAt": "2024-06-24T06:11:45.000Z", - "description": "---\nlicense: cc-by-sa-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nAya Expanse is an open-weight research release of a model with highly advanced multilingual capabilities. It focuses on pairing a highly performant pre-trained Command family of models with the result of a year\u2019s dedicated research from Cohere For AI, including data arbitrage, multilingual preference training, safety tuning, and model merging. The result is a powerful multilingual large language model serving 23 languages.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [main](https://huggingface.co/cortexso/aya-expanse/tree/main) | `cortex run aya-expanse` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```\n cortexso/aya-expanse\n ```\n\n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```\n cortex run aya-expanse\n ```\n\n## Credits\n\n- **Author:** CohereAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://cohere.com/c4ai-cc-by-nc-license)\n- **Papers:** [Aya Expanse Blog](https://cohere.com/blog/aya-expanse-connecting-our-world)", + "createdAt": "2025-02-11T01:28:05.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nDeepscaler is an advanced AI model developed from the agentica-org's DeepScaleR-1.5B-Preview, designed to enhance the efficiency and scalability of various machine learning tasks. Its core purpose is to provide high-quality predictive analytics and data processing capabilities while optimizing resource usage. Deepscaler is particularly useful in scenarios such as natural language processing, computer vision, and more complex data interpretation tasks, making it suitable for applications in industries like finance, healthcare, and entertainment. Users can leverage its performance to achieve faster training times and improved accuracy in their models. Overall, Deepscaler's architecture allows it to deliver robust results with reduced computational overhead, making it an excellent choice for developers and organizations aiming to scale their AI solutions.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepscaler-1.5b](https://huggingface.co/cortexso/deepscaler/tree/1.5b) | cortex run deepscaler:1.5b |\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepscaler\n ```\n \n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepscaler\n ```\n## Credits\n- **Author:** agentica-org\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [LICENSE](https://huggingface.co/agentica-org/DeepScaleR-1.5B-Preview/blob/main/LICENSE)", "disabled": false, - "downloads": 25, + "downloads": 404, "gated": false, - "id": "cortexso/aya", - "lastModified": "2024-11-12T20:24:22.000Z", + "gguf": { + "architecture": "qwen2", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 1777088000 + }, + "id": "cortexso/deepscaler", + "lastModified": "2025-03-03T06:07:30.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/deepscaler", + "pipeline_tag": "text-generation", + "private": false, + "sha": "f2ac6bdbe311a9dbaf2bc4d77baa460b06b169e6", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepscaler-1.5b-preview-q2_k.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q3_k_l.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q3_k_m.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q3_k_s.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q4_k_m.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q4_k_s.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q5_k_m.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q5_k_s.gguf" + }, + { + 
"rfilename": "deepscaler-1.5b-preview-q6_k.gguf" + }, + { + "rfilename": "deepscaler-1.5b-preview-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 12728615584, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepscaler:1.5b", + "size": 1117321888 + } + ] + }, + { + "author": "Falcon LLM TII UAE", + "id": "cortexso/falcon3", + "metadata": { + "_id": "6761d4519d9bc9c3b6e25ad4", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-12-17T19:43:13.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n\n## Overview\n\nFalcon3-10B-Instruct is part of the Falcon3 family of Open Foundation Models, offering state-of-the-art performance in reasoning, language understanding, instruction following, code, and mathematics. With 10 billion parameters, Falcon3-10B-Instruct is optimized for high-quality instruction-following tasks and supports multilingual capabilities in English, French, Spanish, and Portuguese. It provides a long context length of up to 32K tokens, making it suitable for extended document understanding and processing.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Falcon3-10b](https://huggingface.co/cortexso/falcon3/tree/10b) | `cortex run falcon3:10b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/falcon3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run falcon3\n ```\n \n## Credits\n\n- **Author:** Falcon3 Team\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://falconllm.tii.ae/falcon-terms-and-conditions.html)\n- **Papers:** [Paper](https://arxiv.org/abs/2311.16867)", + "disabled": false, + "downloads": 276, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n{{- '<|system|>\\n' }}\n{%- if messages[0]['role'] == 'system' %}\n{{- messages[0]['content'] }}\n{%- set remaining_messages = messages[1:] %}\n{%- else %}\n{%- set remaining_messages = messages %}\n{%- endif %}\n{{- 'You are a Falcon assistant skilled in function calling. You are helpful, respectful, and concise.\\n\\n# Tools\\n\\nYou have access to the following functions. You MUST use them to answer questions when needed. 
For each function call, you MUST return a JSON object inside tags.\\n\\n' + tools|tojson(indent=2) + '\\n\\n# Output Format\\n\\nYour response MUST follow this format when making function calls:\\n\\n[\\n {\"name\": \"function_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}},\\n {\"name\": \"another_function\", \"arguments\": {\"arg\": \"value\"}}\\n]\\n\\nIf no function calls are needed, respond normally without the tool_call tags.\\n' }}\n{%- for message in remaining_messages %}\n{%- if message['role'] == 'user' %}\n{{- '<|user|>\\n' + message['content'] + '\\n' }}\n{%- elif message['role'] == 'assistant' %}\n{%- if message.content %}\n{{- '<|assistant|>\\n' + message['content'] }}\n{%- endif %}\n{%- if message.tool_calls %}\n{{- '\\n\\n' }}\n{{- message.tool_calls|tojson(indent=2) }}\n{{- '\\n' }}\n{%- endif %}\n{{- eos_token + '\\n' }}\n{%- elif message['role'] == 'tool' %}\n{{- '<|assistant|>\\n\\n' + message['content'] + '\\n\\n' }}\n{%- endif %}\n{%- endfor %}\n{{- '<|assistant|>\\n' if add_generation_prompt }}\n{%- else %}\n{%- for message in messages %}\n{%- if message['role'] == 'system' %}\n{{- '<|system|>\\n' + message['content'] + '\\n' }}\n{%- elif message['role'] == 'user' %}\n{{- '<|user|>\\n' + message['content'] + '\\n' }}\n{%- elif message['role'] == 'assistant' %}\n{%- if not loop.last %}\n{{- '<|assistant|>\\n' + message['content'] + eos_token + '\\n' }}\n{%- else %}\n{{- '<|assistant|>\\n' + message['content'] + eos_token }}\n{%- endif %}\n{%- endif %}\n{%- if loop.last and add_generation_prompt %}\n{{- '<|assistant|>\\n' }}\n{%- endif %}\n{%- endfor %}\n{%- endif %}", + "context_length": 32768, + "eos_token": "<|endoftext|>", + "total": 10305653760 + }, + "id": "cortexso/falcon3", + "lastModified": "2025-03-03T03:54:15.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/falcon3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "60030375504feacf3ba4205e8b9809e3dffc2ef7", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "falcon3-10b-instruct-q2_k.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q6_k.gguf" + }, + { + "rfilename": "falcon3-10b-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2311.16867", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 65157537088, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "falcon3:10b", + "size": 6287521312 + } + ] + }, + { + "author": "Qwen", + "id": "cortexso/qwen2", + "metadata": { + "_id": "667917d974da9f6bfc120671", + "author": "cortexso", + "cardData": { + "license": "other", + "license_link": "https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE", + "license_name": "tongyi-qianwen", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-24T06:53:13.000Z", + "description": "---\nlicense: other\nlicense_name: tongyi-qianwen\nlicense_link: https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nQwen2 is the new series of Qwen large language models. For Qwen2, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model. This repo contains the instruction-tuned 72B Qwen2 model.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwen2-7b](https://huggingface.co/cortexso/qwen2/tree/7b) | `cortex run qwen2:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/qwen2\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run qwen2\n ```\n \n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE)", + "disabled": false, + "downloads": 130, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 7615616512 + }, + "id": "cortexso/qwen2", + "lastModified": "2025-03-02T15:15:09.000Z", "likes": 0, "model-index": null, - "modelId": "cortexso/aya", + "modelId": "cortexso/qwen2", + "pipeline_tag": "text-generation", "private": false, - "sha": "cae2291fec1dc73739fb8189f9165d23ebe398b8", + "sha": "e2c6376ad87c7b2da92bc2a2b63ba168d85b1c6d", "siblings": [ { "rfilename": ".gitattributes" @@ -296,20 +2762,987 @@ }, { "rfilename": "model.yml" + }, + { + "rfilename": "qwen2-7b-instruct-q2_k.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q6_k.gguf" + }, + { + "rfilename": "qwen2-7b-instruct-q8_0.gguf" } ], "spaces": [], - "tags": ["license:apache-2.0", "region:us"], - "usedStorage": 21527051168 + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 53341783520, + "widgetData": [ + { + "text": "Hi, 
what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] }, "models": [ { - "id": "aya:12.9b-gguf", - "size": 21527051168 + "id": "qwen2:7b", + "size": 4683071456 + } + ] + }, + { + "author": "Nous Research", + "id": "cortexso/hermes3", + "metadata": { + "_id": "675a4743cb0f75e1a3a19ae5", + "author": "cortexso", + "cardData": { + "license": "llama3", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-12-12T02:15:31.000Z", + "description": "---\nlicense: llama3\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Nous Research** developed and released the [Hermes 3](https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B), a state-of-the-art instruction-tuned language model built on Llama-3.2-3B. This 3-billion parameter model is a fine-tuned version of Llama-3.2 and represents a leap forward in reasoning, multi-turn conversation, and structured outputs. It incorporates advanced role-playing capabilities, reliable function calling, and improved coherence over long contexts, making it a versatile assistant for various applications.\n\nHermes 3 was trained with high-quality data, leveraging fine-tuning techniques on H100 GPUs via LambdaLabs GPU Cloud. The model excels in both general-purpose and specialized tasks, including code generation, reasoning, and advanced conversational abilities. With support for ChatML prompt formatting, Hermes 3 ensures compatibility with OpenAI endpoints and facilitates structured, steerable interactions for end-users.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Hermes3-3b](https://huggingface.co/cortexso/hermes3/tree/main) | `cortex run hermes3:3b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/hermes3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run hermes3\n ```\n\n## Credits\n\n- **Author:** Nous Research\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Meta-Llama-3-8B/blob/main/LICENSE)\n- **Papers:** [Hermes 3 Technical Report](https://arxiv.org/pdf/2408.11857)", + "disabled": false, + "downloads": 421, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 131072, + "eos_token": "<|im_end|>", + "total": 3212749888 + }, + "id": "cortexso/hermes3", + "lastModified": "2025-03-03T02:36:41.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/hermes3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "b987bf2aa863d1c3590e242aaf5b81a5dc3ea8f3", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q2_k.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q3_k_l.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q3_k_m.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q3_k_s.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q4_k_m.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q4_k_s.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q5_k_m.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q5_k_s.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q6_k.gguf" + }, + { + "rfilename": "hermes-3-llama-3.2-3b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2408.11857", + "license:llama3", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 23033625536, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "hermes3:3b", + "size": 2019373888 + } + ] + }, + { + "author": "Qwen", + "id": "cortexso/qwen2.5-coder", + "metadata": { + "_id": "6732691d254c0b2144f11764", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-11T20:29:17.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Qwen Labs** developed and released the [Qwen2.5-Coder](https://huggingface.co/Qwen) model, a state-of-the-art language model tailored for code generation, understanding, and completion tasks. Featuring a 2.5B parameter dense Transformer architecture, Qwen2.5-Coder is designed to assist developers and researchers by generating high-quality code snippets, providing algorithm explanations, and completing coding prompts with accuracy. The model was trained on a diverse blend of programming languages and frameworks using carefully filtered code datasets to ensure precision and relevance. 
It leverages advanced fine-tuning techniques and rigorous safety measures to optimize instruction adherence and deliver reliable, contextually aware outputs. Released in November 2024, Qwen2.5-Coder offers an effective tool for software development, academic research, and programming education.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwen2.5-coder-14b](https://huggingface.co/cortexso/qwen2.5-coder/tree/14b) | `cortex run qwen2.5-coder:14b` |\n| 1 | [Qwen2.5-coder-32b](https://huggingface.co/cortexso/qwen2.5-coder/tree/32b) | `cortex run qwen2.5-coder:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/qwen2.5-coder\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run qwen2.5-coder\n ```\n\n## Credits\n\n- **Author:** Qwen Labs\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct/blob/main/LICENSE)\n- **Papers:** [Qwen2.5-Coder Technical Report](https://arxiv.org/abs/2409.12186)", + "disabled": false, + "downloads": 1369, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 14770033664 + }, + "id": "cortexso/qwen2.5-coder", + "lastModified": "2025-03-03T04:26:33.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/qwen2.5-coder", + "pipeline_tag": "text-generation", + "private": false, + "sha": "b472c129cc68732d81e50ce48e621fe1861e8d1c", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q2_k.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q6_k.gguf" + }, + { + "rfilename": "qwen2.5-coder-14b-instruct-q8_0.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q2_k.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q6_k.gguf" + }, + { + "rfilename": "qwen2.5-coder-32b-instruct-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2409.12186", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 328827521152, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "qwen2.5-coder:32b", + "size": 19851336256 }, { - "id": "aya:gguf", - "size": 21527051168 + "id": "qwen2.5-coder:14b", + "size": 8988110656 + } + ] + }, + { + "author": "Microsoft", + "id": "cortexso/phi-3.5", + "metadata": { + "_id": "67211d1b527f6fcd90b9dca3", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-10-29T17:36:27.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n**Microsoft** developed and released the [Phi-3.5](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) model, a state-of-the-art large language model built upon the Phi-3 architecture. With its focus on high-quality, reasoning-dense data, this model represents a significant advancement in instruction-tuned language models. Phi-3.5 has been fine-tuned through supervised learning, proximal policy optimization (PPO), and direct preference optimization (DPO) to ensure precise instruction following and robust safety measures. Supporting a 128K token context length, the model demonstrates exceptional performance in tasks requiring extended context understanding and complex reasoning. The model's training data consists of synthetic datasets and carefully filtered publicly available web content, inheriting the high-quality foundation established in the Phi-3 series.\n\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Phi-3.5-3b](https://huggingface.co/cortexso/phi-3.5/tree/3b) | `cortex run phi-3.5:3b` |\n\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/phi-3.5\n ```\n\n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run phi-3.5\n ```\n\n## Credits\n- **Author:** Microsoft\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/mit/)\n- **Papers:** [Phi-3.5 Paper](https://arxiv.org/abs/2404.14219)", + "disabled": false, + "downloads": 299, + "gated": false, + "gguf": { + "architecture": "phi3", + "bos_token": "", + "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}", + "context_length": 131072, + "eos_token": "<|endoftext|>", + "total": 3821079648 + }, + "id": "cortexso/phi-3.5", + "lastModified": "2025-03-03T05:42:47.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/phi-3.5", + "pipeline_tag": "text-generation", + "private": false, + "sha": "7fd139ae9bdff00feae40ad3e4d7ce6dc0c48a91", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "phi-3.5-mini-instruct-q2_k.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q3_k_l.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q3_k_m.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q3_k_s.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q4_k_m.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q4_k_s.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q5_k_m.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q5_k_s.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q6_k.gguf" + }, + { + "rfilename": "phi-3.5-mini-instruct-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2404.14219", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 26770128384, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "phi-3.5:3b", + "size": 2393232384 + } + ] + }, + { + "author": "meta-llama", + "id": "cortexso/llama3.3", + "metadata": { + "_id": "67568c9b6ac1ee73523d7623", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-12-09T06:22:19.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Meta** developed and released the [Llama3.3](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) model, a state-of-the-art multilingual large language model designed for instruction-tuned generative tasks. With 70 billion parameters, this model is optimized for multilingual dialogue use cases, providing high-quality text input and output. Llama3.3 has been fine-tuned through supervised learning and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. 
It sets a new standard in performance, outperforming many open-source and closed-source chat models on common industry benchmarks. The model’s capabilities make it a powerful tool for applications requiring conversational AI, multilingual support, and instruction adherence.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Llama3.3-70b](https://huggingface.co/cortexso/llama3.3/tree/70b) | `cortex run llama3.3:70b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.3\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://llama.meta.com/llama3/license/)\n- **Papers:** [Llama-3 Blog](https://llama.meta.com/llama3/)", + "disabled": false, + "downloads": 964, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "context_length": 131072, + "eos_token": "<|eot_id|>", + "total": 70553706560 + }, + "id": "cortexso/llama3.3", + "lastModified": "2025-03-03T03:59:38.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/llama3.3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "9cc0507ae02f03cf59c630c1ffa5d369441e27eb", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "llama-3.3-70b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + 
"text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 42520398432, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "llama3.3:70b", + "size": 42520398432 + } + ] + }, + { + "author": "inftech.ai", + "id": "cortexso/opencoder", + "metadata": { + "_id": "672fb2f43db04d9bf3f4c393", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-09T19:07:32.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nOpenCoder is an open and reproducible code LLM family, featuring 1.5B and 8B base and chat models that support both English and Chinese languages. Built from scratch, OpenCoder is pretrained on 2.5 trillion tokens, composed of 90% raw code and 10% code-related web data. It undergoes supervised fine-tuning (SFT) with over 4.5 million high-quality examples, achieving performance on par with top-tier code LLMs\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Opencoder-8b](https://huggingface.co/cortexso/opencoder/tree/8b) | `cortex run opencoder:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/opencoder\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run opencoder\n ```\n \n## Credits\n\n- **Author:** inftech.ai\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/infly/OpenCoder-8B-Instruct/blob/main/LICENSE)\n- **Papers:** [Paper](https://arxiv.org/abs/2411.04905)", + "disabled": false, + "downloads": 650, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|im_start|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are OpenCoder, created by OpenCoder Team.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 8192, + "eos_token": "<|im_end|>", + "total": 7771262976 + }, + "id": "cortexso/opencoder", + "lastModified": "2025-03-03T02:25:59.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/opencoder", + "pipeline_tag": "text-generation", + "private": false, + "sha": "2b98756c8b01811470941deb8a0259de3dd4018c", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "opencoder-8b-instruct-q2_k.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q5_k_s.gguf" + }, + { + 
"rfilename": "opencoder-8b-instruct-q6_k.gguf" + }, + { + "rfilename": "opencoder-8b-instruct-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2411.04905", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 54076349664, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "opencoder:8b", + "size": 4736059168 + } + ] + }, + { + "author": "Google", + "id": "cortexso/gemma", + "metadata": { + "_id": "6667b642f760460127737cc6", + "author": "cortexso", + "cardData": { + "license": "gemma", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-11T02:28:18.000Z", + "description": "---\nlicense: gemma\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [Gemma](https://huggingface.co/google/gemma-7b), state-of-the-art open model trained with the Gemma datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Gemma family with the 4B, 7B version in two variants 8K and 128K which is the context length (in tokens) that it can support.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Gemma-7b](https://huggingface.co/cortexso/gemma/tree/7b) | `cortex run gemma:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run gemma\n ```\n \n## Credits\n\n- **Author:** Go‌ogle\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma Technical Report](https://arxiv.org/abs/2403.08295)", + "disabled": false, + "downloads": 280, + "gated": false, + "gguf": { + "architecture": "gemma", + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "context_length": 8192, + "eos_token": "", + "total": 8537680896 + }, + "id": "cortexso/gemma", + "lastModified": "2025-03-03T06:14:39.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/gemma", + "pipeline_tag": "text-generation", + "private": false, + "sha": "801b78a606397281d5953e5e8f2a64b6158e2db2", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "gemma-7b-it-q2_k.gguf" + }, + { + "rfilename": "gemma-7b-it-q3_k_l.gguf" + }, + { + "rfilename": "gemma-7b-it-q3_k_m.gguf" + }, + { + "rfilename": "gemma-7b-it-q3_k_s.gguf" + }, + { + "rfilename": "gemma-7b-it-q4_k_m.gguf" + }, + { + "rfilename": "gemma-7b-it-q4_k_s.gguf" + }, + { + "rfilename": "gemma-7b-it-q5_k_m.gguf" + }, + { + "rfilename": "gemma-7b-it-q5_k_s.gguf" + }, + { + "rfilename": "gemma-7b-it-q6_k.gguf" + }, + { + "rfilename": "gemma-7b-it-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2403.08295", + "license:gemma", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 60258935328, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "gemma:7b", + "size": 5329759680 + } + ] + }, + { + "author": "MistralAI", + "id": "cortexso/mistral-nemo", + "metadata": { + "_id": "66f4e292515759ca6d5287bd", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-09-26T04:26:58.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMistralai developed and released the [Mistral-Nemo](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407) family of large language models (LLMs).\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Mistral-nemo-12b](https://huggingface.co/cortexso/mistral-nemo/tree/12b) | `cortex run mistral-nemo:12b` ||\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/mistral-nemo\n ```\n\n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run mistral-nemo\n ```\n\n## Credits\n\n- **Author:** MistralAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Apache 2 License](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Mistral Nemo Blog](https://mistral.ai/news/mistral-nemo/)", + "disabled": false, + "downloads": 546, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "", + "chat_template": "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS][\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST]\" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST]\" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n {%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n {{- \"[TOOL_CALLS][\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- message[\"content\"] + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS]{\"content\": ' + content|string + \", \" }}\n {%- if not message.tool_call_id is defined or 
message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", + "context_length": 131072, + "eos_token": "", + "total": 12247782400 + }, + "id": "cortexso/mistral-nemo", + "lastModified": "2025-03-03T02:42:16.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/mistral-nemo", + "pipeline_tag": "text-generation", + "private": false, + "sha": "487a202e44ea08566ab73ed16b5f7f685d12cf6b", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q2_k.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q3_k_l.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q3_k_m.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q3_k_s.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q4_k_m.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q4_k_s.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q5_k_m.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q5_k_s.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q6_k.gguf" + }, + { + "rfilename": "mistral-nemo-instruct-2407-q8_0.gguf" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 85369454144, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "mistral-nemo:12b", + "size": 7477207744 + } + ] + }, + { + "author": "meta-llama", + "id": "cortexso/llama3.2", + "metadata": { + "_id": "66f63309ba963b1db95deaa4", + "author": "cortexso", + "cardData": { + "license": "llama3.2", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2024-09-27T04:22:33.000Z", + "description": "---\nlicense: llama3.2\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3.2](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [LLama3.2-1b](https://huggingface.co/cortexso/llama3.2/tree/1b) | `cortex run llama3.2:1b` |\n| 2 | [LLama3.2-3b](https://huggingface.co/cortexso/llama3.2/tree/3b) | `cortex run llama3.2:3b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.2\n ```\n\n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.2\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/blob/main/LICENSE.txt)\n- **Papers:** [Llama-3.2 Blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)", + "disabled": false, + "downloads": 11227, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "context_length": 131072, + "eos_token": "<|eot_id|>", + "total": 1235814432 + }, + "id": "cortexso/llama3.2", + "lastModified": "2025-03-03T06:22:08.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/llama3.2", + "pipeline_tag": "text-generation", + "private": false, + "sha": "73313225fbeff0cebf5ccf48121cba6ca1a80e7d", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "llama-3.2-1b-instruct-q2_k.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q6_k.gguf" + }, + { + "rfilename": "llama-3.2-1b-instruct-q8_0.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q2_k.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q6_k.gguf" + }, + { + "rfilename": "llama-3.2-3b-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:llama3.2", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 31409886432, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "llama3.2:3b", + "size": 2019377312 + }, + { + "id": "llama3.2:1b", + "size": 911503104 } ] }, @@ -327,7 +3760,7 @@ "createdAt": "2024-10-26T15:40:05.000Z", "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nQwen2.5 by Qwen is a family of model include various specialized models for coding and mathematics available in multiple sizes from 0.5B to 72B parameters\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwen-2.5-0.5b](https://huggingface.co/cortexso/qwen2.5/tree/0.5b) | `cortex run qwen2.5:0.5b` |\n| 2 | [Qwen-2.5-1.5b](https://huggingface.co/cortexso/qwen2.5/tree/1.5b) | `cortex run qwen2.5:1.5b` |\n| 3 | [Qwen-2.5-3b](https://huggingface.co/cortexso/qwen2.5/tree/3b) | `cortex run qwen2.5:3b` |\n| 4 | [Qwen-2.5-7b](https://huggingface.co/cortexso/qwen2.5/tree/7b) | `cortex run qwen2.5:7b` |\n| 5 | [Qwen-2.5-14b](https://huggingface.co/cortexso/qwen2.5/tree/14b) | `cortex run qwen2.5:14b` |\n| 6 | [Qwen-2.5-32b](https://huggingface.co/cortexso/qwen2.5/tree/32b) | `cortex run qwen2.5:32b` |\n| 7 | [Qwen-2.5-72b](https://huggingface.co/cortexso/qwen2.5/tree/72b) | `cortex run qwen2.5:72b` |\n| 8 | [Qwen-2.5-coder-1.5b](https://huggingface.co/cortexso/qwen2.5/tree/coder-1.5b) | `cortex run qwen2.5:coder-1.5b` |\n| 9 | [Qwen-2.5-coder-7b](https://huggingface.co/cortexso/qwen2.5/tree/coder-7b) | `cortex run qwen2.5:coder-7b` |\n| 10 | [Qwen-2.5-math-1.5b](https://huggingface.co/cortexso/qwen2.5/tree/math-1.5b) | `cortex run qwen2.5:math-1.5b` |\n| 11 | [Qwen-2.5-math-7b](https://huggingface.co/cortexso/qwen2.5/tree/math-7b) | `cortex run qwen2.5:math-7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```\n cortexso/qwen2.5\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```\n cortex run qwen2.5\n ```\n\n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License Apache 2.0](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Qwen2.5 Blog](https://qwenlm.github.io/blog/qwen2.5/)", "disabled": false, - "downloads": 2482, + "downloads": 3608, "gated": false, "gguf": { "architecture": "qwen2", @@ -338,13 +3771,13 @@ "total": 494032768 }, "id": "cortexso/qwen2.5", - "lastModified": "2025-02-25T07:36:34.000Z", - "likes": 0, + "lastModified": "2025-03-03T04:07:15.000Z", + "likes": 1, "model-index": null, "modelId": "cortexso/qwen2.5", "pipeline_tag": "text-generation", "private": false, - "sha": "7b8b2c31e393f5cf085fe6e535fa5d6ee1cb1c5c", + "sha": "d801e60d205491ab449425f3779b13bedbbe463d", "siblings": [ { "rfilename": ".gitattributes" @@ -672,19 +4105,19 @@ "region:us", "conversational" ], - "usedStorage": 1466939625856, + "usedStorage": 596251612960, "widgetData": [ { "text": "Hi, what can you help me with?" }, { - "text": "Hey, let's have a conversation!" + "text": "What is 84 * 3 / 2?" }, { - "text": "Hello there!" + "text": "Tell me an interesting fact about the universe!" }, { - "text": "Hey my name is Clara! How are you?" + "text": "Explain quantum computing in simple terms." 
} ] }, @@ -698,13 +4131,21 @@ "size": 986048416 }, { - "id": "qwen2.5:7b", - "size": 4683073856 + "id": "qwen2.5:3b", + "size": 1929902912 + }, + { + "id": "qwen2.5:14b", + "size": 8988110592 }, { "id": "qwen2.5:0.5b", "size": 397807808 }, + { + "id": "qwen2.5:72b", + "size": 47415715104 + }, { "id": "qwen2.5:coder-1.5b", "size": 986048480 @@ -713,64 +4154,52 @@ "id": "qwen2.5:32b", "size": 19851336192 }, - { - "id": "qwen2.5:3b", - "size": 1929902912 - }, - { - "id": "qwen2.5:14b", - "size": 8988110592 - }, { "id": "qwen2.5:math-7b", "size": 4683073856 }, { - "id": "qwen2.5:72b", - "size": 47415715104 + "id": "qwen2.5:7b", + "size": 4683073856 }, { "id": "qwen2.5:coder-7b", "size": 4683073920 - }, - { - "id": "qwen2.5:main", - "size": 8098525504 } ] }, { - "author": "meta-llama", - "id": "cortexso/llama3.2", + "author": "MistralAI", + "id": "cortexso/codestral", "metadata": { - "_id": "66f63309ba963b1db95deaa4", + "_id": "66724fb044ee478111905260", "author": "cortexso", "cardData": { - "license": "llama3.2", + "license": "other", "pipeline_tag": "text-generation", - "tags": ["cortex.cpp", "featured"] + "tags": ["cortex.cpp"] }, - "createdAt": "2024-09-27T04:22:33.000Z", - "description": "---\nlicense: llama3.2\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3.2](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 2 | [gguf](https://huggingface.co/cortexso/llama3.2/tree/main) | `cortex run llama3.2` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.2\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/blob/main/LICENSE.txt)\n- **Papers:** [Llama-3.2 Blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)", + "createdAt": "2024-06-19T03:25:36.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nCodestral-22B-v0.1 is trained on a diverse dataset of 80+ programming languages, including the most popular ones, such as Python, Java, C, C++, JavaScript, and Bash\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Codestral-22b](https://huggingface.co/cortexso/codestral/tree/22b) | `cortex run codestral:22b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/codestral\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run codestral\n ```\n \n## Credits\n\n- **Author:** Mistral AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://mistral.ai/licenses/MNPL-0.1.md)\n- **Papers:** [Codestral Blog](https://mistral.ai/news/codestral/)", "disabled": false, - "downloads": 761, + "downloads": 517, "gated": false, "gguf": { "architecture": "llama", - "bos_token": "<|begin_of_text|>", - "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", - "context_length": 131072, - "eos_token": "<|eot_id|>", - "total": 1235814432 + "bos_token": "", + "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.last and system_message is defined %}\n {{- '[INST] ' + system_message + '\\n\\n' + message['content'] + '[/INST]' }}\n {%- else %}\n {{- '[INST] ' + message['content'] + '[/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n", + "context_length": 32768, + "eos_token": "", + "total": 22247282688 }, - "id": "cortexso/llama3.2", - "lastModified": "2025-02-24T10:25:50.000Z", + "id": "cortexso/codestral", + "lastModified": "2025-03-02T15:11:11.000Z", "likes": 0, "model-index": null, - "modelId": "cortexso/llama3.2", + "modelId": "cortexso/codestral", "pipeline_tag": "text-generation", "private": false, - "sha": "5aabb7db00af6183d866ff69260db98b55760359", + "sha": "6b522a6f0ce9c94a2f317c3802180aca4f526a30", "siblings": [ { "rfilename": ".gitattributes" @@ -779,64 +4208,34 @@ "rfilename": "README.md" }, { - "rfilename": "llama-3.2-1b-instruct-q2_k.gguf" + "rfilename": "codestral-22b-v0.1-q2_k.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q3_k_l.gguf" + "rfilename": "codestral-22b-v0.1-q3_k_l.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q3_k_m.gguf" + "rfilename": "codestral-22b-v0.1-q3_k_m.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q3_k_s.gguf" + "rfilename": "codestral-22b-v0.1-q3_k_s.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q4_k_m.gguf" + "rfilename": 
"codestral-22b-v0.1-q4_k_m.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q4_k_s.gguf" + "rfilename": "codestral-22b-v0.1-q4_k_s.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q5_k_m.gguf" + "rfilename": "codestral-22b-v0.1-q5_k_m.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q5_k_s.gguf" + "rfilename": "codestral-22b-v0.1-q5_k_s.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q6_k.gguf" + "rfilename": "codestral-22b-v0.1-q6_k.gguf" }, { - "rfilename": "llama-3.2-1b-instruct-q8_0.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q2_k.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q6_k.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q8_0.gguf" + "rfilename": "codestral-22b-v0.1-q8_0.gguf" }, { "rfilename": "metadata.yml" @@ -849,76 +4248,67 @@ "tags": [ "gguf", "cortex.cpp", - "featured", "text-generation", - "license:llama3.2", + "license:other", "endpoints_compatible", "region:us", "conversational" ], - "usedStorage": 50404795008, + "usedStorage": 166025350400, "widgetData": [ { "text": "Hi, what can you help me with?" }, { - "text": "Hey, let's have a conversation!" + "text": "What is 84 * 3 / 2?" }, { - "text": "Hello there!" + "text": "Tell me an interesting fact about the universe!" }, { - "text": "Hey my name is Clara! How are you?" + "text": "Explain quantum computing in simple terms." } ] }, "models": [ { - "id": "llama3.2:1b", - "size": 911503104 - }, - { - "id": "llama3.2:main", - "size": 3421898912 - }, - { - "id": "llama3.2:3b", - "size": 2019377312 + "id": "codestral:22b", + "size": 13341239008 } ] }, { - "author": "DeepSeek-AI", - "id": "cortexso/deepseek-r1", + "author": "Nous Research", + "id": "cortexso/openhermes-2.5", "metadata": { - "_id": "67a0bcf13ac2dd6adf0bdfcf", + "_id": "6669ee8d6993100c6f8befa7", "author": "cortexso", "cardData": { - "license": "mit", + "license": "apache-2.0", "pipeline_tag": "text-generation", - "tags": ["cortexp.cpp", "featured"] + "tags": ["cortex.cpp"] }, - "createdAt": "2025-02-03T12:56:17.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortexp.cpp\n- featured\n---\n\n## Overview\n\n**DeepSeek** developed and released the **DeepSeek-R1** series, featuring multiple model sizes fine-tuned for high-performance text generation. 
These models are optimized for dialogue, reasoning, and information-seeking tasks, providing a balance of efficiency and accuracy while maintaining a smaller footprint compared to their original counterparts.\n\nThe DeepSeek-R1 models include distilled and full-scale variants of both **Qwen** and **Llama** architectures, catering to various applications such as customer support, conversational AI, research, and enterprise automation.\n\n## Variants\n\n### DeepSeek-R1\n\n| No | Variant | Branch | Cortex CLI command |\n| -- | ---------------------------------------------------------------------------------------------- | ------- | ------------------------------------------ |\n| 1 | [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/cortexso/deepseek-r1/tree/1.5b) | 1.5b | `cortex run deepseek-r1:1.5b` |\n| 2 | [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/cortexso/deepseek-r1/tree/7b) | 7b | `cortex run deepseek-r1:7b` |\n| 3 | [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/cortexso/deepseek-r1/tree/8b) | 8b | `cortex run deepseek-r1:8b` |\n| 4 | [DeepSeek-R1-Distill-Qwen-14B](https://huggingface.co/cortexso/deepseek-r1/tree/14b) | 14b | `cortex run deepseek-r1:14b` |\n| 5 | [DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/cortexso/deepseek-r1/tree/32b) | 32b | `cortex run deepseek-r1:32b` |\n| 6 | [DeepSeek-R1-Distill-Llama-70B](https://huggingface.co/cortexso/deepseek-r1/tree/70b) | 70b | `cortex run deepseek-r1:70b` |\n\nEach branch contains a default quantized version:\n- **Qwen-1.5B:** q4-km\n- **Qwen-7B:** q4-km\n- **Llama-8B:** q4-km\n- **Qwen-14B:** q4-km\n- **Qwen-32B:** q4-km\n- **Llama-70B:** q4-km\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```text\n cortexso/deepseek-r1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run deepseek-r1\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1#license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "createdAt": "2024-06-12T18:53:01.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nOpenHermes 2.5 Mistral 7B is a state of the art Mistral Fine-tune, a continuation of OpenHermes 2 model, which trained on additional code datasets.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [OpenHermes 2.5-7b](https://huggingface.co/cortexso/openhermes-2.5/tree/7b) | `cortex run openhermes-2.5:7b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/openhermes-2.5\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run openhermes-2.5\n ```\n \n## Credits\n\n- **Author:** Nous Research\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md)\n- **Papers:** [Openhermes 2.5](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B)", "disabled": false, - "downloads": 999, + "downloads": 230, "gated": false, "gguf": { "architecture": "llama", - "bos_token": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c>\\n'}}{% endif %}", - "context_length": 131072, - "eos_token": "<\uff5cend\u2581of\u2581sentence\uff5c>", - "total": 70553706560 + "bos_token": "", + "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 7241748480 }, - "id": "cortexso/deepseek-r1", - "lastModified": 
"2025-02-24T17:35:43.000Z", + "id": "cortexso/openhermes-2.5", + "lastModified": "2025-03-02T14:54:17.000Z", "likes": 0, "model-index": null, - "modelId": "cortexso/deepseek-r1", + "modelId": "cortexso/openhermes-2.5", "pipeline_tag": "text-generation", "private": false, - "sha": "b08ca722cc176e8d830a4e348f51367ea47b7bed", + "sha": "e4ef98ea46b61d21e434a79704717f7065c306a9", "siblings": [ { "rfilename": ".gitattributes" @@ -927,217 +4317,1700 @@ "rfilename": "README.md" }, { - "rfilename": "deepseek-r1-distill-llama-70b-q4_k_m.gguf" + "rfilename": "metadata.yml" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q2_k.gguf" + "rfilename": "model.yml" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q3_k_l.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q2_k.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q3_k_m.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q3_k_l.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q3_k_s.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q3_k_m.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q4_k_m.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q3_k_s.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q4_k_s.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q4_k_m.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q5_k_m.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q4_k_s.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q5_k_s.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q5_k_m.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q6_k.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q5_k_s.gguf" }, { - "rfilename": "deepseek-r1-distill-llama-8b-q8_0.gguf" + "rfilename": "openhermes-2.5-mistral-7b-q6_k.gguf" }, { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q4_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q8_0.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q4_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q8_0.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q4_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q5_k_m.gguf" - }, - { - "rfilename": 
"deepseek-r1-distill-qwen-32b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q8_0.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q4_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q8_0.gguf" - }, - { - "rfilename": "metadata.yaml" + "rfilename": "openhermes-2.5-mistral-7b-q8_0.gguf" } ], "spaces": [], "tags": [ "gguf", - "cortexp.cpp", - "featured", + "cortex.cpp", "text-generation", - "license:mit", + "license:apache-2.0", "endpoints_compatible", "region:us", "conversational" ], - "usedStorage": 825182913408, + "usedStorage": 122667617430, "widgetData": [ { "text": "Hi, what can you help me with?" }, { - "text": "Hey, let's have a conversation!" + "text": "What is 84 * 3 / 2?" }, { - "text": "Hello there!" + "text": "Tell me an interesting fact about the universe!" }, { - "text": "Hey my name is Clara! How are you?" + "text": "Explain quantum computing in simple terms." } ] }, "models": [ { - "id": "deepseek-r1:1.5b", - "size": 1117320480 + "id": "openhermes-2.5:7b", + "size": 4368451712 + } + ] + }, + { + "author": "sail", + "id": "cortexso/sailor-2", + "metadata": { + "_id": "674f5d998f1ed02584bf68d8", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-12-03T19:35:53.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nSailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). It is designed to address the growing demand for diverse, robust, and accessible language technologies in the region. Built upon the foundation of Qwen 2.5, Sailor2 is continuously pre-trained on 500B tokens, significantly improving its support for 15 languages with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray.\n\nSailor2 is available in three sizes: 1B, 8B, and 20B, which are expansions from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively. These models serve a wide range of applications, from production use to research and speculative decoding, ensuring accessibility to advanced language technologies across SEA.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Sailor-2-1b](https://huggingface.co/cortexso/sailor-2/tree/1b) | `cortex run sailor-2:1b` |\n| 2 | [Sailor-2-8b](https://huggingface.co/cortexso/sailor-2/tree/8b) | `cortex run sailor-2:8b` |\n| 3 | [Sailor-2-20b](https://huggingface.co/cortexso/sailor-2/tree/20b) | `cortex run sailor-2:20b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/sailor-2\n ```\n\n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run sailor-2\n ```\n \n## Credits\n\n- **Author:** Community-driven (Sailor2 Initiative)\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Technical Paper](https://arxiv.org/pdf/2502.12982)", + "disabled": false, + "downloads": 178, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are an AI assistant named Sailor2, created by Sea AI Lab. As an AI assistant, you can answer questions in English, Chinese, and Southeast Asian languages such as Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. Your responses should be friendly, unbiased, informative, detailed, and faithful.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 4096, + "eos_token": "<|im_end|>", + "total": 988064640 + }, + "id": "cortexso/sailor-2", + "lastModified": "2025-03-03T02:58:28.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/sailor-2", + "pipeline_tag": "text-generation", + "private": false, + "sha": "89b3079762dedf6ff4fbc94545632b3554c16420", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "sailor2-1b-chat-q2_k.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q3_k_l.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q3_k_m.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q3_k_s.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q4_k_m.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q4_k_s.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q5_k_m.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q5_k_s.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q6_k.gguf" + }, + { + "rfilename": "sailor2-1b-chat-q8_0.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q2_k.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q3_k_l.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q3_k_m.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q3_k_s.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q4_k_m.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q4_k_s.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q5_k_m.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q5_k_s.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q6_k.gguf" + }, + { + "rfilename": "sailor2-20b-chat-q8_0.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q2_k.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q3_k_l.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q3_k_m.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q3_k_s.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q4_k_m.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q4_k_s.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q5_k_m.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q5_k_s.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q6_k.gguf" + }, + { + "rfilename": "sailor2-8b-chat-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2502.12982", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + 
"usedStorage": 201040376768, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "sailor-2:1b", + "size": 738628256 }, { - "id": "deepseek-r1:14b", - "size": 8988109920 + "id": "sailor-2:20b", + "size": 11622380384 }, { - "id": "deepseek-r1:70b", - "size": 42520395584 + "id": "sailor-2:8b", + "size": 5242934176 + } + ] + }, + { + "author": "CohereForAI", + "id": "cortexso/aya-expanse", + "metadata": { + "_id": "671ac0aee98f80735b80ce0d", + "author": "cortexso", + "cardData": { + "license": "cc-by-sa-4.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-10-24T21:48:30.000Z", + "description": "---\nlicense: cc-by-sa-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nAya Expanse is an open-weight research release of a model with highly advanced multilingual capabilities. It focuses on pairing a highly performant pre-trained Command family of models with the result of a year’s dedicated research from Cohere For AI, including data arbitrage, multilingual preference training, safety tuning, and model merging. The result is a powerful multilingual large language model serving 23 languages.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Aya-expanse-8b](https://huggingface.co/cortexso/aya-expanse/tree/8b) | `cortex run aya-expanse:8b` |\n| 2 | [Aya-expanse-32b](https://huggingface.co/cortexso/aya-expanse/tree/32b) | `cortex run aya-expanse:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/aya-expanse\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run aya-expanse\n ```\n\n## Credits\n\n- **Author:** CohereAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://cohere.com/c4ai-cc-by-nc-license)\n- **Papers:** [Aya Expanse Blog](https://cohere.com/blog/aya-expanse-connecting-our-world)", + "disabled": false, + "downloads": 219, + "gated": false, + "gguf": { + "architecture": "command-r", + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Aya, a brilliant, sophisticated, multilingual AI-assistant trained to assist human users by providing thorough responses. You are able to interact and respond to questions in 23 languages and you are powered by a multilingual model built by Cohere For AI.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", + "context_length": 8192, + "eos_token": "<|END_OF_TURN_TOKEN|>", + "total": 32296476672 + }, + "id": "cortexso/aya-expanse", + "lastModified": "2025-03-03T05:45:56.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/aya-expanse", + "pipeline_tag": "text-generation", + "private": false, + "sha": "d3de661105fcf536bac3f1ec747a2d39d25fe08f", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "aya-expanse-32b-q2_k.gguf" + }, + { + "rfilename": "aya-expanse-32b-q3_k_l.gguf" + }, + { + "rfilename": "aya-expanse-32b-q3_k_m.gguf" + }, + { + "rfilename": "aya-expanse-32b-q3_k_s.gguf" + }, + { + "rfilename": "aya-expanse-32b-q4_k_m.gguf" + }, + { + "rfilename": "aya-expanse-32b-q4_k_s.gguf" + }, + { + "rfilename": "aya-expanse-32b-q5_k_m.gguf" + }, + { + "rfilename": "aya-expanse-32b-q5_k_s.gguf" + }, + { + "rfilename": "aya-expanse-32b-q6_k.gguf" + }, + { + "rfilename": "aya-expanse-32b-q8_0.gguf" + }, + { + "rfilename": "aya-expanse-8b-q2_k.gguf" + }, + { + "rfilename": "aya-expanse-8b-q3_k_l.gguf" + }, + { + "rfilename": "aya-expanse-8b-q3_k_m.gguf" + }, + { + "rfilename": "aya-expanse-8b-q3_k_s.gguf" + }, + { + "rfilename": "aya-expanse-8b-q4_k_m.gguf" + }, + { + "rfilename": "aya-expanse-8b-q4_k_s.gguf" + }, + { + "rfilename": "aya-expanse-8b-q5_k_m.gguf" + }, + { + "rfilename": "aya-expanse-8b-q5_k_s.gguf" + }, + { + "rfilename": "aya-expanse-8b-q6_k.gguf" + }, + { + "rfilename": "aya-expanse-8b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:cc-by-sa-4.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 283759636448, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "aya-expanse:8b", + "size": 5056974624 }, { - "id": "deepseek-r1:8b", - "size": 4920736256 + "id": "aya-expanse:32b", + "size": 19800825408 + } + ] + }, + { + "author": "CohereForAI", + "id": "cortexso/command-r", + "metadata": { + "_id": "66751b98585f2bf57092b2ae", + "author": "cortexso", + "cardData": { + "license": "cc-by-nc-4.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-21T06:20:08.000Z", + "description": "---\nlicense: cc-by-nc-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nC4AI Command-R is a research release of a 35 billion parameter highly performant generative model. Command-R is a large language model with open weights optimized for a variety of use cases including reasoning, summarization, and question answering. Command-R has the capability for multilingual generation evaluated in 10 languages and highly performant RAG capabilities.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Command-r-32b](https://huggingface.co/cortexhub/command-r/tree/32b) | `cortex run command-r:32b` |\n| 1 | [Command-r-35b](https://huggingface.co/cortexhub/command-r/tree/35b) | `cortex run command-r:35b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/command-r\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run command-r\n ```\n \n## Credits\n\n- **Author:** Cohere For AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://cohere.com/c4ai-cc-by-nc-license)", + "disabled": false, + "downloads": 613, + "gated": false, + "gguf": { + "architecture": "command-r", + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are a large language model called Command R built by the company Cohere. You act as a brilliant, sophisticated, AI-assistant chatbot trained to assist human users by providing thorough responses.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", + "context_length": 131072, + "eos_token": "<|END_OF_TURN_TOKEN|>", + "total": 32296476672 + }, + "id": "cortexso/command-r", + "lastModified": "2025-03-03T05:55:03.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/command-r", + "pipeline_tag": "text-generation", + "private": false, + "sha": "829fc0c4d726206187684dcbaf2a53c658d5d34a", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "c4ai-command-r-08-2024-q2_k.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q3_k_l.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q3_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q3_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q4_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q4_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q5_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q5_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q6_k.gguf" + }, + { + "rfilename": "c4ai-command-r-08-2024-q8_0.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q2_k.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q3_k_l.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q3_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q3_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q4_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q4_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q5_k_m.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q5_k_s.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q6_k.gguf" + }, + { + "rfilename": "c4ai-command-r-v01-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:cc-by-nc-4.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 471257928608, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "command-r:32b", + "size": 19800837184 }, { - "id": "deepseek-r1:main", - "size": 8098524832 + "id": "command-r:35b", + "size": 21527055296 + } + ] + }, + { + "author": "simplescaling", + "id": "cortexso/simplescaling-s1", + "metadata": { + "_id": "67a4e03a6f317f30b9a285b0", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-02-06T16:15:54.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nThe 'simplescaling-s1' model is a refined version of 'simplescaling/s1-32B,' designed to enhance scalability and streamline tasks in AI applications. It focuses on efficiently managing resource allocation while maintaining high performance across various workloads. This model is particularly effective for text generation, summarization, and conversational AI, as it balances speed and accuracy. Users can leverage 'simplescaling-s1' for building scalable applications that require processing large datasets or generating content quickly. Overall, the model achieves impressive results with reduced computational overhead, making it suitable for both research and practical deployments.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Simplescaling-s1-32b](https://huggingface.co/cortexso/simplescaling-s1/tree/32b) | cortex run simplescaling-s1:32b |\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/simplescaling-s1\n ```\n \n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run simplescaling-s1\n ```\n## Credits\n- **Author:** simplescaling\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Paper**: [Paper](https://arxiv.org/abs/2501.19393)", + "disabled": false, + "downloads": 104, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 32763876352 + }, + "id": "cortexso/simplescaling-s1", + "lastModified": "2025-03-03T03:46:24.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/simplescaling-s1", + "pipeline_tag": "text-generation", + "private": false, + "sha": "5755e76ec22a9ca9d0271ce16f5287bb9ad3c1a6", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "s1-32b-q2_k.gguf" + }, + { + "rfilename": "s1-32b-q3_k_l.gguf" + }, + { + "rfilename": "s1-32b-q3_k_m.gguf" + }, + { + "rfilename": "s1-32b-q3_k_s.gguf" + }, + { + "rfilename": "s1-32b-q4_k_m.gguf" + }, + { + "rfilename": "s1-32b-q4_k_s.gguf" + }, + { + "rfilename": "s1-32b-q5_k_m.gguf" + }, + { + "rfilename": "s1-32b-q5_k_s.gguf" + }, + { + "rfilename": "s1-32b-q6_k.gguf" + }, + { + "rfilename": "s1-32b-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2501.19393", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 206130756480, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "simplescaling-s1:32b", + "size": 19851336384 + } + ] + }, + { + "author": "Qwen", + "id": "cortexso/qwq", + "metadata": { + "_id": "67497b496615e96c7c8d6b05", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-29T08:28:57.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nQwQ-32B-Preview is an experimental large-scale research model by the Qwen Team, focusing on advanced AI reasoning. 
While it demonstrates strong analytical capabilities, it also presents notable limitations:\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwq-32b](https://huggingface.co/cortexso/qwq/tree/32b) | `cortex run qwq:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/qwq\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run qwq\n ```\n \n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE)\n- **Papers:** [QwQ Blog](https://qwenlm.github.io/blog/qwq-32b-preview/)", + "disabled": false, + "downloads": 101, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. 
You should think step-by-step.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 32763876352 + }, + "id": "cortexso/qwq", + "lastModified": "2025-03-03T02:23:40.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/qwq", + "pipeline_tag": "text-generation", + "private": false, + "sha": "fc6f23c0d5c8faf8b79b11e03aaa7c656fed8dfd", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "qwq-32b-preview-q2_k.gguf" + }, + { + "rfilename": "qwq-32b-preview-q3_k_l.gguf" + }, + { + "rfilename": "qwq-32b-preview-q3_k_m.gguf" + }, + { + "rfilename": "qwq-32b-preview-q3_k_s.gguf" + }, + { + "rfilename": "qwq-32b-preview-q4_k_m.gguf" + }, + { + "rfilename": "qwq-32b-preview-q4_k_s.gguf" + }, + { + "rfilename": "qwq-32b-preview-q5_k_m.gguf" + }, + { + "rfilename": "qwq-32b-preview-q5_k_s.gguf" + }, + { + "rfilename": "qwq-32b-preview-q6_k.gguf" + }, + { + "rfilename": "qwq-32b-preview-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 206130755200, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "qwq:32b", + "size": 19851336256 + } + ] + }, + { + "author": "Nexusflow", + "id": "cortexso/athene", + "metadata": { + "_id": "6737ae7de6b1d15ff54d0a08", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-15T20:26:37.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nAthene-V2-Chat-72B is an open-weight LLM that competes on par with GPT-4o across various benchmarks. It is currently ranked as the best open model on Chatbot Arena, where it outperforms GPT-4o-0513 (the highest-ranked GPT-4o model on Arena) in hard and math categories. 
It also matches GPT-4o-0513 in coding, instruction following, longer queries, and multi-turn conversations.\n\nTrained through RLHF with Qwen-2.5-72B-Instruct as the base model, Athene-V2-Chat-72B excels in chat, math, and coding. Additionally, its sister model, Athene-V2-Agent-72B, surpasses GPT-4o in complex function calling and agentic applications, further extending its capabilities.\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Athene-72b](https://huggingface.co/cortexso/athene/tree/72b) | `cortex run athene:72b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/athene\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run athene\n ```\n \n## Credits\n\n- **Author:** Nexusflow\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/Nexusflow/Athene-V2-Chat/blob/main/Nexusflow_Research_License_.pdf)\n- **Papers:** [Athene V2 Blog](https://nexusflow.ai/blogs/athene-v2)", + "disabled": false, + "downloads": 13, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 72706203648 + }, + "id": "cortexso/athene", + "lastModified": "2025-03-03T06:04:09.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/athene", + "pipeline_tag": "text-generation", + "private": false, + "sha": "a92447ca675e741541855ac03b8f144dee1067c4", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "athene-v2-chat-q4_k_m.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 47415715136, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "athene:72b", + "size": 47415715136 + } + ] + }, + { + "author": "MistralAI", + "id": "cortexso/mistral", + "metadata": { + "_id": "6667b1796e382e809d62b9fc", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-11T02:07:53.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMistral 7B, a 7-billion-parameter Large Language Model by Mistral AI. Designed for efficiency and performance, it suits real-time applications requiring swift responses.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Mistra-7b](https://huggingface.co/cortexhub/mistral/tree/7b) | `cortex run mistral:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/mistral\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run mistral\n ```\n \n## Credits\n\n- **Author:** MistralAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://mistral.ai/licenses/MNPL-0.1.md)\n- **Papers:** [Mistral paper](https://arxiv.org/abs/2310.06825)", + "disabled": false, + "downloads": 1895, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "", + "chat_template": "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS] [\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n {{- \"[TOOL_CALLS] [\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- \" \" + message[\"content\"]|trim + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS] {\"content\": ' + content|string + \", \" }}\n {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be 
alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", + "context_length": 32768, + "eos_token": "", + "total": 7248023552 + }, + "id": "cortexso/mistral", + "lastModified": "2025-03-03T02:39:43.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/mistral", + "pipeline_tag": "text-generation", + "private": false, + "sha": "125b0ef1bdf6441d5c00f6a6a24a491214e532bd", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q2_k.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q3_k_l.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q3_k_m.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q3_k_s.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q4_k_m.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q4_k_s.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q5_k_m.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q5_k_s.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q6_k.gguf" + }, + { + "rfilename": "mistral-7b-instruct-v0.3-q8_0.gguf" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2310.06825", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 49914826528, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "mistral:7b", + "size": 4372815680 + } + ] + }, + { + "author": "HuggingFaceTB", + "id": "cortexso/smollm2", + "metadata": { + "_id": "672408e4603a8644ff7505f0", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-10-31T22:47:00.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nSmolLM2 is a family of compact language models available in three sizes: 135M, 360M, and 1.7B parameters. These models are designed to solve a wide range of tasks while being lightweight enough for on-device deployment. More details can be found in the [SmolLM2 paper](https://arxiv.org/abs/2502.02737v1).\n\nThe **1.7B variant** demonstrates significant improvements over its predecessor, SmolLM1-1.7B, especially in instruction following, knowledge retention, reasoning, and mathematical problem-solving. It was trained on **11 trillion tokens** using a diverse dataset combination, including **FineWeb-Edu, DCLM, The Stack**, and newly curated mathematics and coding datasets that will be released soon.\n\nThe **instruct version** of SmolLM2 was developed through **supervised fine-tuning (SFT)** using a mix of public datasets and curated proprietary datasets. It further benefits from **Direct Preference Optimization (DPO)** using **UltraFeedback**. \n\nAdditionally, the instruct model supports tasks such as **text rewriting, summarization, and function calling**, enabled by datasets from **Argilla**, including **Synth-APIGen-v0.1**. 
The SFT dataset is available at: [SmolTalk SFT Dataset](https://huggingface.co/datasets/HuggingFaceTB/smoltalk).\n\nFor further details, visit the [SmolLM2 GitHub repository](https://github.com/huggingface/smollm), where you will find resources for **pre-training, post-training, evaluation, and local inference**.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| -- | ------------------------------------------------------ | ---------------------- |\n| 1 | [Smollm2-1.7b](https://huggingface.co/cortexso/smollm2/tree/1.7b) | `cortex run smollm2:1.7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/smollm2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run smollm2\n ```\n\n## Credits\n\n- **Author:** SmolLM2 Team\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [SmolLM2 Research](https://arxiv.org/abs/2502.02737v1)", + "disabled": false, + "downloads": 237, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|im_start|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 8192, + "eos_token": "<|im_end|>", + "total": 1711376384 + }, + "id": "cortexso/smollm2", + "lastModified": "2025-03-03T03:51:13.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/smollm2", + "pipeline_tag": "text-generation", + "private": false, + "sha": "b825edad383d925571b4433f8d6b16eb7cc1e9fc", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "smollm2-1.7b-instruct-q2_k.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q6_k.gguf" + }, + { + "rfilename": "smollm2-1.7b-instruct-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2502.02737", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 11998369216, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "smollm2:1.7b", + "size": 1055609728 + } + ] + }, + { + "author": "allenai", + "id": "cortexso/tulu3", + "metadata": { + "_id": "6744a6a2e08fe3da3fcdfb36", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-11-25T16:32:34.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nTülu3 is a state-of-the-art instruction-following model family developed by Allen Institute for AI. It is designed to excel in a wide range of tasks beyond standard chat applications, including complex problem-solving in domains such as MATH, GSM8K, and IFEval. The Tülu3 series provides a fully open-source ecosystem, offering access to datasets, training code, and fine-tuning recipes to facilitate advanced model customization and experimentation.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Tulu3-8b](https://huggingface.co/cortexso/tulu3/tree/8b) | `cortex run tulu3:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/tulu3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run tulu3\n ```\n \n## Credits\n\n- **Author:** Allenai\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct/blob/main/LICENSE)\n- **Papers:** [Paper](https://arxiv.org/abs/2411.15124)", + "disabled": false, + "downloads": 252, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}{% else %}{{ '<|assistant|>\n' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}", + "context_length": 131072, + "eos_token": "<|end_of_text|>", + "total": 8030326848 + }, + "id": "cortexso/tulu3", + "lastModified": "2025-03-03T03:48:16.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/tulu3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "693fb27ee973a686d66f33ecc72b41172ec5a7d6", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q2_k.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q3_k_l.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q3_k_m.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q3_k_s.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q4_k_m.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q4_k_s.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q5_k_m.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q5_k_s.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q6_k.gguf" + }, + { + "rfilename": "llama-3.1-tulu-3-8b-sft-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + 
"text-generation", + "arxiv:2411.15124", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 56188233120, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "tulu3:8b", + "size": 4920780768 + } + ] + }, + { + "author": "Qwen Team", + "id": "cortexso/qwen3", + "metadata": { + "_id": "6810288ccbe4f92b62636b50", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp", "featured"] + }, + "createdAt": "2025-04-29T01:17:00.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**Qwen Team** developed and released the **Qwen3** series, a state-of-the-art family of language models optimized for advanced reasoning, dialogue, instruction-following, and agentic use cases. Qwen3 introduces innovative thinking/non-thinking mode switching, long context capabilities, and multilingual support, all while achieving high efficiency and performance.\n\nThe Qwen3 models span several sizes and include support for seamless reasoning, complex tool usage, and detailed multi-turn conversations, making them ideal for applications such as research assistants, code generation, enterprise chatbots, and more.\n\n## Variants\n\n### Qwen3\n\n| No | Variant | Branch | Cortex CLI command |\n|----|--------------------------------------------------------------------------------------------|--------|-------------------------------|\n| 1 | [Qwen3-0.6B](https://huggingface.co/cortexso/qwen3/tree/0.6b) | 0.6b | `cortex run qwen3:0.6b` |\n| 2 | [Qwen3-1.7B](https://huggingface.co/cortexso/qwen3/tree/1.7b) | 1.7b | `cortex run qwen3:1.7b` |\n| 3 | [Qwen3-4B](https://huggingface.co/cortexso/qwen3/tree/4b) | 4b | `cortex run qwen3:4b` |\n| 4 | [Qwen3-8B](https://huggingface.co/cortexso/qwen3/tree/8b) | 8b | `cortex run qwen3:8b` |\n| 5 | [Qwen3-14B](https://huggingface.co/cortexso/qwen3/tree/14b) | 14b | `cortex run qwen3:14b` |\n| 6 | [Qwen3-32B](https://huggingface.co/cortexso/qwen3/tree/32b) | 32b | `cortex run qwen3:32b` |\n| 7 | [Qwen3-30B-A3B](https://huggingface.co/cortexso/qwen3/tree/30b-a3b) | 30b-a3b| `cortex run qwen3:30b-a3b` |\n\nEach branch contains multiple quantized GGUF versions:\n- **Qwen3-0.6B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-1.7B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-4B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-8B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-32B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-30B-A3B:** *q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/qwen3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run qwen3\n ```\n\n## Credits\n\n- **Author:** Qwen Team\n- **Converter:** [Menlo Research](https://menlo.ai/)\n- **Original License:** [License](https://www.apache.org/licenses/LICENSE-2.0)\n- **Blogs:** [Qwen3: Think Deeper, Act Faster](https://qwenlm.github.io/blog/qwen3/)", + "disabled": false, + "downloads": 6693, + "gated": false, + "gguf": { + "architecture": "qwen3", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in message.content %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {%- set reasoning_content = message.content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 
1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '\\n\\n\\n\\n' }}\n {%- endif %}\n{%- endif %}", + "context_length": 40960, + "eos_token": "<|im_end|>", + "total": 751632384 + }, + "id": "cortexso/qwen3", + "lastModified": "2025-05-08T15:50:21.000Z", + "likes": 1, + "model-index": null, + "modelId": "cortexso/qwen3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "d25d0999fbab8909f16173f21f2db8f9f58c0a28", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "qwen3-0.6b-q2_k.gguf" + }, + { + "rfilename": "qwen3-0.6b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-0.6b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-0.6b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-0.6b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-0.6b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-0.6b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-0.6b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-0.6b-q6_k.gguf" + }, + { + "rfilename": "qwen3-0.6b-q8_0.gguf" + }, + { + "rfilename": "qwen3-1.7b-q2_k.gguf" + }, + { + "rfilename": "qwen3-1.7b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-1.7b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-1.7b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-1.7b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-1.7b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-1.7b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-1.7b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-1.7b-q6_k.gguf" + }, + { + "rfilename": "qwen3-1.7b-q8_0.gguf" + }, + { + "rfilename": "qwen3-14b-q2_k.gguf" + }, + { + "rfilename": "qwen3-14b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-14b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-14b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-14b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-14b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-14b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-14b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-14b-q6_k.gguf" + }, + { + "rfilename": "qwen3-14b-q8_0.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q2_k.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q6_k.gguf" + }, + { + "rfilename": "qwen3-30b-a3b-q8_0.gguf" + }, + { + "rfilename": "qwen3-32b-q2_k.gguf" + }, + { + "rfilename": "qwen3-32b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-32b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-32b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-32b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-32b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-32b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-32b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-32b-q6_k.gguf" + }, + { + "rfilename": "qwen3-32b-q8_0.gguf" + }, + { + "rfilename": "qwen3-4b-q2_k.gguf" + }, + { + "rfilename": "qwen3-4b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-4b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-4b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-4b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-4b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-4b-q5_k_m.gguf" + }, + { + 
"rfilename": "qwen3-4b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-4b-q6_k.gguf" + }, + { + "rfilename": "qwen3-4b-q8_0.gguf" + }, + { + "rfilename": "qwen3-8b-q2_k.gguf" + }, + { + "rfilename": "qwen3-8b-q3_k_l.gguf" + }, + { + "rfilename": "qwen3-8b-q3_k_m.gguf" + }, + { + "rfilename": "qwen3-8b-q3_k_s.gguf" + }, + { + "rfilename": "qwen3-8b-q4_k_m.gguf" + }, + { + "rfilename": "qwen3-8b-q4_k_s.gguf" + }, + { + "rfilename": "qwen3-8b-q5_k_m.gguf" + }, + { + "rfilename": "qwen3-8b-q5_k_s.gguf" + }, + { + "rfilename": "qwen3-8b-q6_k.gguf" + }, + { + "rfilename": "qwen3-8b-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "featured", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 588411644672, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "qwen3:32b", + "size": 19762149088 }, { - "id": "deepseek-r1:7b", - "size": 4683073184 + "id": "qwen3:8b", + "size": 5027783808 }, { - "id": "deepseek-r1:32b", - "size": 19851335520 + "id": "qwen3:0.6b", + "size": 484219968 + }, + { + "id": "qwen3:4b", + "size": 2497280608 + }, + { + "id": "qwen3:30b-a3b", + "size": 18556686208 + }, + { + "id": "qwen3:14b", + "size": 9001753280 + }, + { + "id": "qwen3:1.7b", + "size": 1282439232 + } + ] + }, + { + "author": "TinyLlama", + "id": "cortexso/tinyllama", + "metadata": { + "_id": "66791800ca45b9165970f2fe", + "author": "cortexso", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-24T06:53:52.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [TinyLlama](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) project aims to pretrain a 1.1B Llama model on 3 trillion tokens. This is the chat model finetuned on a diverse range of synthetic dialogues generated by ChatGPT.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [TinyLLama-1b](https://huggingface.co/cortexso/tinyllama/tree/1b) | `cortex run tinyllama:1b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/tinyllama\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run tinyllama\n ```\n \n## Credits\n\n- **Author:** Microsoft\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Tinyllama Paper](https://arxiv.org/abs/2401.02385)", + "disabled": false, + "downloads": 562, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "", + "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}", + "context_length": 2048, + "eos_token": "", + "total": 1100048384 + }, + "id": "cortexso/tinyllama", + "lastModified": "2025-03-03T06:16:24.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/tinyllama", + "pipeline_tag": "text-generation", + "private": false, + "sha": "953054fd3565023c2bbd2381f2566f904f5bdc1f", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q2_k.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q3_k_l.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q3_k_m.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q3_k_s.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q4_k_m.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q4_k_s.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q5_k_m.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q5_k_s.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q6_k.gguf" + }, + { + "rfilename": "tinyllama-1.1b-chat-v1.0-q8_0.gguf" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2401.02385", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 8451229056, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "tinyllama:1b", + "size": 782045248 + } + ] + }, + { + "author": "meta-llama", + "id": "cortexso/llama3", + "metadata": { + "_id": "6667a6d52e5f1c08ec14469c", + "author": "cortexso", + "cardData": { + "license": "llama3", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-06-11T01:22:29.000Z", + "description": "---\nlicense: llama3\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3](https://huggingface.co/meta-llama/Meta-Llama-3-8B) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. 
Further, in developing these models, we took great care to optimize helpfulness and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Llama3-8b](https://huggingface.co/cortexso/llama3/tree/8b) | `cortex run llama3:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://llama.meta.com/llama3/license/)\n- **Papers:** [Llama-3 Blog](https://llama.meta.com/llama3/)", + "disabled": false, + "downloads": 646, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "context_length": 131072, + "eos_token": "<|eot_id|>", + "total": 8030261312 + }, + "id": "cortexso/llama3", + "lastModified": "2025-03-03T06:19:24.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/llama3", + "pipeline_tag": "text-generation", + "private": false, + "sha": "fcf18c0b14bb2dc64c7f78da40ca88a8ff759fd5", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "llama-3.1-8b-instruct-q2_k.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q3_k_l.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q3_k_m.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q3_k_s.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q4_k_m.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q4_k_s.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q5_k_m.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q5_k_s.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q6_k.gguf" + }, + { + "rfilename": "llama-3.1-8b-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:llama3", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 70949951936, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "llama3:8b", + "size": 4920739072 } ] }, @@ -1155,7 +6028,7 @@ "createdAt": "2024-07-29T10:25:05.000Z", "description": "---\nlicense: llama3.1\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3.1](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Llama3.1-8b](https://huggingface.co/cortexso/llama3.1/tree/8b) | `cortex run llama3.1:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.1\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/blob/main/LICENSE)\n- **Papers:** [Llama-3.1 Blog](https://ai.meta.com/blog/meta-llama-3-1/)", "disabled": false, - "downloads": 275, + "downloads": 1048, "gated": false, "gguf": { "architecture": "llama", @@ -1165,13 +6038,13 @@ "total": 8030261312 }, "id": "cortexso/llama3.1", - "lastModified": "2025-02-25T07:41:12.000Z", + "lastModified": "2025-03-02T14:27:57.000Z", "likes": 0, "model-index": null, "modelId": "cortexso/llama3.1", "pipeline_tag": "text-generation", "private": false, - "sha": "f83805762b13bfe9aaa071c065edb74c48281367", + "sha": "256c4f2118a75d93a1dc368ac4ccf1fea16751c2", "siblings": [ { "rfilename": ".gitattributes" @@ -1225,7 +6098,7 @@ "endpoints_compatible", "region:us" ], - "usedStorage": 227069905920, + "usedStorage": 66029173888, "widgetData": [ { "text": "My name is Julien and I like to" @@ -1245,10 +6118,334 @@ { "id": "llama3.1:8b", "size": 4920734176 + } + ] + }, + { + "author": "AIDC-AI", + "id": "cortexso/marco-o1", + "metadata": { + "_id": "6743b6140d46fa30e6ff2879", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] }, + "createdAt": "2024-11-24T23:26:12.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\nMarco-o1 not only focuses on disciplines with standard answers, such as mathematics, physics, and coding—which are well-suited for reinforcement learning (RL)—but also places greater emphasis on open-ended resolutions. 
We aim to address the question: \"Can the o1 model effectively generalize to broader domains where clear standards are absent and rewards are challenging to quantify?\"\n\nCurrently, Marco-o1 Large Language Model (LLM) is powered by Chain-of-Thought (CoT) fine-tuning, Monte Carlo Tree Search (MCTS), reflection mechanisms, and innovative reasoning strategies—optimized for complex real-world problem-solving tasks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Marco-o1-8b](https://huggingface.co/cortexso/marco-o1/tree/8b) | `cortex run marco-o1:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/marco-o1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run marco-o1\n ```\n \n## Credits\n\n- **Author:** AIDC-AI\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/AIDC-AI/Marco-o1/blob/main/LICENSE)\n- **Papers:** [Paper](https://arxiv.org/abs/2411.14405)", + "disabled": false, + "downloads": 122, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在内完成,内输出你的结果。\n应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "context_length": 32768, + "eos_token": "<|im_end|>", + "total": 7615616512 + }, + "id": "cortexso/marco-o1", + "lastModified": "2025-03-03T02:27:27.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/marco-o1", + "pipeline_tag": "text-generation", + "private": false, + "sha": "0c8e0cdbfb898e000cad200b2694c5c6e6710fc6", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "marco-o1-q2_k.gguf" + }, + { + "rfilename": "marco-o1-q3_k_l.gguf" + }, + { + "rfilename": "marco-o1-q3_k_m.gguf" + }, + { + "rfilename": "marco-o1-q3_k_s.gguf" + }, + { + "rfilename": "marco-o1-q4_k_m.gguf" + }, + { + "rfilename": "marco-o1-q4_k_s.gguf" + }, + { + "rfilename": "marco-o1-q5_k_m.gguf" + }, + { + "rfilename": "marco-o1-q5_k_s.gguf" + }, + { + "rfilename": "marco-o1-q6_k.gguf" + }, + { + "rfilename": "marco-o1-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "arxiv:2411.14405", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 53341785824, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ { - "id": "llama3.1:main", - "size": 8540770784 + "id": "marco-o1:8b", + "size": 4683071648 + } + ] + }, + { + "author": "DeepSeek-AI", + "id": "cortexso/deepseek-r1-distill-qwen-1.5b", + "metadata": { + "_id": "678e84d99d66241aabee008a", + "author": "cortexso", + "cardData": { + "license": "mit", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2025-01-20T17:16:09.000Z", + "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) model, a distilled version of the Qwen 1.5B language model. It is fine-tuned for high-performance text generation and optimized for dialogue and information-seeking tasks. This model achieves a balance of efficiency and accuracy while maintaining a smaller footprint compared to the original Qwen 1.5B.\n\nThe model is designed for applications in customer support, conversational AI, and research, prioritizing both helpfulness and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-1.5b-1.5b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-1.5b/tree/1.5b) | `cortex run deepseek-r1-distill-qwen-1.5b:1.5b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-1.5b\n ```\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-1.5b\n ```\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", + "disabled": false, + "downloads": 539, + "gated": false, + "gguf": { + "architecture": "qwen2", + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool 
%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", + "context_length": 131072, + "eos_token": "<|end▁of▁sentence|>", + "total": 1777088000 + }, + "id": "cortexso/deepseek-r1-distill-qwen-1.5b", + "lastModified": "2025-03-03T05:24:13.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/deepseek-r1-distill-qwen-1.5b", + "pipeline_tag": "text-generation", + "private": false, + "sha": "14cbd3c8ac57a346c35f676fd5fe55befebd911e", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q2_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_l.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q4_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q4_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q5_k_m.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q5_k_s.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q6_k.gguf" + }, + { + "rfilename": "deepseek-r1-distill-qwen-1.5b-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:mit", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 12728600096, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." + } + ] + }, + "models": [ + { + "id": "deepseek-r1-distill-qwen-1.5b:1.5b", + "size": 1117320480 + } + ] + }, + { + "author": "PrimeIntellect", + "id": "cortexso/intellect-1", + "metadata": { + "_id": "674e48fc24f1ef616cd485de", + "author": "cortexso", + "cardData": { + "license": "other", + "pipeline_tag": "text-generation", + "tags": ["cortex.cpp"] + }, + "createdAt": "2024-12-02T23:55:40.000Z", + "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nIntellect-1 is a high-performance instruction-tuned model developed by Qwen, designed to handle a broad range of natural language processing tasks with efficiency and precision. Optimized for dialogue, reasoning, and knowledge-intensive applications, Intellect-1 excels in structured generation, summarization, and retrieval-augmented tasks. 
It is part of an open ecosystem, providing transparency in training data, model architecture, and fine-tuning methodologies.\n\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Intellect-1-10b](https://huggingface.co/cortexso/intellect-1/tree/10b) | `cortex run intellect-1:10b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/intellect-1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run intellect-1\n ```\n \n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Technical Paper](https://github.com/PrimeIntellect-ai/prime)", + "disabled": false, + "downloads": 182, + "gated": false, + "gguf": { + "architecture": "llama", + "bos_token": "<|begin_of_text|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "context_length": 8192, + "eos_token": "<|eot_id|>", + "total": 10211381248 + }, + "id": "cortexso/intellect-1", + "lastModified": "2025-03-03T02:32:47.000Z", + "likes": 0, + "model-index": null, + "modelId": "cortexso/intellect-1", + "pipeline_tag": "text-generation", + "private": false, + "sha": "f46fd8109130aab2969fd9229d390051f774a761", + "siblings": [ + { + "rfilename": ".gitattributes" + }, + { + "rfilename": "README.md" + }, + { + "rfilename": "intellect-1-instruct-q2_k.gguf" + }, + { + "rfilename": "intellect-1-instruct-q3_k_l.gguf" + }, + { + "rfilename": "intellect-1-instruct-q3_k_m.gguf" + }, + { + "rfilename": "intellect-1-instruct-q3_k_s.gguf" + }, + { + "rfilename": "intellect-1-instruct-q4_k_m.gguf" + }, + { + "rfilename": "intellect-1-instruct-q4_k_s.gguf" + }, + { + "rfilename": "intellect-1-instruct-q5_k_m.gguf" + }, + { + "rfilename": "intellect-1-instruct-q5_k_s.gguf" + }, + { + "rfilename": "intellect-1-instruct-q6_k.gguf" + }, + { + "rfilename": "intellect-1-instruct-q8_0.gguf" + }, + { + "rfilename": "metadata.yml" + }, + { + "rfilename": "model.yml" + } + ], + "spaces": [], + "tags": [ + "gguf", + "cortex.cpp", + "text-generation", + "license:other", + "endpoints_compatible", + "region:us", + "conversational" + ], + "usedStorage": 71113603904, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "intellect-1:10b", + "size": 6229006784 } ] } diff --git a/web-app/src/hooks/useAssistant.ts b/web-app/src/hooks/useAssistant.ts index 12feacdee..1c3181276 100644 --- a/web-app/src/hooks/useAssistant.ts +++ b/web-app/src/hooks/useAssistant.ts @@ -21,7 +21,7 @@ export const defaultAssistant: Assistant = { description: 'Jan is a helpful desktop assistant that can reason through complex tasks and use tools to complete them on the user’s behalf.', instructions: - 'Jan is a helpful desktop assistant that can reason through complex tasks and use tools to complete them on the user’s behalf. Respond naturally and concisely, take actions when needed, and guide the user toward their goals.', + 'You have access to a set of tools to help you answer the user’s question. You can use only one tool per message, and you’ll receive the result of that tool in the user’s next response. To complete a task, use tools step by step—each step should be guided by the outcome of the previous one.\nTool Usage Rules:\n1. Always provide the correct values as arguments when using tools. Do not pass variable names—use actual values instead.\n2. You may perform multiple tool steps to complete a task.\n3. Avoid repeating a tool call with exactly the same parameters to prevent infinite loops.', } export const useAssistant = create()((set, get) => ({ diff --git a/web-app/src/utils/models.ts b/web-app/src/utils/models.ts index d2d2cb9ac..c21b4a3a9 100644 --- a/web-app/src/utils/models.ts +++ b/web-app/src/utils/models.ts @@ -1,6 +1,6 @@ export const hardcodedModel = { author: 'Menlo', - id: 'https://huggingface.co/Menlo/Jan-nano', + id: 'Menlo/Jan-nano', metadata: { '_id': '68492cd9cada68b1d11ca1bd', 'author': 'Menlo', @@ -12,42 +12,39 @@ export const hardcodedModel = { 'description': '---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. 
Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)', 'disabled': false, - 'downloads': 0, + 'downloads': 939, 'gated': false, 'gguf': { architecture: 'qwen3', bos_token: '<|endoftext|>', chat_template: - "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- set content = content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n\\n\\n\\n' }}\n{%- endif %}", + "{%- if tools %} {{- 
'<|im_start|>system\\n' }} {%- if messages[0].role == 'system' %} {{- messages[0].content + '\\n\\n' }} {%- endif %} {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }} {%- for tool in tools %} {{- \"\\n\" }} {{- tool | tojson }} {%- endfor %} {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }} {%- else %} {%- if messages[0].role == 'system' %} {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endfor %} {%- for message in messages %} {%- if message.content is string %} {%- set content = message.content %} {%- else %} {%- set content = '' %} {%- endif %} {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %} {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }} {%- elif message.role == \"assistant\" %} {%- set reasoning_content = '' %} {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} {%- if '' in content %} {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %} {%- set content = content.split('')[-1].lstrip('\\n') %} {%- endif %} {%- endif %} {%- if loop.index0 > ns.last_query_index %} {%- if loop.last or (not loop.last and reasoning_content) %} {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- if message.tool_calls %} {%- for tool_call in message.tool_calls %} {%- if (loop.first and content) or (not loop.first) %} {{- '\\n' }} {%- endif %} {%- if tool_call.function %} {%- set tool_call = tool_call.function %} {%- endif %} {{- '\\n{\"name\": \"' }} {{- tool_call.name }} {{- '\", \"arguments\": ' }} {%- if tool_call.arguments is string %} {{- tool_call.arguments }} {%- else %} {{- tool_call.arguments | tojson }} {%- endif %} {{- '}\\n' }} {%- endfor %} {%- endif %} {{- '<|im_end|>\\n' }} {%- elif message.role == \"tool\" %} {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %} {{- '<|im_start|>user' }} {%- endif %} {{- '\\n\\n' }} {{- content }} {{- '\\n' }} {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %} {{- '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\\n' }} {{- '\\n\\n\\n\\n' }} {%- endif %}", context_length: 40960, eos_token: '<|im_end|>', + quantize_imatrix_file: 'imatrix.dat', total: 4022468096, }, 'id': 'Menlo/Jan-nano', - 'lastModified': '2025-06-11T10:42:16.000Z', + 'lastModified': '2025-06-13T05:53:33.000Z', 'likes': 2, 'model-index': null, 'modelId': 'Menlo/Jan-nano', 'pipeline_tag': 'text-generation', 'private': false, - 'sha': 'f05b9e798d3cb66394a25d2a45cdc77fd1d5a3ba', + 'sha': 
'782985633ac4080dfdaa52e62d61dcf637e9ff0d', 'siblings': [ { rfilename: '.gitattributes', - size: 1681, - }, - { - rfilename: 'Jan-nano_q4_k_m.gguf', - size: 2497280288, - }, - { - rfilename: 'Jan-nano_q8_0.gguf', - size: 4280400640, + size: 1742, }, { rfilename: 'README.md', size: 776, }, + { + rfilename: 'jan-nano-0.4-iQ4_XS.gguf', + size: 2270750400, + }, ], 'spaces': [], 'tags': [ @@ -56,9 +53,10 @@ export const hardcodedModel = { 'license:apache-2.0', 'endpoints_compatible', 'region:us', + 'imatrix', 'conversational', ], - 'usedStorage': 11772241536, + 'usedStorage': 20820673088, 'widgetData': [ { text: 'Hi, what can you help me with?', @@ -76,12 +74,8 @@ export const hardcodedModel = { }, models: [ { - id: 'Menlo:Jan-nano:Jan-nano_q4_k_m.gguf', - size: 2497280288, - }, - { - id: 'Menlo:Jan-nano:Jan-nano_q8_0.gguf', - size: 4280400640, + id: 'Menlo:Jan-nano:jan-nano-0.4-iQ4_XS.gguf', + size: 2270750400, }, ], } From 1e17cc6ec7285f2679074719f0e99663cb712df7 Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 14 Jun 2025 16:32:15 +0700 Subject: [PATCH 04/48] =?UTF-8?q?=E2=9C=A8enhancement:=20model=20run=20imp?= =?UTF-8?q?rovement=20(#5268)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: mcp tool error handling * fix: error message * fix: trigger download from recommend model * fix: can't scroll hub * fix: show progress * ✨enhancement: prompt users to increase context size * ✨enhancement: rearrange action buttons for a better UX * 🔧chore: clean up logics --------- Co-authored-by: Faisal Amir --- .../inference-cortex-extension/src/index.ts | 7 +- src-tauri/src/core/mcp.rs | 7 +- web-app/src/containers/ChatInput.tsx | 6 +- web-app/src/containers/ThreadContent.tsx | 24 +++- .../containers/dialogs/OutOfContextDialog.tsx | 104 +++++++++++++++ web-app/src/hooks/useAppState.ts | 7 + web-app/src/hooks/useChat.ts | 94 +++++++++++++- web-app/src/lib/completion.ts | 18 ++- web-app/src/locales/en/common.json | 1 + web-app/src/routes/hub.tsx | 30 ++--- web-app/src/routes/threads/$threadId.tsx | 7 + web-app/src/utils/error.ts | 2 + web-app/src/utils/models.ts | 122 ++++++++++++++++-- 13 files changed, 387 insertions(+), 42 deletions(-) create mode 100644 web-app/src/containers/dialogs/OutOfContextDialog.tsx create mode 100644 web-app/src/utils/error.ts diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index a49b1a852..626d53696 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -62,6 +62,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { cache_type: string = 'q8' cpu_threads?: number auto_unload_models: boolean = true + reasoning_budget = -1 // Default reasoning budget in seconds /** * The URL for making inference requests. */ @@ -230,8 +231,6 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { const loadedModels = await this.activeModels() - console.log('Loaded models:', loadedModels) - // This is to avoid loading the same model multiple times if (loadedModels.some((e: { id: string }) => e.id === model.id)) { console.log(`Model ${model.id} already loaded`) @@ -269,6 +268,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ...(this.cont_batching && this.n_parallel && this.n_parallel > 1 ? { cont_batching: this.cont_batching } : {}), + ...(model.id.toLowerCase().includes('jan-nano') + ? 
{ reasoning_budget: 0 } + : { reasoning_budget: this.reasoning_budget }), + ...{ 'no-context-shift': true }, }, timeout: false, signal, diff --git a/src-tauri/src/core/mcp.rs b/src-tauri/src/core/mcp.rs index 642505486..f9509c8e5 100644 --- a/src-tauri/src/core/mcp.rs +++ b/src-tauri/src/core/mcp.rs @@ -377,7 +377,12 @@ pub async fn call_tool( }); return match timeout(MCP_TOOL_CALL_TIMEOUT, tool_call).await { - Ok(result) => result.map_err(|e| e.to_string()), + Ok(result) => { + match result { + Ok(ok_result) => Ok(ok_result), + Err(e) => Err(e.to_string()), + } + } Err(_) => Err(format!( "Tool call '{}' timed out after {} seconds", tool_name, diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index af5da384c..a83adc59e 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -36,6 +36,7 @@ import { ModelLoader } from '@/containers/loaders/ModelLoader' import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable' import { getConnectedServers } from '@/services/mcp' import { stopAllModels } from '@/services/models' +import { useOutOfContextPromiseModal } from './dialogs/OutOfContextDialog' type ChatInputProps = { className?: string @@ -60,6 +61,8 @@ const ChatInput = ({ const { t } = useTranslation() const { spellCheckChatInput } = useGeneralSetting() const { tokenSpeed } = useAppState() + const { showModal, PromiseModal: OutOfContextModal } = + useOutOfContextPromiseModal() const maxRows = 10 const { selectedModel } = useModelProvider() @@ -110,7 +113,7 @@ const ChatInput = ({ return } setMessage('') - sendMessage(prompt) + sendMessage(prompt, showModal) } useEffect(() => { @@ -611,6 +614,7 @@ const ChatInput = ({
)} +
) } diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index f067a4431..833846db1 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -1,6 +1,6 @@ import { ThreadMessage } from '@janhq/core' import { RenderMarkdown } from './RenderMarkdown' -import { Fragment, memo, useCallback, useMemo, useState } from 'react' +import React, { Fragment, memo, useCallback, useMemo, useState } from 'react' import { IconCopy, IconCopyCheck, @@ -79,6 +79,8 @@ export const ThreadContent = memo( showAssistant?: boolean // eslint-disable-next-line @typescript-eslint/no-explicit-any streamTools?: any + contextOverflowModal?: React.ReactNode | null + showContextOverflowModal?: () => Promise } ) => { const [message, setMessage] = useState(item.content?.[0]?.text?.value || '') @@ -129,7 +131,10 @@ export const ThreadContent = memo( } if (toSendMessage) { deleteMessage(toSendMessage.thread_id, toSendMessage.id ?? '') - sendMessage(toSendMessage.content?.[0]?.text?.value || '') + sendMessage( + toSendMessage.content?.[0]?.text?.value || '', + item.showContextOverflowModal + ) } }, [deleteMessage, getMessages, item, sendMessage]) @@ -162,15 +167,25 @@ export const ThreadContent = memo( const editMessage = useCallback( (messageId: string) => { const threadMessages = getMessages(item.thread_id) + const index = threadMessages.findIndex((msg) => msg.id === messageId) if (index === -1) return + // Delete all messages after the edited message for (let i = threadMessages.length - 1; i >= index; i--) { deleteMessage(threadMessages[i].thread_id, threadMessages[i].id) } - sendMessage(message) + + sendMessage(message, item.showContextOverflowModal) }, - [deleteMessage, getMessages, item.thread_id, message, sendMessage] + [ + deleteMessage, + getMessages, + item.thread_id, + message, + sendMessage, + item.showContextOverflowModal, + ] ) const isToolCalls = @@ -445,6 +460,7 @@ export const ThreadContent = memo( {image.detail &&
{image.detail}
} )} + {item.contextOverflowModal && item.contextOverflowModal}
) } diff --git a/web-app/src/containers/dialogs/OutOfContextDialog.tsx b/web-app/src/containers/dialogs/OutOfContextDialog.tsx new file mode 100644 index 000000000..fb01d7907 --- /dev/null +++ b/web-app/src/containers/dialogs/OutOfContextDialog.tsx @@ -0,0 +1,104 @@ +import { t } from 'i18next' +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from '@/components/ui/dialog' + +import { ReactNode, useCallback, useState } from 'react' +import { Button } from '@/components/ui/button' + +export function useOutOfContextPromiseModal() { + const [isOpen, setIsOpen] = useState(false) + const [modalProps, setModalProps] = useState<{ + resolveRef: ((value: unknown) => void) | null + }>({ + resolveRef: null, + }) + // Function to open the modal and return a Promise + const showModal = useCallback(() => { + return new Promise((resolve) => { + setModalProps({ + resolveRef: resolve, + }) + setIsOpen(true) + }) + }, []) + + const PromiseModal = useCallback((): ReactNode => { + if (!isOpen) { + return null + } + + const handleConfirm = () => { + setIsOpen(false) + if (modalProps.resolveRef) { + modalProps.resolveRef(true) + } + } + + const handleCancel = () => { + setIsOpen(false) + if (modalProps.resolveRef) { + modalProps.resolveRef(false) + } + } + + return ( + { + setIsOpen(open) + if (!open) handleCancel() + }} + > + + + + {t('outOfContextError.title', 'Out of context error')} + + + + {t( + 'outOfContextError.description', + 'This chat is reaching the AI’s memory limit, like a whiteboard filling up. We can expand the memory window (called context size) so it remembers more, but it may use more of your computer’s memory.' + )} +
+
+ {t( + 'outOfContextError.increaseContextSizeDescription', + 'Do you want to increase the context size?' + )} +
+ + + + +
+
+ ) + }, [isOpen, modalProps]) + return { showModal, PromiseModal } +} diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts index ace57d8d2..dc29f7f8a 100644 --- a/web-app/src/hooks/useAppState.ts +++ b/web-app/src/hooks/useAppState.ts @@ -12,6 +12,7 @@ type AppState = { abortControllers: Record tokenSpeed?: TokenSpeed currentToolCall?: ChatCompletionMessageToolCall + showOutOfContextDialog?: boolean setServerStatus: (value: 'running' | 'stopped' | 'pending') => void updateStreamingContent: (content: ThreadMessage | undefined) => void updateCurrentToolCall: ( @@ -22,6 +23,7 @@ type AppState = { setAbortController: (threadId: string, controller: AbortController) => void updateTokenSpeed: (message: ThreadMessage) => void resetTokenSpeed: () => void + setOutOfContextDialog: (show: boolean) => void } export const useAppState = create()((set) => ({ @@ -99,4 +101,9 @@ export const useAppState = create()((set) => ({ set({ tokenSpeed: undefined, }), + setOutOfContextDialog: (show) => { + set(() => ({ + showOutOfContextDialog: show, + })) + }, })) diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 3073ececf..164555563 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -24,10 +24,11 @@ import { getTools } from '@/services/mcp' import { MCPTool } from '@/types/completion' import { listen } from '@tauri-apps/api/event' import { SystemEvent } from '@/types/events' -import { stopModel, startModel } from '@/services/models' +import { stopModel, startModel, stopAllModels } from '@/services/models' import { useToolApproval } from '@/hooks/useToolApproval' import { useToolAvailable } from '@/hooks/useToolAvailable' +import { OUT_OF_CONTEXT_SIZE } from '@/utils/error' export const useChat = () => { const { prompt, setPrompt } = usePrompt() @@ -41,6 +42,7 @@ export const useChat = () => { setAbortController, } = useAppState() const { currentAssistant } = useAssistant() + const { updateProvider } = useModelProvider() const { approvedTools, showApprovalModal, allowAllMCPPermissions } = useToolApproval() @@ -108,8 +110,60 @@ export const useChat = () => { currentAssistant, ]) + const increaseModelContextSize = useCallback( + (model: Model, provider: ProviderObject) => { + /** + * Should increase the context size of the model by 2x + * If the context size is not set or too low, it defaults to 8192. + */ + const ctxSize = Math.max( + model.settings?.ctx_len?.controller_props.value + ? typeof model.settings.ctx_len.controller_props.value === 'string' + ? parseInt(model.settings.ctx_len.controller_props.value as string) + : (model.settings.ctx_len.controller_props.value as number) + : 8192, + 8192 + ) + const updatedModel = { + ...model, + settings: { + ...model.settings, + ctx_len: { + ...(model.settings?.ctx_len != null ? model.settings?.ctx_len : {}), + controller_props: { + ...(model.settings?.ctx_len?.controller_props ?? 
{}), + value: ctxSize * 2, + }, + }, + }, + } + + // Find the model index in the provider's models array + const modelIndex = provider.models.findIndex((m) => m.id === model.id) + + if (modelIndex !== -1) { + // Create a copy of the provider's models array + const updatedModels = [...provider.models] + + // Update the specific model in the array + updatedModels[modelIndex] = updatedModel as Model + + // Update the provider with the new models array + updateProvider(provider.provider, { + models: updatedModels, + }) + } + stopAllModels() + }, + [updateProvider] + ) + const sendMessage = useCallback( - async (message: string) => { + async ( + message: string, + showModal?: () => Promise, + troubleshooting = true + ) => { const activeThread = await getCurrentThread() resetTokenSpeed() @@ -121,7 +175,9 @@ export const useChat = () => { const abortController = new AbortController() setAbortController(activeThread.id, abortController) updateStreamingContent(emptyThreadContent) - addMessage(newUserThreadContent(activeThread.id, message)) + // Do not add new message on retry + if (troubleshooting) + addMessage(newUserThreadContent(activeThread.id, message)) updateThreadTimestamp(activeThread.id) setPrompt('') try { @@ -180,6 +236,14 @@ export const useChat = () => { } } else { for await (const part of completion) { + // Error message + if (!part.choices) { + throw new Error( + 'message' in part + ? (part.message as string) + : (JSON.stringify(part) ?? '') + ) + } const delta = part.choices[0]?.delta?.content || '' if (part.choices[0]?.delta?.tool_calls) { @@ -252,9 +316,26 @@ export const useChat = () => { if (!followUpWithToolUse) availableTools = [] } } catch (error) { - toast.error( - `Error sending message: ${error && typeof error === 'object' && 'message' in error ? error.message : error}` - ) + const errorMessage = + error && typeof error === 'object' && 'message' in error + ? error.message + : error + if ( + typeof errorMessage === 'string' && + errorMessage.includes(OUT_OF_CONTEXT_SIZE) && + selectedModel && + troubleshooting + ) { + showModal?.().then((confirmed) => { + if (confirmed) { + increaseModelContextSize(selectedModel, activeProvider) + setTimeout(() => { + sendMessage(message, showModal, false) // Retry sending the message without troubleshooting + }, 1000) + } + }) + } + toast.error(`Error sending message: ${errorMessage}`) console.error('Error sending message:', error) } finally { updateLoadingModel(false) @@ -282,6 +363,7 @@ export const useChat = () => { allowAllMCPPermissions, showApprovalModal, updateTokenSpeed, + increaseModelContextSize, ] ) diff --git a/web-app/src/lib/completion.ts b/web-app/src/lib/completion.ts index 1e8a5ec55..24daec3cd 100644 --- a/web-app/src/lib/completion.ts +++ b/web-app/src/lib/completion.ts @@ -134,7 +134,8 @@ export const sendCompletion = async ( thread.model.id && !(thread.model.id in Object.values(models).flat()) && // eslint-disable-next-line @typescript-eslint/no-explicit-any - !tokenJS.extendedModelExist(providerName as any, thread.model?.id) + !tokenJS.extendedModelExist(providerName as any, thread.model?.id) && + provider.provider !== 'llama.cpp' ) { try { tokenJS.extendModelList( @@ -323,7 +324,7 @@ export const postMessageProcessing = async ( ? await showModal(toolCall.function.name, message.thread_id) : true) - const result = approved + let result = approved ? 
await callTool({ toolName: toolCall.function.name, arguments: toolCall.function.arguments.length @@ -335,7 +336,7 @@ export const postMessageProcessing = async ( content: [ { type: 'text', - text: `Error calling tool ${toolCall.function.name}: ${e.message}`, + text: `Error calling tool ${toolCall.function.name}: ${e.message ?? e}`, }, ], error: true, @@ -350,7 +351,16 @@ export const postMessageProcessing = async ( ], } - if ('error' in result && result.error) break + if (typeof result === 'string') { + result = { + content: [ + { + type: 'text', + text: result, + }, + ], + } + } message.metadata = { ...(message.metadata ?? {}), diff --git a/web-app/src/locales/en/common.json b/web-app/src/locales/en/common.json index 296c0162c..9abd447ad 100644 --- a/web-app/src/locales/en/common.json +++ b/web-app/src/locales/en/common.json @@ -23,6 +23,7 @@ "reset": "Reset", "search": "Search", "name": "Name", + "cancel": "Cancel", "placeholder": { "chatInput": "Ask me anything..." diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index 8c299bd5f..f58723ac4 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -77,6 +77,7 @@ function Hub() { const addModelSourceTimeoutRef = useRef | null>( null ) + const downloadButtonRef = useRef(null) const { getProviderByName } = useModelProvider() const llamaProvider = getProviderByName('llama.cpp') @@ -233,18 +234,14 @@ function Hub() { isRecommended && 'hub-download-button-step' )} > -
- - - {Math.round(downloadProgress * 100)}% - -
+ {isDownloading && !isDownloaded && ( +
+ + + {Math.round(downloadProgress * 100)}% + +
+ )} {isDownloaded ? ( @@ -266,6 +264,7 @@ function Hub() { llamaProvider?.models, handleUseModel, isRecommendedModel, + downloadButtonRef, ]) const { step } = useSearch({ from: Route.id }) @@ -291,8 +290,9 @@ function Hub() { isRecommendedModel(model.metadata?.id) ) if (recommendedModel && recommendedModel.models[0]?.id) { - downloadModel(recommendedModel.models[0].id) - + if (downloadButtonRef.current) { + downloadButtonRef.current.click() + } return } } @@ -413,7 +413,7 @@ function Hub() { -
+
{loading ? (
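A note for reviewers: the context-recovery path added in this patch is spread across several files (the OutOfContextDialog component and the resize-and-retry logic in useChat.ts above, plus the OUT_OF_CONTEXT_SIZE match string in error.ts below). The only non-obvious arithmetic is the growth rule in `increaseModelContextSize`: the model's current `ctx_len` is clamped up to at least 8192 and then doubled. The sketch below restates that rule as a standalone function; `nextContextSize` is a hypothetical helper name for illustration and is not part of the diff.

```typescript
// Hypothetical helper illustrating the ctx_len growth rule used by
// increaseModelContextSize in useChat.ts above; not part of this patch.
function nextContextSize(current?: number | string): number {
  // ctx_len may arrive as a string or a number from the model settings controller.
  const parsed =
    typeof current === 'string' ? parseInt(current, 10) : current ?? 8192
  // Unset, NaN, or too-small values are treated as 8192 before doubling.
  const base = Math.max(Number.isFinite(parsed) ? parsed : 8192, 8192)
  return base * 2
}

// Examples: nextContextSize(4096) === 16384; nextContextSize(undefined) === 16384;
// nextContextSize('16384') === 32768.
console.log(nextContextSize(4096), nextContextSize(undefined), nextContextSize('16384'))
```

After the provider's model entry is updated with the doubled value, `stopAllModels()` is called so the next request reloads the model with the larger context window, and `sendMessage` retries once without re-adding the user message.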
diff --git a/web-app/src/routes/threads/$threadId.tsx b/web-app/src/routes/threads/$threadId.tsx index 1190dfc1c..2f18f6094 100644 --- a/web-app/src/routes/threads/$threadId.tsx +++ b/web-app/src/routes/threads/$threadId.tsx @@ -18,6 +18,7 @@ import { useAppState } from '@/hooks/useAppState' import DropdownAssistant from '@/containers/DropdownAssistant' import { useAssistant } from '@/hooks/useAssistant' import { useAppearance } from '@/hooks/useAppearance' +import { useOutOfContextPromiseModal } from '@/containers/dialogs/OutOfContextDialog' // as route.threadsDetail export const Route = createFileRoute('/threads/$threadId')({ @@ -47,6 +48,8 @@ function ThreadDetail() { const scrollContainerRef = useRef(null) const isFirstRender = useRef(true) const messagesCount = useMemo(() => messages?.length ?? 0, [messages]) + const { showModal, PromiseModal: OutOfContextModal } = + useOutOfContextPromiseModal() // Function to check scroll position and scrollbar presence const checkScrollState = () => { @@ -193,6 +196,8 @@ function ThreadDetail() { if (!messages || !threadModel) return null + const contextOverflowModalComponent = + return (
@@ -233,6 +238,8 @@ function ThreadDetail() { )) } index={index} + showContextOverflowModal={showModal} + contextOverflowModal={contextOverflowModalComponent} />
) diff --git a/web-app/src/utils/error.ts b/web-app/src/utils/error.ts new file mode 100644 index 000000000..1019d2853 --- /dev/null +++ b/web-app/src/utils/error.ts @@ -0,0 +1,2 @@ +export const OUT_OF_CONTEXT_SIZE = + 'the request exceeds the available context size.' diff --git a/web-app/src/utils/models.ts b/web-app/src/utils/models.ts index c21b4a3a9..7f8756f67 100644 --- a/web-app/src/utils/models.ts +++ b/web-app/src/utils/models.ts @@ -1,6 +1,6 @@ export const hardcodedModel = { - author: 'Menlo', - id: 'Menlo/Jan-nano', + author: 'menlo', + id: 'menlo/jan-nano', metadata: { '_id': '68492cd9cada68b1d11ca1bd', 'author': 'Menlo', @@ -12,7 +12,7 @@ export const hardcodedModel = { 'description': '---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)', 'disabled': false, - 'downloads': 939, + 'downloads': 1434, 'gated': false, 'gguf': { architecture: 'qwen3', @@ -25,17 +25,17 @@ export const hardcodedModel = { total: 4022468096, }, 'id': 'Menlo/Jan-nano', - 'lastModified': '2025-06-13T05:53:33.000Z', - 'likes': 2, + 'lastModified': '2025-06-13T16:57:55.000Z', + 'likes': 3, 'model-index': null, 'modelId': 'Menlo/Jan-nano', 'pipeline_tag': 'text-generation', 'private': false, - 'sha': '782985633ac4080dfdaa52e62d61dcf637e9ff0d', + 'sha': 'a04aab0878648d8f284c63a52664a482ead16f06', 'siblings': [ { rfilename: '.gitattributes', - size: 1742, + size: 3460, }, { rfilename: 'README.md', @@ -45,6 +45,58 @@ export const hardcodedModel = { rfilename: 'jan-nano-0.4-iQ4_XS.gguf', size: 2270750400, }, + { + rfilename: 'jan-nano-4b-Q3_K_L.gguf', + size: 2239784384, + }, + { + rfilename: 'jan-nano-4b-Q3_K_M.gguf', + size: 2075616704, + }, + { + rfilename: 'jan-nano-4b-Q3_K_S.gguf', + size: 1886995904, + }, + { + rfilename: 'jan-nano-4b-Q4_0.gguf', + size: 2369545664, + }, + { + rfilename: 'jan-nano-4b-Q4_1.gguf', + size: 2596627904, + }, + { + rfilename: 'jan-nano-4b-Q4_K_M.gguf', + size: 2497279424, + }, + { + rfilename: 'jan-nano-4b-Q4_K_S.gguf', + size: 2383308224, + }, + { + rfilename: 'jan-nano-4b-Q5_0.gguf', + size: 2823710144, + }, + { + rfilename: 'jan-nano-4b-Q5_1.gguf', + size: 3050792384, + }, + { + rfilename: 'jan-nano-4b-Q5_K_M.gguf', + size: 2889512384, + }, + { + rfilename: 'jan-nano-4b-Q5_K_S.gguf', + size: 2823710144, + }, + { + rfilename: 'jan-nano-4b-Q6_K.gguf', + size: 3306259904, + }, + { + rfilename: 'jan-nano-4b-Q8_0.gguf', + size: 4280403904, + }, ], 'spaces': [], 'tags': [ @@ -56,7 +108,7 @@ export const hardcodedModel = { 'imatrix', 'conversational', ], - 'usedStorage': 20820673088, + 'usedStorage': 93538518464, 'widgetData': [ { text: 'Hi, what can you help me with?', @@ -74,8 +126,60 @@ export const hardcodedModel = { }, models: [ { - id: 'Menlo:Jan-nano:jan-nano-0.4-iQ4_XS.gguf', + id: 'menlo:jan-nano:jan-nano-0.4-iQ4_XS.gguf', size: 2270750400, 
}, + { + id: 'menlo:jan-nano:jan-nano-4b-Q3_K_L.gguf', + size: 2239784384, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q3_K_M.gguf', + size: 2075616704, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q3_K_S.gguf', + size: 1886995904, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q4_0.gguf', + size: 2369545664, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q4_1.gguf', + size: 2596627904, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q4_K_M.gguf', + size: 2497279424, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q4_K_S.gguf', + size: 2383308224, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q5_0.gguf', + size: 2823710144, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q5_1.gguf', + size: 3050792384, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q5_K_M.gguf', + size: 2889512384, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q5_K_S.gguf', + size: 2823710144, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q6_K.gguf', + size: 3306259904, + }, + { + id: 'menlo:jan-nano:jan-nano-4b-Q8_0.gguf', + size: 4280403904, + }, ], } From b5bdf3cfd2c162d72516a1cfc8a146532e555fe4 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Sat, 14 Jun 2025 17:38:14 +0700 Subject: [PATCH 05/48] fix: glitch download from onboarding (#5269) --- web-app/src/routes/hub.tsx | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index f58723ac4..22a747d70 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -78,6 +78,7 @@ function Hub() { null ) const downloadButtonRef = useRef(null) + const hasTriggeredDownload = useRef(false) const { getProviderByName } = useModelProvider() const llamaProvider = getProviderByName('llama.cpp') @@ -285,12 +286,18 @@ function Hub() { const handleJoyrideCallback = (data: CallBackProps) => { const { status, index } = data - if (status === STATUS.FINISHED && !isDownloading && isLastStep) { + if ( + status === STATUS.FINISHED && + !isDownloading && + isLastStep && + !hasTriggeredDownload.current + ) { const recommendedModel = filteredModels.find((model) => isRecommendedModel(model.metadata?.id) ) if (recommendedModel && recommendedModel.models[0]?.id) { if (downloadButtonRef.current) { + hasTriggeredDownload.current = true downloadButtonRef.current.click() } return From a10dbef2c81e555748739c9363f9d9f2707a167a Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 14 Jun 2025 22:00:43 +0700 Subject: [PATCH 06/48] =?UTF-8?q?=E2=9C=A8enhancement:=20Model=20sources?= =?UTF-8?q?=20should=20not=20be=20hard=20coded=20from=20frontend=20(#5270)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../model-extension/resources/default.json | 183 ++++++++++++++++++ extensions/model-extension/src/index.ts | 12 +- web-app/src/services/models.ts | 9 +- 3 files changed, 198 insertions(+), 6 deletions(-) diff --git a/extensions/model-extension/resources/default.json b/extensions/model-extension/resources/default.json index dbd9d906e..8884af70f 100644 --- a/extensions/model-extension/resources/default.json +++ b/extensions/model-extension/resources/default.json @@ -1,4 +1,187 @@ [ + { + "author": "Menlo", + "id": "Menlo/Jan-nano", + "metadata": { + "_id": "68492cd9cada68b1d11ca1bd", + "author": "Menlo", + "cardData": { + "license": "apache-2.0", + "pipeline_tag": "text-generation" + }, + "createdAt": "2025-06-11T07:14:33.000Z", + "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan 
Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)", + "disabled": false, + "downloads": 1434, + "gated": false, + "gguf": { + "architecture": "qwen3", + "bos_token": "<|endoftext|>", + "chat_template": "{%- if tools %} {{- '<|im_start|>system\\n' }} {%- if messages[0].role == 'system' %} {{- messages[0].content + '\\n\\n' }} {%- endif %} {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }} {%- for tool in tools %} {{- \"\\n\" }} {{- tool | tojson }} {%- endfor %} {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }} {%- else %} {%- if messages[0].role == 'system' %} {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endfor %} {%- for message in messages %} {%- if message.content is string %} {%- set content = message.content %} {%- else %} {%- set content = '' %} {%- endif %} {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %} {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }} {%- elif message.role == \"assistant\" %} {%- set reasoning_content = '' %} {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} {%- if '' in content %} {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %} {%- set content = content.split('')[-1].lstrip('\\n') %} {%- endif %} {%- endif %} {%- if loop.index0 > ns.last_query_index %} {%- if loop.last or (not loop.last and reasoning_content) %} {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- if message.tool_calls %} {%- for tool_call in message.tool_calls %} {%- if (loop.first and content) or (not loop.first) %} {{- '\\n' }} {%- endif %} {%- if tool_call.function %} {%- set tool_call = tool_call.function %} {%- endif %} {{- '\\n{\"name\": \"' }} {{- tool_call.name }} {{- '\", \"arguments\": ' }} {%- if tool_call.arguments is string %} {{- tool_call.arguments }} {%- else %} {{- tool_call.arguments | tojson }} {%- endif %} 
{{- '}\\n' }} {%- endfor %} {%- endif %} {{- '<|im_end|>\\n' }} {%- elif message.role == \"tool\" %} {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %} {{- '<|im_start|>user' }} {%- endif %} {{- '\\n\\n' }} {{- content }} {{- '\\n' }} {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %} {{- '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\\n' }} {{- '\\n\\n\\n\\n' }} {%- endif %}", + "context_length": 40960, + "eos_token": "<|im_end|>", + "quantize_imatrix_file": "imatrix.dat", + "total": 4022468096 + }, + "id": "Menlo/Jan-nano", + "lastModified": "2025-06-13T16:57:55.000Z", + "likes": 3, + "model-index": null, + "modelId": "Menlo/Jan-nano", + "pipeline_tag": "text-generation", + "private": false, + "sha": "a04aab0878648d8f284c63a52664a482ead16f06", + "siblings": [ + { + "rfilename": ".gitattributes", + "size": 3460 + }, + { + "rfilename": "README.md", + "size": 776 + }, + { + "rfilename": "jan-nano-4b-iQ4_XS.gguf", + "size": 2270750400 + }, + { + "rfilename": "jan-nano-4b-Q3_K_L.gguf", + "size": 2239784384 + }, + { + "rfilename": "jan-nano-4b-Q3_K_M.gguf", + "size": 2075616704 + }, + { + "rfilename": "jan-nano-4b-Q3_K_S.gguf", + "size": 1886995904 + }, + { + "rfilename": "jan-nano-4b-Q4_0.gguf", + "size": 2369545664 + }, + { + "rfilename": "jan-nano-4b-Q4_1.gguf", + "size": 2596627904 + }, + { + "rfilename": "jan-nano-4b-Q4_K_M.gguf", + "size": 2497279424 + }, + { + "rfilename": "jan-nano-4b-Q4_K_S.gguf", + "size": 2383308224 + }, + { + "rfilename": "jan-nano-4b-Q5_0.gguf", + "size": 2823710144 + }, + { + "rfilename": "jan-nano-4b-Q5_1.gguf", + "size": 3050792384 + }, + { + "rfilename": "jan-nano-4b-Q5_K_M.gguf", + "size": 2889512384 + }, + { + "rfilename": "jan-nano-4b-Q5_K_S.gguf", + "size": 2823710144 + }, + { + "rfilename": "jan-nano-4b-Q6_K.gguf", + "size": 3306259904 + }, + { + "rfilename": "jan-nano-4b-Q8_0.gguf", + "size": 4280403904 + } + ], + "spaces": [], + "tags": [ + "gguf", + "text-generation", + "license:apache-2.0", + "endpoints_compatible", + "region:us", + "imatrix", + "conversational" + ], + "usedStorage": 93538518464, + "widgetData": [ + { + "text": "Hi, what can you help me with?" + }, + { + "text": "What is 84 * 3 / 2?" + }, + { + "text": "Tell me an interesting fact about the universe!" + }, + { + "text": "Explain quantum computing in simple terms." 
+ } + ] + }, + "models": [ + { + "id": "Menlo:Jan-nano:jan-nano-4b-iQ4_XS.gguf", + "size": 2270750400 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_L.gguf", + "size": 2239784384 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_M.gguf", + "size": 2075616704 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_S.gguf", + "size": 1886995904 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q4_0.gguf", + "size": 2369545664 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q4_1.gguf", + "size": 2596627904 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q4_K_M.gguf", + "size": 2497279424 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q4_K_S.gguf", + "size": 2383308224 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q5_0.gguf", + "size": 2823710144 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q5_1.gguf", + "size": 3050792384 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q5_K_M.gguf", + "size": 2889512384 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q5_K_S.gguf", + "size": 2823710144 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q6_K.gguf", + "size": 3306259904 + }, + { + "id": "Menlo:Jan-nano:jan-nano-4b-Q8_0.gguf", + "size": 4280403904 + } + ] + }, { "author": "PrimeIntellect", "id": "cortexso/intellect-2", diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index 09d1252ce..9fa6132c5 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -25,6 +25,11 @@ type Data = { data: T[] } +/** + * Defaul mode sources + */ +const defaultModelSources = ['Menlo/Jan-nano'] + /** * A extension for models */ @@ -396,6 +401,11 @@ export default class JanModelExtension extends ModelExtension { fetchModelsHub = async () => { const models = await this.fetchModels() + defaultModelSources.forEach((model) => { + this.addSource(model).catch((e) => { + console.debug(`Failed to add default model source ${model}:`, e) + }) + }) return this.apiInstance() .then((api) => api @@ -403,7 +413,7 @@ export default class JanModelExtension extends ModelExtension { .json>() .then(async (e) => { await Promise.all( - e.data?.map((model) => { + [...(e.data ?? []), ...defaultModelSources].map((model) => { if ( !models.some( (e) => 'modelSource' in e && e.modelSource === model diff --git a/web-app/src/services/models.ts b/web-app/src/services/models.ts index 071f032b8..77fbde6de 100644 --- a/web-app/src/services/models.ts +++ b/web-app/src/services/models.ts @@ -1,7 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { ExtensionManager } from '@/lib/extension' import { normalizeProvider } from '@/lib/models' -import { hardcodedModel } from '@/utils/models' import { EngineManager, ExtensionTypeEnum, ModelExtension } from '@janhq/core' import { Model as CoreModel } from '@janhq/core' @@ -24,7 +23,7 @@ export const fetchModelSources = async (): Promise => { ExtensionTypeEnum.Model ) - if (!extension) return [hardcodedModel] + if (!extension) return [] try { const sources = await extension.getSources() @@ -34,10 +33,10 @@ export const fetchModelSources = async (): Promise => { })) // Prepend the hardcoded model to the sources - return [hardcodedModel, ...mappedSources] + return [...mappedSources] } catch (error) { console.error('Failed to fetch model sources:', error) - return [hardcodedModel] + return [] } } @@ -51,7 +50,7 @@ export const fetchModelHub = async (): Promise => { ?.fetchModelsHub() // Prepend the hardcoded model to the hub data - return hubData ? [hardcodedModel, ...hubData] : [hardcodedModel] + return hubData ? 
[...hubData] : [] } /** From 2634659366d43cec3527d0cca16c7d5a8feb67f4 Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 15 Jun 2025 02:34:14 +0700 Subject: [PATCH 07/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20default=20onboarding?= =?UTF-8?q?=20model=20should=20use=20recommended=20quantizations=20(#5273)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: default onboarding model should use recommended quantizations * ✨enhancement: show context shift option in provider settings * 🔧chore: wording --- extensions/download-extension/package.json | 2 +- .../resources/default_settings.json | 9 + .../inference-cortex-extension/src/index.ts | 12 +- web-app/src/routes/hub.tsx | 16 +- web-app/src/utils/models.ts | 185 ------------------ 5 files changed, 35 insertions(+), 189 deletions(-) delete mode 100644 web-app/src/utils/models.ts diff --git a/extensions/download-extension/package.json b/extensions/download-extension/package.json index 750934594..58fe42289 100644 --- a/extensions/download-extension/package.json +++ b/extensions/download-extension/package.json @@ -2,7 +2,7 @@ "name": "@janhq/download-extension", "productName": "Download Manager", "version": "1.0.0", - "description": "Handle downloads", + "description": "Download and manage files and AI models in Jan.", "main": "dist/index.js", "author": "Jan ", "license": "AGPL-3.0", diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json index 451596842..d27624639 100644 --- a/extensions/inference-cortex-extension/resources/default_settings.json +++ b/extensions/inference-cortex-extension/resources/default_settings.json @@ -8,6 +8,15 @@ "value": true } }, + { + "key": "context_shift", + "title": "Context Shift", + "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.", + "controllerType": "checkbox", + "controllerProps": { + "value": true + } + }, { "key": "cont_batching", "title": "Continuous Batching", diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 626d53696..3e8b60ebe 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -37,6 +37,7 @@ enum Settings { cpu_threads = 'cpu_threads', huggingfaceToken = 'hugging-face-access-token', auto_unload_models = 'auto_unload_models', + context_shift = 'context_shift', } type LoadedModelResponse = { data: { engine: string; id: string }[] } @@ -63,6 +64,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { cpu_threads?: number auto_unload_models: boolean = true reasoning_budget = -1 // Default reasoning budget in seconds + context_shift = true /** * The URL for making inference requests. 
*/ @@ -128,6 +130,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { true ) this.flash_attn = await this.getSetting(Settings.flash_attn, true) + this.context_shift = await this.getSetting( + Settings.context_shift, + true + ) this.use_mmap = await this.getSetting(Settings.use_mmap, true) if (this.caching_enabled) this.cache_type = await this.getSetting(Settings.cache_type, 'q8') @@ -209,6 +215,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { this.updateCortexConfig({ huggingface_token: value }) } else if (key === Settings.auto_unload_models) { this.auto_unload_models = value as boolean + } else if (key === Settings.context_shift && typeof value === 'boolean') { + this.context_shift = value } } @@ -271,7 +279,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ...(model.id.toLowerCase().includes('jan-nano') ? { reasoning_budget: 0 } : { reasoning_budget: this.reasoning_budget }), - ...{ 'no-context-shift': true }, + ...(this.context_shift === false + ? { 'no-context-shift': true } + : {}), }, timeout: false, signal, diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index 22a747d70..ed7fb669b 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -49,6 +49,7 @@ type ModelProps = { type SearchParams = { repo: string } +const defaultModelQuantizations = ['iq4_xs.gguf', 'q4_k_m.gguf'] export const Route = createFileRoute(route.hub as any)({ component: Hub, @@ -219,7 +220,10 @@ function Hub() { const DownloadButtonPlaceholder = useMemo(() => { return ({ model }: ModelProps) => { - const modelId = model.models[0]?.id + const modelId = + model.models.find((e) => + defaultModelQuantizations.some((m) => e.id.toLowerCase().includes(m)) + )?.id ?? model.models[0]?.id const isDownloading = downloadProcesses.some((e) => e.id === modelId) const downloadProgress = downloadProcesses.find((e) => e.id === modelId)?.progress || 0 @@ -460,7 +464,15 @@ function Hub() {
- {toGigabytes(model.models?.[0]?.size)} + {toGigabytes( + ( + model.models.find((m) => + defaultModelQuantizations.some((e) => + m.id.toLowerCase().includes(e) + ) + ) ?? model.models?.[0] + )?.size + )}
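Note on the two hub.tsx hunks above: both perform the same lookup — take the first entry in model.models whose file name contains one of the recommended quantizations (iQ4_XS or Q4_K_M) and fall back to the first entry otherwise. A minimal standalone sketch of that selection logic, using a hypothetical pickDefaultVariant helper and ModelFile type introduced here purely for illustration (the shipped code does the lookup inline on model.models):

type ModelFile = { id: string; size: number }

const defaultModelQuantizations = ['iq4_xs.gguf', 'q4_k_m.gguf']

// First file whose name matches a recommended quantization, else the first file.
function pickDefaultVariant(files: ModelFile[]): ModelFile | undefined {
  return (
    files.find((f) =>
      defaultModelQuantizations.some((q) => f.id.toLowerCase().includes(q))
    ) ?? files[0]
  )
}

// Example with entries taken from the default.json list above: the iQ4_XS build
// is picked even though Q8_0 is listed first, so the download button and the
// size label refer to the same variant.
const files: ModelFile[] = [
  { id: 'Menlo:Jan-nano:jan-nano-4b-Q8_0.gguf', size: 4280403904 },
  { id: 'Menlo:Jan-nano:jan-nano-4b-iQ4_XS.gguf', size: 2270750400 },
]
console.log(pickDefaultVariant(files)?.id) // Menlo:Jan-nano:jan-nano-4b-iQ4_XS.gguf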
diff --git a/web-app/src/utils/models.ts b/web-app/src/utils/models.ts deleted file mode 100644 index 7f8756f67..000000000 --- a/web-app/src/utils/models.ts +++ /dev/null @@ -1,185 +0,0 @@ -export const hardcodedModel = { - author: 'menlo', - id: 'menlo/jan-nano', - metadata: { - '_id': '68492cd9cada68b1d11ca1bd', - 'author': 'Menlo', - 'cardData': { - license: 'apache-2.0', - pipeline_tag: 'text-generation', - }, - 'createdAt': '2025-06-11T07:14:33.000Z', - 'description': - '---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)', - 'disabled': false, - 'downloads': 1434, - 'gated': false, - 'gguf': { - architecture: 'qwen3', - bos_token: '<|endoftext|>', - chat_template: - "{%- if tools %} {{- '<|im_start|>system\\n' }} {%- if messages[0].role == 'system' %} {{- messages[0].content + '\\n\\n' }} {%- endif %} {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }} {%- for tool in tools %} {{- \"\\n\" }} {{- tool | tojson }} {%- endfor %} {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }} {%- else %} {%- if messages[0].role == 'system' %} {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endfor %} {%- for message in messages %} {%- if message.content is string %} {%- set content = message.content %} {%- else %} {%- set content = '' %} {%- endif %} {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %} {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }} {%- elif message.role == \"assistant\" %} {%- set reasoning_content = '' %} {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} {%- if '' in content %} {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %} {%- set content = content.split('')[-1].lstrip('\\n') %} {%- endif %} {%- endif %} {%- if loop.index0 > ns.last_query_index %} {%- if loop.last or (not loop.last and reasoning_content) %} {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + 
content }} {%- endif %} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- if message.tool_calls %} {%- for tool_call in message.tool_calls %} {%- if (loop.first and content) or (not loop.first) %} {{- '\\n' }} {%- endif %} {%- if tool_call.function %} {%- set tool_call = tool_call.function %} {%- endif %} {{- '\\n{\"name\": \"' }} {{- tool_call.name }} {{- '\", \"arguments\": ' }} {%- if tool_call.arguments is string %} {{- tool_call.arguments }} {%- else %} {{- tool_call.arguments | tojson }} {%- endif %} {{- '}\\n' }} {%- endfor %} {%- endif %} {{- '<|im_end|>\\n' }} {%- elif message.role == \"tool\" %} {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %} {{- '<|im_start|>user' }} {%- endif %} {{- '\\n\\n' }} {{- content }} {{- '\\n' }} {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %} {{- '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\\n' }} {{- '\\n\\n\\n\\n' }} {%- endif %}", - context_length: 40960, - eos_token: '<|im_end|>', - quantize_imatrix_file: 'imatrix.dat', - total: 4022468096, - }, - 'id': 'Menlo/Jan-nano', - 'lastModified': '2025-06-13T16:57:55.000Z', - 'likes': 3, - 'model-index': null, - 'modelId': 'Menlo/Jan-nano', - 'pipeline_tag': 'text-generation', - 'private': false, - 'sha': 'a04aab0878648d8f284c63a52664a482ead16f06', - 'siblings': [ - { - rfilename: '.gitattributes', - size: 3460, - }, - { - rfilename: 'README.md', - size: 776, - }, - { - rfilename: 'jan-nano-0.4-iQ4_XS.gguf', - size: 2270750400, - }, - { - rfilename: 'jan-nano-4b-Q3_K_L.gguf', - size: 2239784384, - }, - { - rfilename: 'jan-nano-4b-Q3_K_M.gguf', - size: 2075616704, - }, - { - rfilename: 'jan-nano-4b-Q3_K_S.gguf', - size: 1886995904, - }, - { - rfilename: 'jan-nano-4b-Q4_0.gguf', - size: 2369545664, - }, - { - rfilename: 'jan-nano-4b-Q4_1.gguf', - size: 2596627904, - }, - { - rfilename: 'jan-nano-4b-Q4_K_M.gguf', - size: 2497279424, - }, - { - rfilename: 'jan-nano-4b-Q4_K_S.gguf', - size: 2383308224, - }, - { - rfilename: 'jan-nano-4b-Q5_0.gguf', - size: 2823710144, - }, - { - rfilename: 'jan-nano-4b-Q5_1.gguf', - size: 3050792384, - }, - { - rfilename: 'jan-nano-4b-Q5_K_M.gguf', - size: 2889512384, - }, - { - rfilename: 'jan-nano-4b-Q5_K_S.gguf', - size: 2823710144, - }, - { - rfilename: 'jan-nano-4b-Q6_K.gguf', - size: 3306259904, - }, - { - rfilename: 'jan-nano-4b-Q8_0.gguf', - size: 4280403904, - }, - ], - 'spaces': [], - 'tags': [ - 'gguf', - 'text-generation', - 'license:apache-2.0', - 'endpoints_compatible', - 'region:us', - 'imatrix', - 'conversational', - ], - 'usedStorage': 93538518464, - 'widgetData': [ - { - text: 'Hi, what can you help me with?', - }, - { - text: 'What is 84 * 3 / 2?', - }, - { - text: 'Tell me an interesting fact about the universe!', - }, - { - text: 'Explain quantum computing in simple terms.', - }, - ], - }, - models: [ - { - id: 'menlo:jan-nano:jan-nano-0.4-iQ4_XS.gguf', - size: 2270750400, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q3_K_L.gguf', - size: 2239784384, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q3_K_M.gguf', - size: 2075616704, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q3_K_S.gguf', - size: 1886995904, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_0.gguf', - size: 2369545664, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_1.gguf', - size: 2596627904, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_K_M.gguf', - size: 2497279424, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q4_K_S.gguf', - size: 2383308224, - }, - { - 
id: 'menlo:jan-nano:jan-nano-4b-Q5_0.gguf', - size: 2823710144, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q5_1.gguf', - size: 3050792384, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q5_K_M.gguf', - size: 2889512384, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q5_K_S.gguf', - size: 2823710144, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q6_K.gguf', - size: 3306259904, - }, - { - id: 'menlo:jan-nano:jan-nano-4b-Q8_0.gguf', - size: 4280403904, - }, - ], -} From c896398fabb10c8c83a17c6bb8479ed0437ba7db Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 10:45:37 +0800 Subject: [PATCH 08/48] =?UTF-8?q?=F0=9F=94=A7=20config:=20add=20to=20gitig?= =?UTF-8?q?nore?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index df59e4644..f702c6512 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,7 @@ src-tauri/icons !src-tauri/icons/icon.png src-tauri/gen/apple src-tauri/resources/bin + +# Helper tools +.opencode +OpenCode.md From 9ab69b157babb74976ef12269c6f53f1a68f2ce1 Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 15 Jun 2025 11:17:41 +0700 Subject: [PATCH 09/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20Jan-nano=20repo=20name?= =?UTF-8?q?=20changed=20(#5274)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- extensions/model-extension/resources/default.json | 6 +++--- extensions/model-extension/src/index.ts | 4 +++- web-app/src/routes/hub.tsx | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/extensions/model-extension/resources/default.json b/extensions/model-extension/resources/default.json index 8884af70f..32bc278e4 100644 --- a/extensions/model-extension/resources/default.json +++ b/extensions/model-extension/resources/default.json @@ -1,7 +1,7 @@ [ { "author": "Menlo", - "id": "Menlo/Jan-nano", + "id": "Menlo/Jan-nano-gguf", "metadata": { "_id": "68492cd9cada68b1d11ca1bd", "author": "Menlo", @@ -23,11 +23,11 @@ "quantize_imatrix_file": "imatrix.dat", "total": 4022468096 }, - "id": "Menlo/Jan-nano", + "id": "Menlo/Jan-nano-gguf", "lastModified": "2025-06-13T16:57:55.000Z", "likes": 3, "model-index": null, - "modelId": "Menlo/Jan-nano", + "modelId": "Menlo/Jan-nano-gguf", "pipeline_tag": "text-generation", "private": false, "sha": "a04aab0878648d8f284c63a52664a482ead16f06", diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index 9fa6132c5..f0f0589df 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -28,7 +28,7 @@ type Data = { /** * Defaul mode sources */ -const defaultModelSources = ['Menlo/Jan-nano'] +const defaultModelSources = ['Menlo/Jan-nano-gguf'] /** * A extension for models @@ -291,6 +291,8 @@ export default class JanModelExtension extends ModelExtension { const sources = await this.apiInstance() .then((api) => api.get('v1/models/sources').json>()) .then((e) => (typeof e === 'object' ? 
(e.data as ModelSource[]) : [])) + // Deprecated source - filter out from legacy sources + .then((e) => e.filter((x) => x.id.toLowerCase() !== 'menlo/jan-nano')) .catch(() => []) return sources.concat( DEFAULT_MODEL_SOURCES.filter((e) => !sources.some((x) => x.id === e.id)) diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index ed7fb669b..c4327fb04 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -199,7 +199,8 @@ function Hub() { const navigate = useNavigate() const isRecommendedModel = useCallback((modelId: string) => { - return (extractModelName(modelId) === 'Jan-nano') as boolean + return (extractModelName(modelId)?.toLowerCase() === + 'jan-nano-gguf') as boolean }, []) const handleUseModel = useCallback( From 2ff177637970e5a6675e69bcd5a7c8456ef2f88b Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 16:07:08 +0800 Subject: [PATCH 10/48] =?UTF-8?q?=F0=9F=9A=A7=20wip:=20disable=20showSpeed?= =?UTF-8?q?Token=20in=20ChatInput?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ChatInput.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index a83adc59e..de4f9d9ad 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -21,7 +21,7 @@ import { IconTool, IconCodeCircle2, IconPlayerStopFilled, - IconBrandSpeedtest, + // IconBrandSpeedtest, IconX, } from '@tabler/icons-react' import { useTranslation } from 'react-i18next' @@ -48,7 +48,7 @@ type ChatInputProps = { const ChatInput = ({ model, className, - showSpeedToken = true, + showSpeedToken = false, initialMessage, }: ChatInputProps) => { const textareaRef = useRef(null) From 3ae4d12f6079eb68475e2a73d22ad7f3bafb8972 Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 16:08:58 +0800 Subject: [PATCH 11/48] =?UTF-8?q?=F0=9F=90=9B=20fix:=20commented=20out=20t?= =?UTF-8?q?he=20wrong=20import?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ChatInput.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index de4f9d9ad..291642a60 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -21,7 +21,7 @@ import { IconTool, IconCodeCircle2, IconPlayerStopFilled, - // IconBrandSpeedtest, + IconBrandSpeedtest, IconX, } from '@tabler/icons-react' import { useTranslation } from 'react-i18next' From d131752419815832bbd9a629a399d51ea9166e79 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Sun, 15 Jun 2025 16:18:43 +0700 Subject: [PATCH 12/48] fix: masking value MCP env field (#5276) --- web-app/src/routes/settings/mcp-servers.tsx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/web-app/src/routes/settings/mcp-servers.tsx b/web-app/src/routes/settings/mcp-servers.tsx index 02e51dfa4..f545ef5d0 100644 --- a/web-app/src/routes/settings/mcp-servers.tsx +++ b/web-app/src/routes/settings/mcp-servers.tsx @@ -22,6 +22,13 @@ import { useToolApproval } from '@/hooks/useToolApproval' import { toast } from 'sonner' import { invoke } from '@tauri-apps/api/core' +// Function to mask sensitive values +const maskSensitiveValue = (value: string) => { + if (!value) return value + if (value.length <= 8) return '*'.repeat(value.length) + return value.slice(0, 4) + 
'*'.repeat(value.length - 8) + value.slice(-4) +} + // eslint-disable-next-line @typescript-eslint/no-explicit-any export const Route = createFileRoute(route.settings.mcp_servers as any)({ component: MCPServers, @@ -322,7 +329,10 @@ function MCPServers() {
Env:{' '} {Object.entries(config.env) - .map(([key, value]) => `${key}=${value}`) + .map( + ([key, value]) => + `${key}=${maskSensitiveValue(value)}` + ) .join(', ')}
)} From 665de7df5543107527895b537cdbfe3daf686ac1 Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 18:30:39 +0800 Subject: [PATCH 13/48] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20token=20speed?= =?UTF-8?q?=20to=20each=20message=20that=20persist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ChatInput.tsx | 2 +- web-app/src/containers/ThreadContent.tsx | 12 +- .../src/containers/TokenSpeedIndicator.tsx | 22 ++ web-app/src/hooks/useAppState.ts | 77 ++--- web-app/src/hooks/useChat.ts | 282 +++++++++--------- web-app/src/hooks/useMessages.ts | 41 +-- 6 files changed, 236 insertions(+), 200 deletions(-) create mode 100644 web-app/src/containers/TokenSpeedIndicator.tsx diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index 291642a60..a83adc59e 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -48,7 +48,7 @@ type ChatInputProps = { const ChatInput = ({ model, className, - showSpeedToken = false, + showSpeedToken = true, initialMessage, }: ChatInputProps) => { const textareaRef = useRef(null) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 833846db1..40c26993b 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -34,6 +34,9 @@ import { } from '@/components/ui/tooltip' import { formatDate } from '@/utils/formatDate' import { AvatarEmoji } from '@/containers/AvatarEmoji' + +import TokenSpeedIndicator from '@/containers/TokenSpeedIndicator' + import CodeEditor from '@uiw/react-textarea-code-editor' import '@uiw/react-textarea-code-editor/dist.css' @@ -360,8 +363,8 @@ export const ThreadContent = memo( className={cn( 'flex items-center gap-2', item.isLastMessage && - streamingContent && - 'opacity-0 visibility-hidden pointer-events-none' + streamingContent && + 'opacity-0 visibility-hidden pointer-events-none' )} > @@ -445,6 +448,11 @@ export const ThreadContent = memo( )} + +
)} diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx new file mode 100644 index 000000000..b1dfb841c --- /dev/null +++ b/web-app/src/containers/TokenSpeedIndicator.tsx @@ -0,0 +1,22 @@ +import { IconBrandSpeedtest } from '@tabler/icons-react' + +interface TokenSpeedIndicatorProps { + metadata?: Record +} + +export const TokenSpeedIndicator = ({ + metadata +}: TokenSpeedIndicatorProps) => { + const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number })?.tokenSpeed + + return ( +
+ + + {Math.round(persistedTokenSpeed)} tokens/sec + +
+ ) +} + +export default TokenSpeedIndicator diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts index dc29f7f8a..ccf044a4f 100644 --- a/web-app/src/hooks/useAppState.ts +++ b/web-app/src/hooks/useAppState.ts @@ -1,36 +1,36 @@ -import { create } from 'zustand' -import { ThreadMessage } from '@janhq/core' -import { MCPTool } from '@/types/completion' -import { useAssistant } from './useAssistant' -import { ChatCompletionMessageToolCall } from 'openai/resources' +import { create } from "zustand"; +import { ThreadMessage } from "@janhq/core"; +import { MCPTool } from "@/types/completion"; +import { useAssistant } from "./useAssistant"; +import { ChatCompletionMessageToolCall } from "openai/resources"; type AppState = { - streamingContent?: ThreadMessage - loadingModel?: boolean - tools: MCPTool[] - serverStatus: 'running' | 'stopped' | 'pending' - abortControllers: Record - tokenSpeed?: TokenSpeed - currentToolCall?: ChatCompletionMessageToolCall - showOutOfContextDialog?: boolean - setServerStatus: (value: 'running' | 'stopped' | 'pending') => void - updateStreamingContent: (content: ThreadMessage | undefined) => void + streamingContent?: ThreadMessage; + loadingModel?: boolean; + tools: MCPTool[]; + serverStatus: "running" | "stopped" | "pending"; + abortControllers: Record; + tokenSpeed?: TokenSpeed; + currentToolCall?: ChatCompletionMessageToolCall; + showOutOfContextDialog?: boolean; + setServerStatus: (value: "running" | "stopped" | "pending") => void; + updateStreamingContent: (content: ThreadMessage | undefined) => void; updateCurrentToolCall: ( - toolCall: ChatCompletionMessageToolCall | undefined - ) => void - updateLoadingModel: (loading: boolean) => void - updateTools: (tools: MCPTool[]) => void - setAbortController: (threadId: string, controller: AbortController) => void - updateTokenSpeed: (message: ThreadMessage) => void - resetTokenSpeed: () => void - setOutOfContextDialog: (show: boolean) => void -} + toolCall: ChatCompletionMessageToolCall | undefined, + ) => void; + updateLoadingModel: (loading: boolean) => void; + updateTools: (tools: MCPTool[]) => void; + setAbortController: (threadId: string, controller: AbortController) => void; + updateTokenSpeed: (message: ThreadMessage) => void; + resetTokenSpeed: () => void; + setOutOfContextDialog: (show: boolean) => void; +}; export const useAppState = create()((set) => ({ streamingContent: undefined, loadingModel: false, tools: [], - serverStatus: 'stopped', + serverStatus: "stopped", abortControllers: {}, tokenSpeed: undefined, currentToolCall: undefined, @@ -46,18 +46,19 @@ export const useAppState = create()((set) => ({ }, } : undefined, - })) + })); + console.log(useAppState.getState().streamingContent); }, updateCurrentToolCall: (toolCall) => { set(() => ({ currentToolCall: toolCall, - })) + })); }, updateLoadingModel: (loading) => { - set({ loadingModel: loading }) + set({ loadingModel: loading }); }, updateTools: (tools) => { - set({ tools }) + set({ tools }); }, setServerStatus: (value) => set({ serverStatus: value }), setAbortController: (threadId, controller) => { @@ -66,11 +67,11 @@ export const useAppState = create()((set) => ({ ...state.abortControllers, [threadId]: controller, }, - })) + })); }, updateTokenSpeed: (message) => set((state) => { - const currentTimestamp = new Date().getTime() // Get current time in milliseconds + const currentTimestamp = new Date().getTime(); // Get current time in milliseconds if (!state.tokenSpeed) { // If this is the first update, just set the 
lastTimestamp and return return { @@ -80,14 +81,14 @@ export const useAppState = create()((set) => ({ tokenCount: 1, message: message.id, }, - } + }; } const timeDiffInSeconds = - (currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000 // Time difference in seconds - const totalTokenCount = state.tokenSpeed.tokenCount + 1 + (currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000; // Time difference in seconds + const totalTokenCount = state.tokenSpeed.tokenCount + 1; const averageTokenSpeed = - totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1) // Calculate average token speed + totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1); // Calculate average token speed return { tokenSpeed: { ...state.tokenSpeed, @@ -95,7 +96,7 @@ export const useAppState = create()((set) => ({ tokenCount: totalTokenCount, message: message.id, }, - } + }; }), resetTokenSpeed: () => set({ @@ -104,6 +105,6 @@ export const useAppState = create()((set) => ({ setOutOfContextDialog: (show) => { set(() => ({ showOutOfContextDialog: show, - })) + })); }, -})) +})); diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 164555563..0fbfeb5d9 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -1,12 +1,12 @@ -import { useCallback, useEffect, useMemo } from 'react' -import { usePrompt } from './usePrompt' -import { useModelProvider } from './useModelProvider' -import { useThreads } from './useThreads' -import { useAppState } from './useAppState' -import { useMessages } from './useMessages' -import { useRouter } from '@tanstack/react-router' -import { defaultModel } from '@/lib/models' -import { route } from '@/constants/routes' +import { useCallback, useEffect, useMemo } from "react"; +import { usePrompt } from "./usePrompt"; +import { useModelProvider } from "./useModelProvider"; +import { useThreads } from "./useThreads"; +import { useAppState } from "./useAppState"; +import { useMessages } from "./useMessages"; +import { useRouter } from "@tanstack/react-router"; +import { defaultModel } from "@/lib/models"; +import { route } from "@/constants/routes"; import { emptyThreadContent, extractToolCall, @@ -15,23 +15,23 @@ import { newUserThreadContent, postMessageProcessing, sendCompletion, -} from '@/lib/completion' -import { CompletionMessagesBuilder } from '@/lib/messages' -import { ChatCompletionMessageToolCall } from 'openai/resources' -import { useAssistant } from './useAssistant' -import { toast } from 'sonner' -import { getTools } from '@/services/mcp' -import { MCPTool } from '@/types/completion' -import { listen } from '@tauri-apps/api/event' -import { SystemEvent } from '@/types/events' -import { stopModel, startModel, stopAllModels } from '@/services/models' +} from "@/lib/completion"; +import { CompletionMessagesBuilder } from "@/lib/messages"; +import { ChatCompletionMessageToolCall } from "openai/resources"; +import { useAssistant } from "./useAssistant"; +import { toast } from "sonner"; +import { getTools } from "@/services/mcp"; +import { MCPTool } from "@/types/completion"; +import { listen } from "@tauri-apps/api/event"; +import { SystemEvent } from "@/types/events"; +import { stopModel, startModel, stopAllModels } from "@/services/models"; -import { useToolApproval } from '@/hooks/useToolApproval' -import { useToolAvailable } from '@/hooks/useToolAvailable' -import { OUT_OF_CONTEXT_SIZE } from '@/utils/error' +import { useToolApproval } from "@/hooks/useToolApproval"; +import { useToolAvailable } from 
"@/hooks/useToolAvailable"; +import { OUT_OF_CONTEXT_SIZE } from "@/utils/error"; export const useChat = () => { - const { prompt, setPrompt } = usePrompt() + const { prompt, setPrompt } = usePrompt(); const { tools, updateTokenSpeed, @@ -40,51 +40,51 @@ export const useChat = () => { updateStreamingContent, updateLoadingModel, setAbortController, - } = useAppState() - const { currentAssistant } = useAssistant() - const { updateProvider } = useModelProvider() + } = useAppState(); + const { currentAssistant } = useAssistant(); + const { updateProvider } = useModelProvider(); const { approvedTools, showApprovalModal, allowAllMCPPermissions } = - useToolApproval() - const { getDisabledToolsForThread } = useToolAvailable() + useToolApproval(); + const { getDisabledToolsForThread } = useToolAvailable(); const { getProviderByName, selectedModel, selectedProvider } = - useModelProvider() + useModelProvider(); const { getCurrentThread: retrieveThread, createThread, updateThreadTimestamp, - } = useThreads() - const { getMessages, addMessage } = useMessages() - const router = useRouter() + } = useThreads(); + const { getMessages, addMessage } = useMessages(); + const router = useRouter(); const provider = useMemo(() => { - return getProviderByName(selectedProvider) - }, [selectedProvider, getProviderByName]) + return getProviderByName(selectedProvider); + }, [selectedProvider, getProviderByName]); const currentProviderId = useMemo(() => { - return provider?.provider || selectedProvider - }, [provider, selectedProvider]) + return provider?.provider || selectedProvider; + }, [provider, selectedProvider]); useEffect(() => { function setTools() { getTools().then((data: MCPTool[]) => { - updateTools(data) - }) + updateTools(data); + }); } - setTools() + setTools(); - let unsubscribe = () => {} + let unsubscribe = () => {}; listen(SystemEvent.MCP_UPDATE, setTools).then((unsub) => { // Unsubscribe from the event when the component unmounts - unsubscribe = unsub - }) - return unsubscribe - }, [updateTools]) + unsubscribe = unsub; + }); + return unsubscribe; + }, [updateTools]); const getCurrentThread = useCallback(async () => { - let currentThread = retrieveThread() + let currentThread = retrieveThread(); if (!currentThread) { currentThread = await createThread( { @@ -92,14 +92,14 @@ export const useChat = () => { provider: selectedProvider, }, prompt, - currentAssistant - ) + currentAssistant, + ); router.navigate({ to: route.threadsDetail, params: { threadId: currentThread.id }, - }) + }); } - return currentThread + return currentThread; }, [ createThread, prompt, @@ -108,7 +108,7 @@ export const useChat = () => { selectedModel?.id, selectedProvider, currentAssistant, - ]) + ]); const increaseModelContextSize = useCallback( (model: Model, provider: ProviderObject) => { @@ -118,12 +118,12 @@ export const useChat = () => { */ const ctxSize = Math.max( model.settings?.ctx_len?.controller_props.value - ? typeof model.settings.ctx_len.controller_props.value === 'string' + ? typeof model.settings.ctx_len.controller_props.value === "string" ? 
parseInt(model.settings.ctx_len.controller_props.value as string) : (model.settings.ctx_len.controller_props.value as number) : 8192, - 8192 - ) + 8192, + ); const updatedModel = { ...model, settings: { @@ -136,80 +136,80 @@ export const useChat = () => { }, }, }, - } + }; // Find the model index in the provider's models array - const modelIndex = provider.models.findIndex((m) => m.id === model.id) + const modelIndex = provider.models.findIndex((m) => m.id === model.id); if (modelIndex !== -1) { // Create a copy of the provider's models array - const updatedModels = [...provider.models] + const updatedModels = [...provider.models]; // Update the specific model in the array - updatedModels[modelIndex] = updatedModel as Model + updatedModels[modelIndex] = updatedModel as Model; // Update the provider with the new models array updateProvider(provider.provider, { models: updatedModels, - }) + }); } - stopAllModels() + stopAllModels(); }, - [updateProvider] - ) + [updateProvider], + ); const sendMessage = useCallback( async ( message: string, showModal?: () => Promise, - troubleshooting = true + troubleshooting = true, ) => { - const activeThread = await getCurrentThread() + const activeThread = await getCurrentThread(); - resetTokenSpeed() + resetTokenSpeed(); const activeProvider = currentProviderId ? getProviderByName(currentProviderId) - : provider - if (!activeThread || !activeProvider) return - const messages = getMessages(activeThread.id) - const abortController = new AbortController() - setAbortController(activeThread.id, abortController) - updateStreamingContent(emptyThreadContent) + : provider; + if (!activeThread || !activeProvider) return; + const messages = getMessages(activeThread.id); + const abortController = new AbortController(); + setAbortController(activeThread.id, abortController); + updateStreamingContent(emptyThreadContent); // Do not add new message on retry if (troubleshooting) - addMessage(newUserThreadContent(activeThread.id, message)) - updateThreadTimestamp(activeThread.id) - setPrompt('') + addMessage(newUserThreadContent(activeThread.id, message)); + updateThreadTimestamp(activeThread.id); + setPrompt(""); try { if (selectedModel?.id) { - updateLoadingModel(true) + updateLoadingModel(true); await startModel( activeProvider, selectedModel.id, - abortController - ).catch(console.error) - updateLoadingModel(false) + abortController, + ).catch(console.error); + updateLoadingModel(false); } const builder = new CompletionMessagesBuilder( messages, - currentAssistant?.instructions - ) + currentAssistant?.instructions, + ); - builder.addUserMessage(message) + builder.addUserMessage(message); - let isCompleted = false + let isCompleted = false; // Filter tools based on model capabilities and available tools for this thread - let availableTools = selectedModel?.capabilities?.includes('tools') + let availableTools = selectedModel?.capabilities?.includes("tools") ? tools.filter((tool) => { - const disabledTools = getDisabledToolsForThread(activeThread.id) - return !disabledTools.includes(tool.name) + const disabledTools = getDisabledToolsForThread(activeThread.id); + return !disabledTools.includes(tool.name); }) - : [] + : []; // TODO: Later replaced by Agent setup? 
- const followUpWithToolUse = true + const followUpWithToolUse = true; while (!isCompleted && !abortController.signal.aborted) { const completion = await sendCompletion( activeThread, @@ -218,51 +218,51 @@ export const useChat = () => { abortController, availableTools, currentAssistant.parameters?.stream === false ? false : true, - currentAssistant.parameters as unknown as Record + currentAssistant.parameters as unknown as Record, // TODO: replace it with according provider setting later on // selectedProvider === 'llama.cpp' && availableTools.length > 0 // ? false // : true - ) + ); - if (!completion) throw new Error('No completion received') - let accumulatedText = '' - const currentCall: ChatCompletionMessageToolCall | null = null - const toolCalls: ChatCompletionMessageToolCall[] = [] + if (!completion) throw new Error("No completion received"); + let accumulatedText = ""; + const currentCall: ChatCompletionMessageToolCall | null = null; + const toolCalls: ChatCompletionMessageToolCall[] = []; if (isCompletionResponse(completion)) { - accumulatedText = completion.choices[0]?.message?.content || '' + accumulatedText = completion.choices[0]?.message?.content || ""; if (completion.choices[0]?.message?.tool_calls) { - toolCalls.push(...completion.choices[0].message.tool_calls) + toolCalls.push(...completion.choices[0].message.tool_calls); } } else { for await (const part of completion) { // Error message if (!part.choices) { throw new Error( - 'message' in part + "message" in part ? (part.message as string) - : (JSON.stringify(part) ?? '') - ) + : (JSON.stringify(part) ?? ""), + ); } - const delta = part.choices[0]?.delta?.content || '' + const delta = part.choices[0]?.delta?.content || ""; if (part.choices[0]?.delta?.tool_calls) { - const calls = extractToolCall(part, currentCall, toolCalls) + const calls = extractToolCall(part, currentCall, toolCalls); const currentContent = newAssistantThreadContent( activeThread.id, accumulatedText, { tool_calls: calls.map((e) => ({ ...e, - state: 'pending', + state: "pending", })), - } - ) - updateStreamingContent(currentContent) - await new Promise((resolve) => setTimeout(resolve, 0)) + }, + ); + updateStreamingContent(currentContent); + await new Promise((resolve) => setTimeout(resolve, 0)); } if (delta) { - accumulatedText += delta + accumulatedText += delta; // Create a new object each time to avoid reference issues // Use a timeout to prevent React from batching updates too quickly const currentContent = newAssistantThreadContent( @@ -271,13 +271,13 @@ export const useChat = () => { { tool_calls: toolCalls.map((e) => ({ ...e, - state: 'pending', + state: "pending", })), - } - ) - updateStreamingContent(currentContent) - updateTokenSpeed(currentContent) - await new Promise((resolve) => setTimeout(resolve, 0)) + }, + ); + updateStreamingContent(currentContent); + updateTokenSpeed(currentContent); + await new Promise((resolve) => setTimeout(resolve, 0)); } } } @@ -286,18 +286,22 @@ export const useChat = () => { accumulatedText.length === 0 && toolCalls.length === 0 && activeThread.model?.id && - activeProvider.provider === 'llama.cpp' + activeProvider.provider === "llama.cpp" ) { - await stopModel(activeThread.model.id, 'cortex') - throw new Error('No response received from the model') + await stopModel(activeThread.model.id, "cortex"); + throw new Error("No response received from the model"); } // Create a final content object for adding to the thread const finalContent = newAssistantThreadContent( activeThread.id, - accumulatedText - ) - 
builder.addAssistantMessage(accumulatedText, undefined, toolCalls) + accumulatedText, + { + tokenSpeed: useAppState.getState().tokenSpeed, + }, + ); + + builder.addAssistantMessage(accumulatedText, undefined, toolCalls); const updatedMessage = await postMessageProcessing( toolCalls, builder, @@ -305,41 +309,41 @@ export const useChat = () => { abortController, approvedTools, allowAllMCPPermissions ? undefined : showApprovalModal, - allowAllMCPPermissions - ) - addMessage(updatedMessage ?? finalContent) - updateStreamingContent(emptyThreadContent) - updateThreadTimestamp(activeThread.id) + allowAllMCPPermissions, + ); + addMessage(updatedMessage ?? finalContent); + updateStreamingContent(emptyThreadContent); + updateThreadTimestamp(activeThread.id); - isCompleted = !toolCalls.length + isCompleted = !toolCalls.length; // Do not create agent loop if there is no need for it - if (!followUpWithToolUse) availableTools = [] + if (!followUpWithToolUse) availableTools = []; } } catch (error) { const errorMessage = - error && typeof error === 'object' && 'message' in error + error && typeof error === "object" && "message" in error ? error.message - : error + : error; if ( - typeof errorMessage === 'string' && + typeof errorMessage === "string" && errorMessage.includes(OUT_OF_CONTEXT_SIZE) && selectedModel && troubleshooting ) { showModal?.().then((confirmed) => { if (confirmed) { - increaseModelContextSize(selectedModel, activeProvider) + increaseModelContextSize(selectedModel, activeProvider); setTimeout(() => { - sendMessage(message, showModal, false) // Retry sending the message without troubleshooting - }, 1000) + sendMessage(message, showModal, false); // Retry sending the message without troubleshooting + }, 1000); } - }) + }); } - toast.error(`Error sending message: ${errorMessage}`) - console.error('Error sending message:', error) + toast.error(`Error sending message: ${errorMessage}`); + console.error("Error sending message:", error); } finally { - updateLoadingModel(false) - updateStreamingContent(undefined) + updateLoadingModel(false); + updateStreamingContent(undefined); } }, [ @@ -364,8 +368,8 @@ export const useChat = () => { showApprovalModal, updateTokenSpeed, increaseModelContextSize, - ] - ) + ], + ); - return { sendMessage } -} + return { sendMessage }; +}; diff --git a/web-app/src/hooks/useMessages.ts b/web-app/src/hooks/useMessages.ts index 3a83b5a48..251d67438 100644 --- a/web-app/src/hooks/useMessages.ts +++ b/web-app/src/hooks/useMessages.ts @@ -1,23 +1,23 @@ -import { create } from 'zustand' -import { ThreadMessage } from '@janhq/core' +import { create } from "zustand"; +import { ThreadMessage } from "@janhq/core"; import { createMessage, deleteMessage as deleteMessageExt, -} from '@/services/messages' -import { useAssistant } from './useAssistant' +} from "@/services/messages"; +import { useAssistant } from "./useAssistant"; type MessageState = { - messages: Record - getMessages: (threadId: string) => ThreadMessage[] - setMessages: (threadId: string, messages: ThreadMessage[]) => void - addMessage: (message: ThreadMessage) => void - deleteMessage: (threadId: string, messageId: string) => void -} + messages: Record; + getMessages: (threadId: string) => ThreadMessage[]; + setMessages: (threadId: string, messages: ThreadMessage[]) => void; + addMessage: (message: ThreadMessage) => void; + deleteMessage: (threadId: string, messageId: string) => void; +}; export const useMessages = create()((set, get) => ({ messages: {}, getMessages: (threadId) => { - return 
get().messages[threadId] || [] + return get().messages[threadId] || []; }, setMessages: (threadId, messages) => { set((state) => ({ @@ -25,10 +25,11 @@ export const useMessages = create()((set, get) => ({ ...state.messages, [threadId]: messages, }, - })) + })); }, addMessage: (message) => { - const currentAssistant = useAssistant.getState().currentAssistant + console.log("addMessage: ", message); + const currentAssistant = useAssistant.getState().currentAssistant; const newMessage = { ...message, created_at: message.created_at || Date.now(), @@ -36,7 +37,7 @@ export const useMessages = create()((set, get) => ({ ...message.metadata, assistant: currentAssistant, }, - } + }; createMessage(newMessage).then((createdMessage) => { set((state) => ({ messages: { @@ -46,19 +47,19 @@ export const useMessages = create()((set, get) => ({ createdMessage, ], }, - })) - }) + })); + }); }, deleteMessage: (threadId, messageId) => { - deleteMessageExt(threadId, messageId) + deleteMessageExt(threadId, messageId); set((state) => ({ messages: { ...state.messages, [threadId]: state.messages[threadId]?.filter( - (message) => message.id !== messageId + (message) => message.id !== messageId, ) || [], }, - })) + })); }, -})) +})); From 4b3a0918fef87192a495991fed7f35f918c9f241 Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 18:54:22 +0800 Subject: [PATCH 14/48] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20to=20fol?= =?UTF-8?q?low=20prettier=20convention?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/hooks/useAppState.ts | 78 ++++----- web-app/src/hooks/useChat.ts | 278 +++++++++++++++---------------- 2 files changed, 178 insertions(+), 178 deletions(-) diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts index ccf044a4f..e3fa1753b 100644 --- a/web-app/src/hooks/useAppState.ts +++ b/web-app/src/hooks/useAppState.ts @@ -1,36 +1,36 @@ -import { create } from "zustand"; -import { ThreadMessage } from "@janhq/core"; -import { MCPTool } from "@/types/completion"; -import { useAssistant } from "./useAssistant"; -import { ChatCompletionMessageToolCall } from "openai/resources"; +import { create } from 'zustand' +import { ThreadMessage } from '@janhq/core' +import { MCPTool } from '@/types/completion' +import { useAssistant } from './useAssistant' +import { ChatCompletionMessageToolCall } from 'openai/resources' type AppState = { - streamingContent?: ThreadMessage; - loadingModel?: boolean; - tools: MCPTool[]; - serverStatus: "running" | "stopped" | "pending"; - abortControllers: Record; - tokenSpeed?: TokenSpeed; - currentToolCall?: ChatCompletionMessageToolCall; - showOutOfContextDialog?: boolean; - setServerStatus: (value: "running" | "stopped" | "pending") => void; - updateStreamingContent: (content: ThreadMessage | undefined) => void; + streamingContent?: ThreadMessage + loadingModel?: boolean + tools: MCPTool[] + serverStatus: 'running' | 'stopped' | 'pending' + abortControllers: Record + tokenSpeed?: TokenSpeed + currentToolCall?: ChatCompletionMessageToolCall + showOutOfContextDialog?: boolean + setServerStatus: (value: 'running' | 'stopped' | 'pending') => void + updateStreamingContent: (content: ThreadMessage | undefined) => void updateCurrentToolCall: ( - toolCall: ChatCompletionMessageToolCall | undefined, - ) => void; - updateLoadingModel: (loading: boolean) => void; - updateTools: (tools: MCPTool[]) => void; - setAbortController: (threadId: string, controller: AbortController) => void; - updateTokenSpeed: 
(message: ThreadMessage) => void; - resetTokenSpeed: () => void; - setOutOfContextDialog: (show: boolean) => void; -}; + toolCall: ChatCompletionMessageToolCall | undefined + ) => void + updateLoadingModel: (loading: boolean) => void + updateTools: (tools: MCPTool[]) => void + setAbortController: (threadId: string, controller: AbortController) => void + updateTokenSpeed: (message: ThreadMessage) => void + resetTokenSpeed: () => void + setOutOfContextDialog: (show: boolean) => void +} export const useAppState = create()((set) => ({ streamingContent: undefined, loadingModel: false, tools: [], - serverStatus: "stopped", + serverStatus: 'stopped', abortControllers: {}, tokenSpeed: undefined, currentToolCall: undefined, @@ -46,19 +46,19 @@ export const useAppState = create()((set) => ({ }, } : undefined, - })); - console.log(useAppState.getState().streamingContent); + })) + console.log(useAppState.getState().streamingContent) }, updateCurrentToolCall: (toolCall) => { set(() => ({ currentToolCall: toolCall, - })); + })) }, updateLoadingModel: (loading) => { - set({ loadingModel: loading }); + set({ loadingModel: loading }) }, updateTools: (tools) => { - set({ tools }); + set({ tools }) }, setServerStatus: (value) => set({ serverStatus: value }), setAbortController: (threadId, controller) => { @@ -67,11 +67,11 @@ export const useAppState = create()((set) => ({ ...state.abortControllers, [threadId]: controller, }, - })); + })) }, updateTokenSpeed: (message) => set((state) => { - const currentTimestamp = new Date().getTime(); // Get current time in milliseconds + const currentTimestamp = new Date().getTime() // Get current time in milliseconds if (!state.tokenSpeed) { // If this is the first update, just set the lastTimestamp and return return { @@ -81,14 +81,14 @@ export const useAppState = create()((set) => ({ tokenCount: 1, message: message.id, }, - }; + } } const timeDiffInSeconds = - (currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000; // Time difference in seconds - const totalTokenCount = state.tokenSpeed.tokenCount + 1; + (currentTimestamp - state.tokenSpeed.lastTimestamp) / 1000 // Time difference in seconds + const totalTokenCount = state.tokenSpeed.tokenCount + 1 const averageTokenSpeed = - totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1); // Calculate average token speed + totalTokenCount / (timeDiffInSeconds > 0 ? 
timeDiffInSeconds : 1) // Calculate average token speed return { tokenSpeed: { ...state.tokenSpeed, @@ -96,7 +96,7 @@ export const useAppState = create()((set) => ({ tokenCount: totalTokenCount, message: message.id, }, - }; + } }), resetTokenSpeed: () => set({ @@ -105,6 +105,6 @@ export const useAppState = create()((set) => ({ setOutOfContextDialog: (show) => { set(() => ({ showOutOfContextDialog: show, - })); + })) }, -})); +})) diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 0fbfeb5d9..c8e0fe9f1 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -1,12 +1,12 @@ -import { useCallback, useEffect, useMemo } from "react"; -import { usePrompt } from "./usePrompt"; -import { useModelProvider } from "./useModelProvider"; -import { useThreads } from "./useThreads"; -import { useAppState } from "./useAppState"; -import { useMessages } from "./useMessages"; -import { useRouter } from "@tanstack/react-router"; -import { defaultModel } from "@/lib/models"; -import { route } from "@/constants/routes"; +import { useCallback, useEffect, useMemo } from 'react' +import { usePrompt } from './usePrompt' +import { useModelProvider } from './useModelProvider' +import { useThreads } from './useThreads' +import { useAppState } from './useAppState' +import { useMessages } from './useMessages' +import { useRouter } from '@tanstack/react-router' +import { defaultModel } from '@/lib/models' +import { route } from '@/constants/routes' import { emptyThreadContent, extractToolCall, @@ -15,23 +15,23 @@ import { newUserThreadContent, postMessageProcessing, sendCompletion, -} from "@/lib/completion"; -import { CompletionMessagesBuilder } from "@/lib/messages"; -import { ChatCompletionMessageToolCall } from "openai/resources"; -import { useAssistant } from "./useAssistant"; -import { toast } from "sonner"; -import { getTools } from "@/services/mcp"; -import { MCPTool } from "@/types/completion"; -import { listen } from "@tauri-apps/api/event"; -import { SystemEvent } from "@/types/events"; -import { stopModel, startModel, stopAllModels } from "@/services/models"; +} from '@/lib/completion' +import { CompletionMessagesBuilder } from '@/lib/messages' +import { ChatCompletionMessageToolCall } from 'openai/resources' +import { useAssistant } from './useAssistant' +import { toast } from 'sonner' +import { getTools } from '@/services/mcp' +import { MCPTool } from '@/types/completion' +import { listen } from '@tauri-apps/api/event' +import { SystemEvent } from '@/types/events' +import { stopModel, startModel, stopAllModels } from '@/services/models' -import { useToolApproval } from "@/hooks/useToolApproval"; -import { useToolAvailable } from "@/hooks/useToolAvailable"; -import { OUT_OF_CONTEXT_SIZE } from "@/utils/error"; +import { useToolApproval } from '@/hooks/useToolApproval' +import { useToolAvailable } from '@/hooks/useToolAvailable' +import { OUT_OF_CONTEXT_SIZE } from '@/utils/error' export const useChat = () => { - const { prompt, setPrompt } = usePrompt(); + const { prompt, setPrompt } = usePrompt() const { tools, updateTokenSpeed, @@ -40,51 +40,51 @@ export const useChat = () => { updateStreamingContent, updateLoadingModel, setAbortController, - } = useAppState(); - const { currentAssistant } = useAssistant(); - const { updateProvider } = useModelProvider(); + } = useAppState() + const { currentAssistant } = useAssistant() + const { updateProvider } = useModelProvider() const { approvedTools, showApprovalModal, allowAllMCPPermissions } = - useToolApproval(); 
- const { getDisabledToolsForThread } = useToolAvailable(); + useToolApproval() + const { getDisabledToolsForThread } = useToolAvailable() const { getProviderByName, selectedModel, selectedProvider } = - useModelProvider(); + useModelProvider() const { getCurrentThread: retrieveThread, createThread, updateThreadTimestamp, - } = useThreads(); - const { getMessages, addMessage } = useMessages(); - const router = useRouter(); + } = useThreads() + const { getMessages, addMessage } = useMessages() + const router = useRouter() const provider = useMemo(() => { - return getProviderByName(selectedProvider); - }, [selectedProvider, getProviderByName]); + return getProviderByName(selectedProvider) + }, [selectedProvider, getProviderByName]) const currentProviderId = useMemo(() => { - return provider?.provider || selectedProvider; - }, [provider, selectedProvider]); + return provider?.provider || selectedProvider + }, [provider, selectedProvider]) useEffect(() => { function setTools() { getTools().then((data: MCPTool[]) => { - updateTools(data); - }); + updateTools(data) + }) } - setTools(); + setTools() - let unsubscribe = () => {}; + let unsubscribe = () => {} listen(SystemEvent.MCP_UPDATE, setTools).then((unsub) => { // Unsubscribe from the event when the component unmounts - unsubscribe = unsub; - }); - return unsubscribe; - }, [updateTools]); + unsubscribe = unsub + }) + return unsubscribe + }, [updateTools]) const getCurrentThread = useCallback(async () => { - let currentThread = retrieveThread(); + let currentThread = retrieveThread() if (!currentThread) { currentThread = await createThread( { @@ -92,14 +92,14 @@ export const useChat = () => { provider: selectedProvider, }, prompt, - currentAssistant, - ); + currentAssistant + ) router.navigate({ to: route.threadsDetail, params: { threadId: currentThread.id }, - }); + }) } - return currentThread; + return currentThread }, [ createThread, prompt, @@ -108,7 +108,7 @@ export const useChat = () => { selectedModel?.id, selectedProvider, currentAssistant, - ]); + ]) const increaseModelContextSize = useCallback( (model: Model, provider: ProviderObject) => { @@ -118,12 +118,12 @@ export const useChat = () => { */ const ctxSize = Math.max( model.settings?.ctx_len?.controller_props.value - ? typeof model.settings.ctx_len.controller_props.value === "string" + ? typeof model.settings.ctx_len.controller_props.value === 'string' ? 
parseInt(model.settings.ctx_len.controller_props.value as string) : (model.settings.ctx_len.controller_props.value as number) : 8192, - 8192, - ); + 8192 + ) const updatedModel = { ...model, settings: { @@ -136,80 +136,80 @@ export const useChat = () => { }, }, }, - }; + } // Find the model index in the provider's models array - const modelIndex = provider.models.findIndex((m) => m.id === model.id); + const modelIndex = provider.models.findIndex((m) => m.id === model.id) if (modelIndex !== -1) { // Create a copy of the provider's models array - const updatedModels = [...provider.models]; + const updatedModels = [...provider.models] // Update the specific model in the array - updatedModels[modelIndex] = updatedModel as Model; + updatedModels[modelIndex] = updatedModel as Model // Update the provider with the new models array updateProvider(provider.provider, { models: updatedModels, - }); + }) } - stopAllModels(); + stopAllModels() }, - [updateProvider], - ); + [updateProvider] + ) const sendMessage = useCallback( async ( message: string, showModal?: () => Promise, - troubleshooting = true, + troubleshooting = true ) => { - const activeThread = await getCurrentThread(); + const activeThread = await getCurrentThread() - resetTokenSpeed(); + resetTokenSpeed() const activeProvider = currentProviderId ? getProviderByName(currentProviderId) - : provider; - if (!activeThread || !activeProvider) return; - const messages = getMessages(activeThread.id); - const abortController = new AbortController(); - setAbortController(activeThread.id, abortController); - updateStreamingContent(emptyThreadContent); + : provider + if (!activeThread || !activeProvider) return + const messages = getMessages(activeThread.id) + const abortController = new AbortController() + setAbortController(activeThread.id, abortController) + updateStreamingContent(emptyThreadContent) // Do not add new message on retry if (troubleshooting) - addMessage(newUserThreadContent(activeThread.id, message)); - updateThreadTimestamp(activeThread.id); - setPrompt(""); + addMessage(newUserThreadContent(activeThread.id, message)) + updateThreadTimestamp(activeThread.id) + setPrompt('') try { if (selectedModel?.id) { - updateLoadingModel(true); + updateLoadingModel(true) await startModel( activeProvider, selectedModel.id, - abortController, - ).catch(console.error); - updateLoadingModel(false); + abortController + ).catch(console.error) + updateLoadingModel(false) } const builder = new CompletionMessagesBuilder( messages, - currentAssistant?.instructions, - ); + currentAssistant?.instructions + ) - builder.addUserMessage(message); + builder.addUserMessage(message) - let isCompleted = false; + let isCompleted = false // Filter tools based on model capabilities and available tools for this thread - let availableTools = selectedModel?.capabilities?.includes("tools") + let availableTools = selectedModel?.capabilities?.includes('tools') ? tools.filter((tool) => { - const disabledTools = getDisabledToolsForThread(activeThread.id); - return !disabledTools.includes(tool.name); + const disabledTools = getDisabledToolsForThread(activeThread.id) + return !disabledTools.includes(tool.name) }) - : []; + : [] // TODO: Later replaced by Agent setup? 
- const followUpWithToolUse = true; + const followUpWithToolUse = true while (!isCompleted && !abortController.signal.aborted) { const completion = await sendCompletion( activeThread, @@ -218,51 +218,51 @@ export const useChat = () => { abortController, availableTools, currentAssistant.parameters?.stream === false ? false : true, - currentAssistant.parameters as unknown as Record, + currentAssistant.parameters as unknown as Record // TODO: replace it with according provider setting later on // selectedProvider === 'llama.cpp' && availableTools.length > 0 // ? false // : true - ); + ) - if (!completion) throw new Error("No completion received"); - let accumulatedText = ""; - const currentCall: ChatCompletionMessageToolCall | null = null; - const toolCalls: ChatCompletionMessageToolCall[] = []; + if (!completion) throw new Error('No completion received') + let accumulatedText = '' + const currentCall: ChatCompletionMessageToolCall | null = null + const toolCalls: ChatCompletionMessageToolCall[] = [] if (isCompletionResponse(completion)) { - accumulatedText = completion.choices[0]?.message?.content || ""; + accumulatedText = completion.choices[0]?.message?.content || '' if (completion.choices[0]?.message?.tool_calls) { - toolCalls.push(...completion.choices[0].message.tool_calls); + toolCalls.push(...completion.choices[0].message.tool_calls) } } else { for await (const part of completion) { // Error message if (!part.choices) { throw new Error( - "message" in part + 'message' in part ? (part.message as string) - : (JSON.stringify(part) ?? ""), - ); + : (JSON.stringify(part) ?? '') + ) } - const delta = part.choices[0]?.delta?.content || ""; + const delta = part.choices[0]?.delta?.content || '' if (part.choices[0]?.delta?.tool_calls) { - const calls = extractToolCall(part, currentCall, toolCalls); + const calls = extractToolCall(part, currentCall, toolCalls) const currentContent = newAssistantThreadContent( activeThread.id, accumulatedText, { tool_calls: calls.map((e) => ({ ...e, - state: "pending", + state: 'pending', })), - }, - ); - updateStreamingContent(currentContent); - await new Promise((resolve) => setTimeout(resolve, 0)); + } + ) + updateStreamingContent(currentContent) + await new Promise((resolve) => setTimeout(resolve, 0)) } if (delta) { - accumulatedText += delta; + accumulatedText += delta // Create a new object each time to avoid reference issues // Use a timeout to prevent React from batching updates too quickly const currentContent = newAssistantThreadContent( @@ -271,13 +271,13 @@ export const useChat = () => { { tool_calls: toolCalls.map((e) => ({ ...e, - state: "pending", + state: 'pending', })), - }, - ); - updateStreamingContent(currentContent); - updateTokenSpeed(currentContent); - await new Promise((resolve) => setTimeout(resolve, 0)); + } + ) + updateStreamingContent(currentContent) + updateTokenSpeed(currentContent) + await new Promise((resolve) => setTimeout(resolve, 0)) } } } @@ -286,10 +286,10 @@ export const useChat = () => { accumulatedText.length === 0 && toolCalls.length === 0 && activeThread.model?.id && - activeProvider.provider === "llama.cpp" + activeProvider.provider === 'llama.cpp' ) { - await stopModel(activeThread.model.id, "cortex"); - throw new Error("No response received from the model"); + await stopModel(activeThread.model.id, 'cortex') + throw new Error('No response received from the model') } // Create a final content object for adding to the thread @@ -298,10 +298,10 @@ export const useChat = () => { accumulatedText, { tokenSpeed: 
useAppState.getState().tokenSpeed, - }, - ); + } + ) - builder.addAssistantMessage(accumulatedText, undefined, toolCalls); + builder.addAssistantMessage(accumulatedText, undefined, toolCalls) const updatedMessage = await postMessageProcessing( toolCalls, builder, @@ -309,41 +309,41 @@ export const useChat = () => { abortController, approvedTools, allowAllMCPPermissions ? undefined : showApprovalModal, - allowAllMCPPermissions, - ); - addMessage(updatedMessage ?? finalContent); - updateStreamingContent(emptyThreadContent); - updateThreadTimestamp(activeThread.id); + allowAllMCPPermissions + ) + addMessage(updatedMessage ?? finalContent) + updateStreamingContent(emptyThreadContent) + updateThreadTimestamp(activeThread.id) - isCompleted = !toolCalls.length; + isCompleted = !toolCalls.length // Do not create agent loop if there is no need for it - if (!followUpWithToolUse) availableTools = []; + if (!followUpWithToolUse) availableTools = [] } } catch (error) { const errorMessage = - error && typeof error === "object" && "message" in error + error && typeof error === 'object' && 'message' in error ? error.message - : error; + : error if ( - typeof errorMessage === "string" && + typeof errorMessage === 'string' && errorMessage.includes(OUT_OF_CONTEXT_SIZE) && selectedModel && troubleshooting ) { showModal?.().then((confirmed) => { if (confirmed) { - increaseModelContextSize(selectedModel, activeProvider); + increaseModelContextSize(selectedModel, activeProvider) setTimeout(() => { - sendMessage(message, showModal, false); // Retry sending the message without troubleshooting - }, 1000); + sendMessage(message, showModal, false) // Retry sending the message without troubleshooting + }, 1000) } - }); + }) } - toast.error(`Error sending message: ${errorMessage}`); - console.error("Error sending message:", error); + toast.error(`Error sending message: ${errorMessage}`) + console.error('Error sending message:', error) } finally { - updateLoadingModel(false); - updateStreamingContent(undefined); + updateLoadingModel(false) + updateStreamingContent(undefined) } }, [ @@ -368,8 +368,8 @@ export const useChat = () => { showApprovalModal, updateTokenSpeed, increaseModelContextSize, - ], - ); + ] + ) - return { sendMessage }; -}; + return { sendMessage } +} From b93f8afda2205f5b7c8f88cfda1ae9a74cd511c5 Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 19:05:11 +0800 Subject: [PATCH 15/48] =?UTF-8?q?=F0=9F=90=9B=20fix:=20exclude=20deleted?= =?UTF-8?q?=20field?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ThreadContent.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 40c26993b..66205248a 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -450,7 +450,6 @@ export const ThreadContent = memo( )}
From de5ddaac6d8879c7bd57b6ecda98052b0f7da617 Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Sun, 15 Jun 2025 19:12:19 +0800 Subject: [PATCH 16/48] =?UTF-8?q?=F0=9F=A7=B9=20clean:=20all=20the=20misse?= =?UTF-8?q?d=20console.log?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/hooks/useAppState.ts | 1 - web-app/src/hooks/useMessages.ts | 41 ++++++++++++++++---------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/web-app/src/hooks/useAppState.ts b/web-app/src/hooks/useAppState.ts index e3fa1753b..dc29f7f8a 100644 --- a/web-app/src/hooks/useAppState.ts +++ b/web-app/src/hooks/useAppState.ts @@ -47,7 +47,6 @@ export const useAppState = create()((set) => ({ } : undefined, })) - console.log(useAppState.getState().streamingContent) }, updateCurrentToolCall: (toolCall) => { set(() => ({ diff --git a/web-app/src/hooks/useMessages.ts b/web-app/src/hooks/useMessages.ts index 251d67438..3a83b5a48 100644 --- a/web-app/src/hooks/useMessages.ts +++ b/web-app/src/hooks/useMessages.ts @@ -1,23 +1,23 @@ -import { create } from "zustand"; -import { ThreadMessage } from "@janhq/core"; +import { create } from 'zustand' +import { ThreadMessage } from '@janhq/core' import { createMessage, deleteMessage as deleteMessageExt, -} from "@/services/messages"; -import { useAssistant } from "./useAssistant"; +} from '@/services/messages' +import { useAssistant } from './useAssistant' type MessageState = { - messages: Record; - getMessages: (threadId: string) => ThreadMessage[]; - setMessages: (threadId: string, messages: ThreadMessage[]) => void; - addMessage: (message: ThreadMessage) => void; - deleteMessage: (threadId: string, messageId: string) => void; -}; + messages: Record + getMessages: (threadId: string) => ThreadMessage[] + setMessages: (threadId: string, messages: ThreadMessage[]) => void + addMessage: (message: ThreadMessage) => void + deleteMessage: (threadId: string, messageId: string) => void +} export const useMessages = create()((set, get) => ({ messages: {}, getMessages: (threadId) => { - return get().messages[threadId] || []; + return get().messages[threadId] || [] }, setMessages: (threadId, messages) => { set((state) => ({ @@ -25,11 +25,10 @@ export const useMessages = create()((set, get) => ({ ...state.messages, [threadId]: messages, }, - })); + })) }, addMessage: (message) => { - console.log("addMessage: ", message); - const currentAssistant = useAssistant.getState().currentAssistant; + const currentAssistant = useAssistant.getState().currentAssistant const newMessage = { ...message, created_at: message.created_at || Date.now(), @@ -37,7 +36,7 @@ export const useMessages = create()((set, get) => ({ ...message.metadata, assistant: currentAssistant, }, - }; + } createMessage(newMessage).then((createdMessage) => { set((state) => ({ messages: { @@ -47,19 +46,19 @@ export const useMessages = create()((set, get) => ({ createdMessage, ], }, - })); - }); + })) + }) }, deleteMessage: (threadId, messageId) => { - deleteMessageExt(threadId, messageId); + deleteMessageExt(threadId, messageId) set((state) => ({ messages: { ...state.messages, [threadId]: state.messages[threadId]?.filter( - (message) => message.id !== messageId, + (message) => message.id !== messageId ) || [], }, - })); + })) }, -})); +})) From e20c801ff0904ea1763a742c8f4f370643d4c374 Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 15 Jun 2025 18:20:17 +0700 Subject: [PATCH 17/48] =?UTF-8?q?=E2=9C=A8enhancement:=20out=20of=20contex?= 
=?UTF-8?q?t=20troubleshooting=20(#5275)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨enhancement: out of context troubleshooting * 🔧refactor: clean up --- .../containers/dialogs/OutOfContextDialog.tsx | 27 +- web-app/src/hooks/useChat.ts | 232 ++++++++++++------ .../settings/providers/$providerName.tsx | 3 + web-app/src/services/models.ts | 4 +- 4 files changed, 188 insertions(+), 78 deletions(-) diff --git a/web-app/src/containers/dialogs/OutOfContextDialog.tsx b/web-app/src/containers/dialogs/OutOfContextDialog.tsx index fb01d7907..92e72950a 100644 --- a/web-app/src/containers/dialogs/OutOfContextDialog.tsx +++ b/web-app/src/containers/dialogs/OutOfContextDialog.tsx @@ -14,7 +14,9 @@ import { Button } from '@/components/ui/button' export function useOutOfContextPromiseModal() { const [isOpen, setIsOpen] = useState(false) const [modalProps, setModalProps] = useState<{ - resolveRef: ((value: unknown) => void) | null + resolveRef: + | ((value: 'ctx_len' | 'context_shift' | undefined) => void) + | null }>({ resolveRef: null, }) @@ -33,17 +35,23 @@ export function useOutOfContextPromiseModal() { return null } - const handleConfirm = () => { + const handleContextLength = () => { setIsOpen(false) if (modalProps.resolveRef) { - modalProps.resolveRef(true) + modalProps.resolveRef('ctx_len') } } + const handleContextShift = () => { + setIsOpen(false) + if (modalProps.resolveRef) { + modalProps.resolveRef('context_shift') + } + } const handleCancel = () => { setIsOpen(false) if (modalProps.resolveRef) { - modalProps.resolveRef(false) + modalProps.resolveRef(undefined) } } @@ -64,7 +72,7 @@ export function useOutOfContextPromiseModal() { {t( 'outOfContextError.description', - 'This chat is reaching the AI’s memory limit, like a whiteboard filling up. We can expand the memory window (called context size) so it remembers more, but it may use more of your computer’s memory.' + 'This chat is reaching the AI’s memory limit, like a whiteboard filling up. We can expand the memory window (called context size) so it remembers more, but it may use more of your computer’s memory. We can also truncate the input, which means it will forget some of the chat history to make room for new messages.' )}

@@ -77,14 +85,17 @@ export function useOutOfContextPromiseModal() {
{streamingContent ? ( diff --git a/web-app/src/containers/ChatWidthSwitcher.tsx b/web-app/src/containers/ChatWidthSwitcher.tsx new file mode 100644 index 000000000..ddaf4d4fe --- /dev/null +++ b/web-app/src/containers/ChatWidthSwitcher.tsx @@ -0,0 +1,61 @@ +import { Skeleton } from '@/components/ui/skeleton' +import { useAppearance } from '@/hooks/useAppearance' +import { cn } from '@/lib/utils' +import { IconCircleCheckFilled } from '@tabler/icons-react' + +export function ChatWidthSwitcher() { + const { chatWidth, setChatWidth } = useAppearance() + + return ( +
+ + +
+ ) +} diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 66205248a..af359d32e 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -359,97 +359,98 @@ export const ThreadContent = memo( {!isToolCalls && (
-
- - - - - - -

Delete

-
-
- - - - -
- -
-
- -

Metadata

-
-
-
- - - Message Metadata -
-
- -
-
- - - - - -
-
-
- - {item.isLastMessage && ( +
+
+ -

Regenerate

+

Delete

- )} + + + + +
+ +
+
+ +

Metadata

+
+
+
+ + + Message Metadata +
+
+ +
+
+ + + + + +
+
+
+ + {item.isLastMessage && ( + + + + + +

Regenerate

+
+
+ )} +
diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx index b1dfb841c..5309d890c 100644 --- a/web-app/src/containers/TokenSpeedIndicator.tsx +++ b/web-app/src/containers/TokenSpeedIndicator.tsx @@ -1,19 +1,28 @@ -import { IconBrandSpeedtest } from '@tabler/icons-react' +import { useAppState } from '@/hooks/useAppState' +import { Gauge } from 'lucide-react' interface TokenSpeedIndicatorProps { metadata?: Record + streaming?: boolean } export const TokenSpeedIndicator = ({ - metadata + metadata, + streaming, }: TokenSpeedIndicatorProps) => { - const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number })?.tokenSpeed + const { tokenSpeed } = useAppState() + const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number }) + ?.tokenSpeed return (
- + + - {Math.round(persistedTokenSpeed)} tokens/sec + {Math.round( + streaming ? Number(tokenSpeed?.tokenSpeed) : persistedTokenSpeed + )} +  tokens/sec
) diff --git a/web-app/src/containers/dialogs/AppUpdater.tsx b/web-app/src/containers/dialogs/AppUpdater.tsx index 02a84accb..a17876a15 100644 --- a/web-app/src/containers/dialogs/AppUpdater.tsx +++ b/web-app/src/containers/dialogs/AppUpdater.tsx @@ -22,8 +22,8 @@ const DialogAppUpdater = () => { setRemindMeLater(true) } - const beta = VERSION.includes('beta') const nightly = VERSION.includes('-') + const beta = VERSION.includes('beta') const { release, fetchLatestRelease } = useReleaseNotes() diff --git a/web-app/src/hooks/useAppearance.ts b/web-app/src/hooks/useAppearance.ts index 60340e542..51a1ce10e 100644 --- a/web-app/src/hooks/useAppearance.ts +++ b/web-app/src/hooks/useAppearance.ts @@ -6,8 +6,10 @@ import { rgb, oklch, formatCss } from 'culori' import { useTheme } from './useTheme' export type FontSize = '14px' | '15px' | '16px' | '18px' +export type ChatWidth = 'full' | 'compact' interface AppearanceState { + chatWidth: ChatWidth fontSize: FontSize appBgColor: RgbaColor appMainViewBgColor: RgbaColor @@ -19,6 +21,7 @@ interface AppearanceState { appAccentTextColor: string appDestructiveTextColor: string appLeftPanelTextColor: string + setChatWidth: (size: ChatWidth) => void setFontSize: (size: FontSize) => void setAppBgColor: (color: RgbaColor) => void setAppMainViewBgColor: (color: RgbaColor) => void @@ -129,6 +132,7 @@ export const useAppearance = create()( persist( (set) => { return { + chatWidth: 'compact', fontSize: defaultFontSize, appBgColor: defaultAppBgColor, appMainViewBgColor: defaultAppMainViewBgColor, @@ -270,6 +274,10 @@ export const useAppearance = create()( }) }, + setChatWidth: (value: ChatWidth) => { + set({ chatWidth: value }) + }, + setFontSize: (size: FontSize) => { // Update CSS variable document.documentElement.style.setProperty('--font-size-base', size) diff --git a/web-app/src/routes/settings/appearance.tsx b/web-app/src/routes/settings/appearance.tsx index d59abb9b3..21b99c73e 100644 --- a/web-app/src/routes/settings/appearance.tsx +++ b/web-app/src/routes/settings/appearance.tsx @@ -18,6 +18,7 @@ import CodeBlockStyleSwitcher from '@/containers/CodeBlockStyleSwitcher' import { LineNumbersSwitcher } from '@/containers/LineNumbersSwitcher' import { CodeBlockExample } from '@/containers/CodeBlockExample' import { toast } from 'sonner' +import { ChatWidthSwitcher } from '@/containers/ChatWidthSwitcher' // eslint-disable-next-line @typescript-eslint/no-explicit-any export const Route = createFileRoute(route.settings.appearance as any)({ @@ -98,6 +99,15 @@ function Appareances() { /> + {/* Chat Message */} + + + + + {/* Codeblock */} ({ @@ -213,7 +213,12 @@ function ThreadDetail() { 'flex flex-col h-full w-full overflow-auto px-4 pt-4 pb-3' )} > -
+
{messages && messages.map((item, index) => { // Only pass isLastMessage to the last message in the array @@ -247,7 +252,12 @@ function ThreadDetail() {
-
+
Date: Mon, 16 Jun 2025 16:28:54 +0700 Subject: [PATCH 19/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20optional=20additional?= =?UTF-8?q?=5Finformation=20gpu=20(#5291)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/routes/settings/hardware.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index 3b35ddc65..dcd301773 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -97,7 +97,7 @@ function SortableGPUItem({ gpu, index }: { gpu: GPU; index: number }) { title="Driver Version" actions={ - {gpu.additional_information.driver_version} + {gpu.additional_information?.driver_version} } /> @@ -105,7 +105,7 @@ function SortableGPUItem({ gpu, index }: { gpu: GPU; index: number }) { title="Compute Capability" actions={ - {gpu.additional_information.compute_cap} + {gpu.additional_information?.compute_cap} } /> From 9b1f206cc63ec8e472661ccf9bcf85a962d8c899 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Mon, 16 Jun 2025 17:14:38 +0700 Subject: [PATCH 20/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20showing=20release=20no?= =?UTF-8?q?tes=20for=20beta=20and=20prod=20(#5292)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: showing release notes for beta and prod * ♻️refactor: make an utils env * ♻️refactor: hide MCP for production * ♻️refactor: simplify the boolean expression fetch release note --- web-app/src/containers/SettingsMenu.tsx | 14 ++++++++++---- web-app/src/containers/dialogs/AppUpdater.tsx | 16 +++++++--------- web-app/src/lib/version.ts | 5 +++++ 3 files changed, 22 insertions(+), 13 deletions(-) create mode 100644 web-app/src/lib/version.ts diff --git a/web-app/src/containers/SettingsMenu.tsx b/web-app/src/containers/SettingsMenu.tsx index 4494aa44c..c23ed6acf 100644 --- a/web-app/src/containers/SettingsMenu.tsx +++ b/web-app/src/containers/SettingsMenu.tsx @@ -2,6 +2,7 @@ import { Link, useMatches } from '@tanstack/react-router' import { route } from '@/constants/routes' import { useTranslation } from 'react-i18next' import { useModelProvider } from '@/hooks/useModelProvider' +import { isProd } from '@/lib/version' const menuSettings = [ { @@ -24,10 +25,15 @@ const menuSettings = [ title: 'Hardware', route: route.settings.hardware, }, - { - title: 'MCP Servers', - route: route.settings.mcp_servers, - }, + // Only show MCP Servers in non-production environment + ...(!isProd + ? [ + { + title: 'MCP Servers', + route: route.settings.mcp_servers, + }, + ] + : []), { title: 'Local API Server', route: route.settings.local_api_server, diff --git a/web-app/src/containers/dialogs/AppUpdater.tsx b/web-app/src/containers/dialogs/AppUpdater.tsx index a17876a15..313937c34 100644 --- a/web-app/src/containers/dialogs/AppUpdater.tsx +++ b/web-app/src/containers/dialogs/AppUpdater.tsx @@ -7,6 +7,7 @@ import { useState, useEffect } from 'react' import { useReleaseNotes } from '@/hooks/useReleaseNotes' import { RenderMarkdown } from '../RenderMarkdown' import { cn, isDev } from '@/lib/utils' +import { isNightly, isBeta } from '@/lib/version' const DialogAppUpdater = () => { const { @@ -22,16 +23,13 @@ const DialogAppUpdater = () => { setRemindMeLater(true) } - const nightly = VERSION.includes('-') - const beta = VERSION.includes('beta') - const { release, fetchLatestRelease } = useReleaseNotes() useEffect(() => { if (!isDev()) { - fetchLatestRelease(beta ? 
true : false) + fetchLatestRelease(isBeta) } - }, [beta, fetchLatestRelease]) + }, [fetchLatestRelease]) // Check for updates when component mounts useEffect(() => { @@ -71,7 +69,7 @@ const DialogAppUpdater = () => {
New Version: Jan {updateState.updateInfo?.version}
-
+
There's a new app update available to download.
@@ -79,9 +77,9 @@ const DialogAppUpdater = () => {
{showReleaseNotes && ( -
- {nightly ? ( -

+

+ {isNightly && !isBeta ? ( +

You are using a nightly build. This version is built from the latest development branch and may not have release notes. diff --git a/web-app/src/lib/version.ts b/web-app/src/lib/version.ts new file mode 100644 index 000000000..f8e7df5b0 --- /dev/null +++ b/web-app/src/lib/version.ts @@ -0,0 +1,5 @@ +import { isDev } from './utils' + +export const isNightly = VERSION.includes('-') +export const isBeta = VERSION.includes('beta') +export const isProd = !isNightly && !isBeta && !isDev From 41814547995c319d069f36ed139e95b79eb1f775 Mon Sep 17 00:00:00 2001 From: Louis Date: Mon, 16 Jun 2025 18:45:26 +0700 Subject: [PATCH 21/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20typo=20in=20build=20ty?= =?UTF-8?q?pe=20check=20(#5297)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/lib/version.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-app/src/lib/version.ts b/web-app/src/lib/version.ts index f8e7df5b0..fbbe217b3 100644 --- a/web-app/src/lib/version.ts +++ b/web-app/src/lib/version.ts @@ -2,4 +2,4 @@ import { isDev } from './utils' export const isNightly = VERSION.includes('-') export const isBeta = VERSION.includes('beta') -export const isProd = !isNightly && !isBeta && !isDev +export const isProd = !isNightly && !isBeta && !isDev() From 4dfc9f604a5a2dc6229d4bede83cee8f72ede6d1 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Mon, 16 Jun 2025 20:53:48 +0700 Subject: [PATCH 22/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20remove=20onboarding=20?= =?UTF-8?q?local=20model=20and=20hide=20the=20edit=20capabilities=20model?= =?UTF-8?q?=20(#5301)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: remove onboarding local model and hide the edit capabilities model * ♻️refactor: conditional search params setup screen --- web-app/src/containers/SetupScreen.tsx | 5 +++-- .../src/routes/settings/providers/$providerName.tsx | 11 +++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/web-app/src/containers/SetupScreen.tsx b/web-app/src/containers/SetupScreen.tsx index 6f3f07873..cf8e32c84 100644 --- a/web-app/src/containers/SetupScreen.tsx +++ b/web-app/src/containers/SetupScreen.tsx @@ -3,6 +3,7 @@ import { useModelProvider } from '@/hooks/useModelProvider' import { Link } from '@tanstack/react-router' import { route } from '@/constants/routes' import HeaderPage from './HeaderPage' +import { isProd } from '@/lib/version' function SetupScreen() { const { providers } = useModelProvider() @@ -19,7 +20,7 @@ function SetupScreen() { Welcome to Jan

- To get started, you’ll need to either download a local AI model or + To get started, you'll need to either download a local AI model or connect to a cloud model using an API key

@@ -29,7 +30,7 @@ function SetupScreen() {
diff --git a/web-app/src/routes/settings/providers/$providerName.tsx b/web-app/src/routes/settings/providers/$providerName.tsx index 7ed4e3969..3d8f67af5 100644 --- a/web-app/src/routes/settings/providers/$providerName.tsx +++ b/web-app/src/routes/settings/providers/$providerName.tsx @@ -39,6 +39,7 @@ import { toast } from 'sonner' import { ActiveModel } from '@/types/models' import { useEffect, useState } from 'react' import { predefinedProviders } from '@/mock/data' +import { isProd } from '@/lib/version' // as route.threadsDetail export const Route = createFileRoute('/settings/providers/$providerName')({ @@ -459,10 +460,12 @@ function ProviderDetail() { } actions={
- + {!isProd && ( + + )} {model.settings && ( Date: Mon, 16 Jun 2025 21:50:23 +0700 Subject: [PATCH 23/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20hide=20token=20speed?= =?UTF-8?q?=20when=20assistant=20params=20stream=20false=20(#5302)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ThreadContent.tsx | 11 ----------- web-app/src/containers/TokenSpeedIndicator.tsx | 11 ++++++++++- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index af359d32e..3079f3833 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -417,17 +417,6 @@ export const ThreadContent = memo( />
- - - - - diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx index 5309d890c..8ca08d707 100644 --- a/web-app/src/containers/TokenSpeedIndicator.tsx +++ b/web-app/src/containers/TokenSpeedIndicator.tsx @@ -14,10 +14,19 @@ export const TokenSpeedIndicator = ({ const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number }) ?.tokenSpeed + const nonStreamingAssistantParam = + typeof metadata?.assistant === 'object' && + metadata?.assistant !== null && + 'parameters' in metadata.assistant + ? (metadata.assistant as { parameters?: { stream?: boolean } }).parameters + ?.stream === false + : undefined + + if (nonStreamingAssistantParam) return + return (
- {Math.round( streaming ? Number(tokenSpeed?.tokenSpeed) : persistedTokenSpeed From a745d24fbe7d42aaba87c407f05288567de3db90 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Tue, 17 Jun 2025 10:00:55 +0700 Subject: [PATCH 24/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20glitch=20padding=20spe?= =?UTF-8?q?ed=20token=20(#5307)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ThreadContent.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 3079f3833..9e491bf55 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -358,7 +358,7 @@ export const ThreadContent = memo( ) : null} {!isToolCalls && ( -
+
Date: Tue, 17 Jun 2025 12:46:23 +0700 Subject: [PATCH 25/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20immediately=20show=20d?= =?UTF-8?q?ownload=20progress=20(#5308)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/DownloadManegement.tsx | 69 +++++++++++++------ web-app/src/hooks/useDownloadStore.ts | 18 +++++ web-app/src/routes/hub.tsx | 30 ++++++-- 3 files changed, 90 insertions(+), 27 deletions(-) diff --git a/web-app/src/containers/DownloadManegement.tsx b/web-app/src/containers/DownloadManegement.tsx index 47b448485..db78181c6 100644 --- a/web-app/src/containers/DownloadManegement.tsx +++ b/web-app/src/containers/DownloadManegement.tsx @@ -19,7 +19,13 @@ export function DownloadManagement() { const { setProviders } = useModelProvider() const { open: isLeftPanelOpen } = useLeftPanel() const [isPopoverOpen, setIsPopoverOpen] = useState(false) - const { downloads, updateProgress, removeDownload } = useDownloadStore() + const { + downloads, + updateProgress, + localDownloadingModels, + removeDownload, + removeLocalDownloadingModel, + } = useDownloadStore() const { updateState } = useAppUpdater() const [appUpdateState, setAppUpdateState] = useState({ @@ -76,23 +82,36 @@ export function DownloadManagement() { }) }, []) + const downloadProcesses = useMemo(() => { + // Get downloads with progress data + const downloadsWithProgress = Object.values(downloads).map((download) => ({ + id: download.name, + name: download.name, + progress: download.progress, + current: download.current, + total: download.total, + })) + + // Add local downloading models that don't have progress data yet + const localDownloadsWithoutProgress = Array.from(localDownloadingModels) + .filter((modelId) => !downloads[modelId]) // Only include models not in downloads + .map((modelId) => ({ + id: modelId, + name: modelId, + progress: 0, + current: 0, + total: 0, + })) + + return [...downloadsWithProgress, ...localDownloadsWithoutProgress] + }, [downloads, localDownloadingModels]) + const downloadCount = useMemo(() => { - const modelDownloads = Object.keys(downloads).length + const modelDownloads = downloadProcesses.length const appUpdateDownload = appUpdateState.isDownloading ? 
1 : 0 const total = modelDownloads + appUpdateDownload return total - }, [downloads, appUpdateState.isDownloading]) - const downloadProcesses = useMemo( - () => - Object.values(downloads).map((download) => ({ - id: download.name, - name: download.name, - progress: download.progress, - current: download.current, - total: download.total, - })), - [downloads] - ) + }, [downloadProcesses, appUpdateState.isDownloading]) const overallProgress = useMemo(() => { const modelTotal = downloadProcesses.reduce((acc, download) => { @@ -139,29 +158,32 @@ export function DownloadManagement() { (state: DownloadState) => { console.debug('onFileDownloadError', state) removeDownload(state.modelId) + removeLocalDownloadingModel(state.modelId) }, - [removeDownload] + [removeDownload, removeLocalDownloadingModel] ) const onFileDownloadStopped = useCallback( (state: DownloadState) => { console.debug('onFileDownloadError', state) removeDownload(state.modelId) + removeLocalDownloadingModel(state.modelId) }, - [removeDownload] + [removeDownload, removeLocalDownloadingModel] ) const onFileDownloadSuccess = useCallback( async (state: DownloadState) => { console.debug('onFileDownloadSuccess', state) removeDownload(state.modelId) + removeLocalDownloadingModel(state.modelId) getProviders().then(setProviders) toast.success('Download Complete', { id: 'download-complete', description: `The model ${state.modelId} has been downloaded`, }) }, - [removeDownload, setProviders] + [removeDownload, removeLocalDownloadingModel, setProviders] ) useEffect(() => { @@ -264,12 +286,16 @@ export function DownloadManagement() { />

{`${renderGB(appUpdateState.downloadedBytes)} / ${renderGB(appUpdateState.totalBytes)}`}{' '} - GB ({Math.round(appUpdateState.downloadProgress * 100)}%) + GB ({Math.round(appUpdateState.downloadProgress * 100)} + %)

)} {downloadProcesses.map((download) => ( -
+

{download.name} @@ -299,8 +325,9 @@ export function DownloadManagement() { className="my-2" />

- {`${renderGB(download.current)} / ${renderGB(download.total)}`}{' '} - GB ({Math.round(download.progress * 100)}%) + {download.total > 0 + ? `${renderGB(download.current)} / ${renderGB(download.total)} GB (${Math.round(download.progress * 100)}%)` + : 'Initializing download...'}

))} diff --git a/web-app/src/hooks/useDownloadStore.ts b/web-app/src/hooks/useDownloadStore.ts index 8a0e6ac19..48a5a347c 100644 --- a/web-app/src/hooks/useDownloadStore.ts +++ b/web-app/src/hooks/useDownloadStore.ts @@ -11,6 +11,7 @@ export interface DownloadProgressProps { // Zustand store for thinking block state export type DownloadState = { downloads: { [id: string]: DownloadProgressProps } + localDownloadingModels: Set removeDownload: (id: string) => void updateProgress: ( id: string, @@ -19,6 +20,8 @@ export type DownloadState = { current?: number, total?: number ) => void + addLocalDownloadingModel: (modelId: string) => void + removeLocalDownloadingModel: (modelId: string) => void } /** @@ -26,6 +29,7 @@ export type DownloadState = { */ export const useDownloadStore = create((set) => ({ downloads: {}, + localDownloadingModels: new Set(), removeDownload: (id: string) => set((state) => { // eslint-disable-next-line @typescript-eslint/no-unused-vars @@ -46,4 +50,18 @@ export const useDownloadStore = create((set) => ({ }, }, })), + + addLocalDownloadingModel: (modelId: string) => + set((state) => ({ + localDownloadingModels: new Set(state.localDownloadingModels).add( + modelId + ), + })), + + removeLocalDownloadingModel: (modelId: string) => + set((state) => { + const newSet = new Set(state.localDownloadingModels) + newSet.delete(modelId) + return { localDownloadingModels: newSet } + }), })) diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index c4327fb04..42147d0f8 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -182,7 +182,8 @@ function Hub() { } } - const { downloads } = useDownloadStore() + const { downloads, localDownloadingModels, addLocalDownloadingModel } = + useDownloadStore() const downloadProcesses = useMemo( () => @@ -225,7 +226,9 @@ function Hub() { model.models.find((e) => defaultModelQuantizations.some((m) => e.id.toLowerCase().includes(m)) )?.id ?? model.models[0]?.id - const isDownloading = downloadProcesses.some((e) => e.id === modelId) + const isDownloading = + localDownloadingModels.has(modelId) || + downloadProcesses.some((e) => e.id === modelId) const downloadProgress = downloadProcesses.find((e) => e.id === modelId)?.progress || 0 const isDownloaded = llamaProvider?.models.some( @@ -233,6 +236,12 @@ function Hub() { ) const isRecommended = isRecommendedModel(model.metadata?.id) + const handleDownload = () => { + // Immediately set local downloading state + addLocalDownloadingModel(modelId) + downloadModel(modelId) + } + return (
downloadModel(modelId)} + onClick={handleDownload} className={cn(isDownloading && 'hidden')} ref={isRecommended ? downloadButtonRef : undefined} > @@ -271,6 +280,8 @@ function Hub() { handleUseModel, isRecommendedModel, downloadButtonRef, + localDownloadingModels, + addLocalDownloadingModel, ]) const { step } = useSearch({ from: Route.id }) @@ -320,7 +331,8 @@ function Hub() { } // Check if any model is currently downloading - const isDownloading = downloadProcesses.length > 0 + const isDownloading = + localDownloadingModels.size > 0 || downloadProcesses.length > 0 const steps = [ { @@ -553,6 +565,9 @@ function Hub() {

{(() => { const isDownloading = + localDownloadingModels.has( + variant.id + ) || downloadProcesses.some( (e) => e.id === variant.id ) @@ -607,9 +622,12 @@ function Hub() {
+ onClick={() => { + addLocalDownloadingModel( + variant.id + ) downloadModel(variant.id) - } + }} > Date: Tue, 17 Jun 2025 13:15:40 +0700 Subject: [PATCH 26/48] =?UTF-8?q?=F0=9F=90=9Bfix:safely=20convert=20values?= =?UTF-8?q?=20to=20numbers=20and=20handle=20NaN=20cases=20(#5309)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/TokenSpeedIndicator.tsx | 3 ++- web-app/src/utils/number.ts | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 web-app/src/utils/number.ts diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx index 8ca08d707..74f277f70 100644 --- a/web-app/src/containers/TokenSpeedIndicator.tsx +++ b/web-app/src/containers/TokenSpeedIndicator.tsx @@ -1,4 +1,5 @@ import { useAppState } from '@/hooks/useAppState' +import { toNumber } from '@/utils/number' import { Gauge } from 'lucide-react' interface TokenSpeedIndicatorProps { @@ -29,7 +30,7 @@ export const TokenSpeedIndicator = ({ {Math.round( - streaming ? Number(tokenSpeed?.tokenSpeed) : persistedTokenSpeed + streaming ? toNumber(tokenSpeed?.tokenSpeed) : persistedTokenSpeed )}  tokens/sec diff --git a/web-app/src/utils/number.ts b/web-app/src/utils/number.ts new file mode 100644 index 000000000..866755ed6 --- /dev/null +++ b/web-app/src/utils/number.ts @@ -0,0 +1,4 @@ +export const toNumber = (value: unknown): number => { + const num = Number(value) + return isNaN(num) ? 0 : num +} From 50c25b65b66ebb2020e880c8999f847ce368243a Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 13:35:53 +0700 Subject: [PATCH 27/48] chore: correct binary name for stable version (#5303) (#5311) Co-authored-by: hiento09 <136591877+hiento09@users.noreply.github.com> --- .github/workflows/template-tauri-build-windows-x64.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/template-tauri-build-windows-x64.yml b/.github/workflows/template-tauri-build-windows-x64.yml index 47b5663cb..d17546955 100644 --- a/.github/workflows/template-tauri-build-windows-x64.yml +++ b/.github/workflows/template-tauri-build-windows-x64.yml @@ -160,6 +160,9 @@ jobs: sed -i "s/jan_productname/Jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template sed -i "s/jan_mainbinaryname/jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template + else + sed -i "s/jan_productname/Jan/g" ./src-tauri/tauri.bundle.windows.nsis.template + sed -i "s/jan_mainbinaryname/jan/g" ./src-tauri/tauri.bundle.windows.nsis.template fi echo "---------nsis.template---------" cat ./src-tauri/tauri.bundle.windows.nsis.template From 2899d58ad788d787cd6dcbb8970ab522757cacd1 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 15:30:07 +0700 Subject: [PATCH 28/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20llama.cpp=20default=20?= =?UTF-8?q?NGL=20setting=20does=20not=20offload=20all=20layers=20to=20GPU?= =?UTF-8?q?=20(#5310)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: llama.cpp default NGL setting does not offload all layers to GPU * chore: cover more cases * chore: clean up * fix: should not show GPU section on Mac --- core/src/browser/models/utils.ts | 2 +- .../inference-cortex-extension/src/index.ts | 6 ++- web-app/src/routes/settings/hardware.tsx | 50 ++++++++++--------- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/core/src/browser/models/utils.ts b/core/src/browser/models/utils.ts index 0e52441b2..2ac243b6a 100644 --- 
a/core/src/browser/models/utils.ts +++ b/core/src/browser/models/utils.ts @@ -17,7 +17,7 @@ export const validationRules: { [key: string]: (value: any) => boolean } = { presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, ctx_len: (value: any) => Number.isInteger(value) && value >= 0, - ngl: (value: any) => Number.isInteger(value) && value >= 0, + ngl: (value: any) => Number.isInteger(value), embedding: (value: any) => typeof value === 'boolean', n_parallel: (value: any) => Number.isInteger(value) && value >= 0, cpu_threads: (value: any) => Number.isInteger(value) && value >= 0, diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 3e8b60ebe..d80bad3d4 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -253,11 +253,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { } } } + const modelSettings = extractModelLoadParams(model.settings) return await this.apiInstance().then((api) => api .post('v1/models/start', { json: { - ...extractModelLoadParams(model.settings), + ...modelSettings, model: model.id, engine: model.engine === 'nitro' // Legacy model cache @@ -282,6 +283,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ...(this.context_shift === false ? { 'no-context-shift': true } : {}), + ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined + ? { ngl: 100 } + : {}), }, timeout: false, signal, diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index dcd301773..53de1f9b3 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -371,30 +371,34 @@ function Hardware() { )} {/* GPU Information */} - - {hardwareData.gpus.length > 0 ? ( - - gpu.id)} - strategy={verticalListSortingStrategy} + {!IS_MACOS ? ( + + {hardwareData.gpus.length > 0 ? ( + - {hardwareData.gpus.map((gpu, index) => ( - - ))} - - - ) : ( - } /> - )} - + gpu.id)} + strategy={verticalListSortingStrategy} + > + {hardwareData.gpus.map((gpu, index) => ( + + ))} + + + ) : ( + } /> + )} + + ) : ( + <> + )}
From 86079074d232ef832d134f5b3aad95112c4fe623 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 17:37:31 +0700 Subject: [PATCH 29/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20update=20default=20ext?= =?UTF-8?q?ension=20settings=20(#5315)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: update default extension settings * chore: hide language setting on Prod --- .../resources/default_settings.json | 2 +- extensions/inference-cortex-extension/src/index.ts | 4 ++-- web-app/src/hooks/useChat.ts | 4 ---- web-app/src/routes/settings/general.tsx | 11 +++++++---- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json index d27624639..54d578293 100644 --- a/extensions/inference-cortex-extension/resources/default_settings.json +++ b/extensions/inference-cortex-extension/resources/default_settings.json @@ -14,7 +14,7 @@ "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.", "controllerType": "checkbox", "controllerProps": { - "value": true + "value": false } }, { diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index d80bad3d4..dd78e2d62 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -64,7 +64,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { cpu_threads?: number auto_unload_models: boolean = true reasoning_budget = -1 // Default reasoning budget in seconds - context_shift = true + context_shift = false /** * The URL for making inference requests. 
*/ @@ -132,7 +132,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { this.flash_attn = await this.getSetting(Settings.flash_attn, true) this.context_shift = await this.getSetting( Settings.context_shift, - true + false ) this.use_mmap = await this.getSetting(Settings.use_mmap, true) if (this.caching_enabled) diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 4b0e3e6e8..2c8f9fd2a 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -180,10 +180,6 @@ export const useChat = () => { if (updatedProvider) await restartModel(updatedProvider, model.id, controller) - console.log( - updatedProvider?.models.find((e) => e.id === model.id)?.settings - ?.ctx_len?.controller_props.value - ) return updatedProvider }, [getProviderByName, restartModel, updateProvider] diff --git a/web-app/src/routes/settings/general.tsx b/web-app/src/routes/settings/general.tsx index 572d7f916..33eafc341 100644 --- a/web-app/src/routes/settings/general.tsx +++ b/web-app/src/routes/settings/general.tsx @@ -45,6 +45,7 @@ import { isDev } from '@/lib/utils' import { emit } from '@tauri-apps/api/event' import { stopAllModels } from '@/services/models' import { SystemEvent } from '@/types/events' +import { isProd } from '@/lib/version' // eslint-disable-next-line @typescript-eslint/no-explicit-any export const Route = createFileRoute(route.settings.general as any)({ @@ -234,10 +235,12 @@ function General() { } /> - } - /> + {!isProd && ( + } + /> + )} {/* Data folder */} From 11d767633e1340c040499930d9ba07218237771a Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Tue, 17 Jun 2025 18:35:02 +0700 Subject: [PATCH 30/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20allow=20script=20posth?= =?UTF-8?q?og=20(#5316)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-tauri/tauri.conf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 1810c0661..04191e842 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -45,7 +45,7 @@ ], "img-src": "'self' asset: http://asset.localhost blob: data: https://cdn.jsdelivr.net", "style-src": "'unsafe-inline' 'self' https://fonts.googleapis.com", - "script-src": "'self' asset: $APPDATA/**.* http://asset.localhost" + "script-src": "'self' asset: $APPDATA/**.* http://asset.localhost https://eu-assets.i.posthog.com https://posthog.com" }, "assetProtocol": { "enable": true, From ebc268018bdf457408eefcfbf0e50c6ab3c5df1a Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 21:08:56 +0700 Subject: [PATCH 31/48] Sync 0.5.18 to 0.6.0 (#5320) * chore: correct binary name for stable version (#5303) * ci: enable devtool on prod build (#5317) * ci: enable devtool on prod build --------- Co-authored-by: hiento09 <136591877+hiento09@users.noreply.github.com> Co-authored-by: Nguyen Ngoc Minh <91668012+Minh141120@users.noreply.github.com> --- .github/workflows/template-tauri-build-linux-x64.yml | 4 ++++ .github/workflows/template-tauri-build-macos.yml | 4 ++++ .github/workflows/template-tauri-build-windows-x64.yml | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/.github/workflows/template-tauri-build-linux-x64.yml b/.github/workflows/template-tauri-build-linux-x64.yml index 6c47c79f2..9356c3f28 100644 --- a/.github/workflows/template-tauri-build-linux-x64.yml +++ b/.github/workflows/template-tauri-build-linux-x64.yml @@ -122,6 +122,10 @@ jobs: jq --arg version "${{ inputs.new_version }}" 
'.version = $version' web-app/package.json > /tmp/package.json mv /tmp/package.json web-app/package.json + # Temporarily enable devtool on prod build + ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools" + cat ./src-tauri/Cargo.toml + ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}" cat ./src-tauri/Cargo.toml diff --git a/.github/workflows/template-tauri-build-macos.yml b/.github/workflows/template-tauri-build-macos.yml index 6999ff77e..086e14ad2 100644 --- a/.github/workflows/template-tauri-build-macos.yml +++ b/.github/workflows/template-tauri-build-macos.yml @@ -126,6 +126,10 @@ jobs: ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}" cat ./src-tauri/Cargo.toml + # Temporarily enable devtool on prod build + ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools" + cat ./src-tauri/Cargo.toml + # Change app name for beta and nightly builds if [ "${{ inputs.channel }}" != "stable" ]; then jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json diff --git a/.github/workflows/template-tauri-build-windows-x64.yml b/.github/workflows/template-tauri-build-windows-x64.yml index d17546955..2ab6d7ad9 100644 --- a/.github/workflows/template-tauri-build-windows-x64.yml +++ b/.github/workflows/template-tauri-build-windows-x64.yml @@ -137,6 +137,10 @@ jobs: sed -i "s/jan_version/$new_base_version/g" ./src-tauri/tauri.bundle.windows.nsis.template sed -i "s/jan_build/$new_build_version/g" ./src-tauri/tauri.bundle.windows.nsis.template + # Temporarily enable devtool on prod build + ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools" + cat ./src-tauri/Cargo.toml + # Change app name for beta and nightly builds if [ "${{ inputs.channel }}" != "stable" ]; then jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json From 52bb54d2d50ceee87e72d6d30cd085bd729c5f04 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 23:01:22 +0700 Subject: [PATCH 32/48] fix: glitch model download issue (#5322) --- .../model-extension/resources/default.json | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/extensions/model-extension/resources/default.json b/extensions/model-extension/resources/default.json index 32bc278e4..bd7c7e63b 100644 --- a/extensions/model-extension/resources/default.json +++ b/extensions/model-extension/resources/default.json @@ -125,59 +125,59 @@ }, "models": [ { - "id": "Menlo:Jan-nano:jan-nano-4b-iQ4_XS.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-iQ4_XS.gguf", "size": 2270750400 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_L.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_L.gguf", "size": 2239784384 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_M.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_M.gguf", "size": 2075616704 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_S.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_S.gguf", "size": 1886995904 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q4_0.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_0.gguf", "size": 2369545664 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q4_1.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_1.gguf", "size": 2596627904 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q4_K_M.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_K_M.gguf", "size": 2497279424 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q4_K_S.gguf", + 
"id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_K_S.gguf", "size": 2383308224 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q5_0.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_0.gguf", "size": 2823710144 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q5_1.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_1.gguf", "size": 3050792384 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q5_K_M.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_K_M.gguf", "size": 2889512384 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q5_K_S.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_K_S.gguf", "size": 2823710144 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q6_K.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q6_K.gguf", "size": 3306259904 }, { - "id": "Menlo:Jan-nano:jan-nano-4b-Q8_0.gguf", + "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q8_0.gguf", "size": 4280403904 } ] From 3f07358125457a95e3f8cd04d4dee2326a0b7770 Mon Sep 17 00:00:00 2001 From: Nguyen Ngoc Minh <91668012+Minh141120@users.noreply.github.com> Date: Tue, 17 Jun 2025 23:43:15 +0700 Subject: [PATCH 33/48] =?UTF-8?q?=F0=9F=90=9B=20fix(updater):=20terminate?= =?UTF-8?q?=20sidecar=20processes=20before=20update=20to=20avoid=20file=20?= =?UTF-8?q?access=20errors=20(#5325)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-tauri/src/core/setup.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src-tauri/src/core/setup.rs b/src-tauri/src/core/setup.rs index 6883b2a3b..602080378 100644 --- a/src-tauri/src/core/setup.rs +++ b/src-tauri/src/core/setup.rs @@ -291,6 +291,7 @@ pub fn setup_sidecar(app: &App) -> Result<(), String> { } else { log::warn!("Kill event received, but no active sidecar process found to kill."); } + clean_up() }); }); From 1160ea140b4fe6774da849bf24e574b0af69820f Mon Sep 17 00:00:00 2001 From: Sam Hoang Van Date: Wed, 18 Jun 2025 00:23:53 +0700 Subject: [PATCH 34/48] =?UTF-8?q?=F0=9F=90=9B=20fix:=20disable=20sorting?= =?UTF-8?q?=20for=20threads=20in=20SortableItem=20and=20clean=20up=20threa?= =?UTF-8?q?d=20order=20handling=20(#5326)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ThreadList.tsx | 44 ++------------------------- web-app/src/hooks/useThreads.ts | 35 +-------------------- 2 files changed, 3 insertions(+), 76 deletions(-) diff --git a/web-app/src/containers/ThreadList.tsx b/web-app/src/containers/ThreadList.tsx index 44f8bbfc4..0702c1bf9 100644 --- a/web-app/src/containers/ThreadList.tsx +++ b/web-app/src/containers/ThreadList.tsx @@ -9,7 +9,6 @@ import { import { SortableContext, verticalListSortingStrategy, - arrayMove, useSortable, } from '@dnd-kit/sortable' import { CSS } from '@dnd-kit/utilities' @@ -54,7 +53,7 @@ const SortableItem = memo(({ thread }: { thread: Thread }) => { transform, transition, isDragging, - } = useSortable({ id: thread.id }) + } = useSortable({ id: thread.id, disabled: true }) const style = { transform: CSS.Transform.toString(transform), @@ -263,18 +262,8 @@ type ThreadListProps = { } function ThreadList({ threads }: ThreadListProps) { - const { setThreads } = useThreads() - const sortedThreads = useMemo(() => { return threads.sort((a, b) => { - // If both have order, sort by order (ascending, so lower order comes first) - if (a.order != null && b.order != null) { - return a.order - b.order - } - // If only one has order, prioritize the one with order (order comes first) - if (a.order != null) return -1 - if (b.order != null) return 1 - // If neither has order, sort by updated time (newer threads first) 
return (b.updated || 0) - (a.updated || 0) }) }, [threads]) @@ -290,36 +279,7 @@ function ThreadList({ threads }: ThreadListProps) { ) return ( - { - const { active, over } = event - if (active.id !== over?.id && over) { - // Access Global State - const allThreadsMap = useThreads.getState().threads - const allThreadsArray = Object.values(allThreadsMap) - - // Calculate Global Indices - const oldIndexInGlobal = allThreadsArray.findIndex( - (t) => t.id === active.id - ) - const newIndexInGlobal = allThreadsArray.findIndex( - (t) => t.id === over.id - ) - - // Reorder Globally and Update State - if (oldIndexInGlobal !== -1 && newIndexInGlobal !== -1) { - const reorderedGlobalThreads = arrayMove( - allThreadsArray, - oldIndexInGlobal, - newIndexInGlobal - ) - setThreads(reorderedGlobalThreads) - } - } - }} - > + t.id)} strategy={verticalListSortingStrategy} diff --git a/web-app/src/hooks/useThreads.ts b/web-app/src/hooks/useThreads.ts index 806749b73..7bc46675e 100644 --- a/web-app/src/hooks/useThreads.ts +++ b/web-app/src/hooks/useThreads.ts @@ -32,11 +32,9 @@ export const useThreads = create()((set, get) => ({ threads: {}, searchIndex: null, setThreads: (threads) => { - threads.forEach((thread, index) => { - thread.order = index + 1 + threads.forEach((thread) => { updateThread({ ...thread, - order: index + 1, }) }) const threadMap = threads.reduce( @@ -159,7 +157,6 @@ export const useThreads = create()((set, get) => ({ id: ulid(), title: title ?? 'New Thread', model, - // order: 1, // Will be set properly by setThreads updated: Date.now() / 1000, assistants: assistant ? [assistant] : [], } @@ -244,44 +241,14 @@ export const useThreads = create()((set, get) => ({ const thread = state.threads[threadId] if (!thread) return state - // If the thread is already at order 1, just update the timestamp - if (thread.order === 1) { - const updatedThread = { - ...thread, - updated: Date.now() / 1000, - } - updateThread(updatedThread) - - return { - threads: { - ...state.threads, - [threadId]: updatedThread, - }, - } - } - // Update the thread with new timestamp and set it to order 1 (top) const updatedThread = { ...thread, updated: Date.now() / 1000, - order: 1, } // Update all other threads to increment their order by 1 const updatedThreads = { ...state.threads } - Object.keys(updatedThreads).forEach((id) => { - if (id !== threadId) { - const otherThread = updatedThreads[id] - updatedThreads[id] = { - ...otherThread, - order: (otherThread.order || 1) + 1, - } - // Update the backend for other threads - updateThread(updatedThreads[id]) - } - }) - - // Set the updated thread updatedThreads[threadId] = updatedThread // Update the backend for the main thread From 06be778e9819e7e76c5310759832686c3feb2f9c Mon Sep 17 00:00:00 2001 From: Ramon Perez Date: Wed, 18 Jun 2025 02:17:15 +1000 Subject: [PATCH 35/48] improved wording in UI elements (#5323) --- web-app/src/locales/en/settings.json | 8 ++--- web-app/src/routes/settings/appearance.tsx | 24 +++++++-------- web-app/src/routes/settings/general.tsx | 30 ++++++++++--------- web-app/src/routes/settings/hardware.tsx | 2 +- web-app/src/routes/settings/https-proxy.tsx | 16 +++++----- .../src/routes/settings/local-api-server.tsx | 18 +++++------ web-app/src/routes/settings/privacy.tsx | 18 +++++------ web-app/src/routes/settings/shortcuts.tsx | 14 ++++----- 8 files changed, 65 insertions(+), 65 deletions(-) diff --git a/web-app/src/locales/en/settings.json b/web-app/src/locales/en/settings.json index 67edc9c8b..0422c28ca 100644 --- 
a/web-app/src/locales/en/settings.json +++ b/web-app/src/locales/en/settings.json @@ -5,15 +5,15 @@ }, "dataFolder": { "appData": "App Data", - "appDataDesc": "Default location for messages and other user data", + "appDataDesc": "Default location for messages and other user data.", "appLogs": "App Logs", - "appLogsDesc": "Default location App Logs" + "appLogsDesc": "Default location App Logs." }, "others": { "spellCheck": "Spell Check", - "spellCheckDesc": "Turn on to enable spell check chat input.", + "spellCheckDesc": "Enable spell check for your threads.", "resetFactory": "Reset To Factory Settings", - "resetFactoryDesc": "Restore application to its initial state, erasing all models and chat history. This action is irreversible and recommended only if the application is corrupted" + "resetFactoryDesc": "Restore application to its initial state, erasing all models and chat history. This action is irreversible and recommended only if the application is corrupted." } } } diff --git a/web-app/src/routes/settings/appearance.tsx b/web-app/src/routes/settings/appearance.tsx index 21b99c73e..cd3306080 100644 --- a/web-app/src/routes/settings/appearance.tsx +++ b/web-app/src/routes/settings/appearance.tsx @@ -43,43 +43,43 @@ function Appareances() { } /> } /> } /> } /> } /> } /> } /> @@ -112,18 +112,18 @@ function Appareances() { } /> } />
} diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index 53de1f9b3..ab42a15f9 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -352,7 +352,7 @@ function Hardware() { -

URL and port of your proxy server.

+

The URL and port of your proxy server.

-

Credentials for your proxy server (if required).

+

Credentials for the proxy server, if required.

-

List of hosts that should bypass the proxy.

+

A comma-separated list of hosts to bypass the proxy.

Local API Server

- Start an OpenAI-compatible local HTTP server. + Run an OpenAI-compatible server locally.

@@ -165,7 +165,7 @@ function LocalAPIServer() { > - By opting in, you help us make Jan better by sharing - anonymous data, like feature usage and user counts. Your - chats and personal information are never collected. + To help us improve Jan, you can share anonymous data like + feature usage and user counts. We never collect your chats + or personal information.

} align="start" @@ -63,17 +63,15 @@ function Privacy() { description={

- We prioritize your control over your data. Learn more - about our Privacy Policy. + You have full control over your data. Learn more in our + Privacy Policy.

- To make Jan better, we need to understand how it’s used - - but only if you choose to help. You can change your Jan - Analytics settings anytime. + To improve Jan, we need to understand how it’s used—but + only with your help. You can change this setting anytime.

- Your choice to opt-in or out doesn't change our core - privacy promises: + Your choice here won’t change our core privacy promises:

  • Your chats are never read
  • diff --git a/web-app/src/routes/settings/shortcuts.tsx b/web-app/src/routes/settings/shortcuts.tsx index 2d46ad7c2..2aac82447 100644 --- a/web-app/src/routes/settings/shortcuts.tsx +++ b/web-app/src/routes/settings/shortcuts.tsx @@ -27,7 +27,7 @@ function Shortcuts() { @@ -38,7 +38,7 @@ function Shortcuts() { /> @@ -49,7 +49,7 @@ function Shortcuts() { /> @@ -60,7 +60,7 @@ function Shortcuts() { /> @@ -75,7 +75,7 @@ function Shortcuts() { Enter @@ -84,7 +84,7 @@ function Shortcuts() { /> Shift + Enter @@ -97,7 +97,7 @@ function Shortcuts() { From b30de3d1ae98b1e05a5db3ed2a24b307de155252 Mon Sep 17 00:00:00 2001 From: Sam Hoang Van Date: Wed, 18 Jun 2025 09:39:18 +0700 Subject: [PATCH 36/48] fix: sorted-thread-not-stable (#5336) --- web-app/src/hooks/useThreads.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/web-app/src/hooks/useThreads.ts b/web-app/src/hooks/useThreads.ts index 7bc46675e..f8f990b44 100644 --- a/web-app/src/hooks/useThreads.ts +++ b/web-app/src/hooks/useThreads.ts @@ -32,11 +32,6 @@ export const useThreads = create()((set, get) => ({ threads: {}, searchIndex: null, setThreads: (threads) => { - threads.forEach((thread) => { - updateThread({ - ...thread, - }) - }) const threadMap = threads.reduce( (acc: Record, thread) => { acc[thread.id] = thread From 417b789b5ee7a30a995e36ca0125b6bbe0e10f97 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Wed, 18 Jun 2025 12:35:37 +0700 Subject: [PATCH 37/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20update=20wording=20des?= =?UTF-8?q?c=20vulkan=20(#5338)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: update wording desc vulkan * ✨enhancement: update copy --- web-app/src/routes/settings/hardware.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index ab42a15f9..beab425fe 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -352,7 +352,7 @@ function Hardware() { Date: Wed, 18 Jun 2025 12:35:42 +0700 Subject: [PATCH 38/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20handle=20NaN=20value?= =?UTF-8?q?=20tokenspeed=20(#5339)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/TokenSpeedIndicator.tsx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx index 74f277f70..ea9f91be0 100644 --- a/web-app/src/containers/TokenSpeedIndicator.tsx +++ b/web-app/src/containers/TokenSpeedIndicator.tsx @@ -12,8 +12,8 @@ export const TokenSpeedIndicator = ({ streaming, }: TokenSpeedIndicatorProps) => { const { tokenSpeed } = useAppState() - const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number }) - ?.tokenSpeed + const persistedTokenSpeed = + (metadata?.tokenSpeed as { tokenSpeed: number })?.tokenSpeed || 0 const nonStreamingAssistantParam = typeof metadata?.assistant === 'object' && @@ -30,7 +30,9 @@ export const TokenSpeedIndicator = ({ {Math.round( - streaming ? toNumber(tokenSpeed?.tokenSpeed) : persistedTokenSpeed + streaming + ? 
toNumber(tokenSpeed?.tokenSpeed) + : toNumber(persistedTokenSpeed) )}  tokens/sec From a075debb0538e8319377d3ba32961773f67e985e Mon Sep 17 00:00:00 2001 From: LazyYuuki Date: Wed, 18 Jun 2025 14:02:36 +0800 Subject: [PATCH 39/48] =?UTF-8?q?=F0=9F=90=9B=20fix:=20window=20path=20pro?= =?UTF-8?q?blem?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-tauri/src/core/setup.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src-tauri/src/core/setup.rs b/src-tauri/src/core/setup.rs index 602080378..c2d3499f3 100644 --- a/src-tauri/src/core/setup.rs +++ b/src-tauri/src/core/setup.rs @@ -247,7 +247,10 @@ pub fn setup_sidecar(app: &App) -> Result<(), String> { ]); #[cfg(target_os = "windows")] { - cmd = cmd.current_dir(app_handle_for_spawn.path().resource_dir().unwrap()); + let resource_dir = app_handle_for_spawn.path().resource_dir().unwrap(); + let normalized_path = resource_dir.to_string_lossy().replace(r"\\?\", ""); + let normalized_pathbuf = PathBuf::from(normalized_path); + cmd = cmd.current_dir(normalized_pathbuf); } #[cfg(not(target_os = "windows"))] From 369ba5ac75669d30599c04fdb383013e8bad2db9 Mon Sep 17 00:00:00 2001 From: Sam Hoang Van Date: Wed, 18 Jun 2025 14:11:53 +0700 Subject: [PATCH 40/48] feat(server): filter /models endpoint to show only downloaded models (#5343) - Add filtering logic to proxy server for GET /models requests - Keep only models with status "downloaded" in response - Remove Content-Length header to prevent mismatch after filtering - Support both ListModelsResponseDto and direct array formats - Add comprehensive tests for filtering functionality - Fix Content-Length header conflict causing empty responses Fixes issue where all models were returned regardless of download status. 
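A minimal TypeScript sketch of the filtering rule described above, for illustration only; the shipped change is the Rust `filter_models_response` added in the diff below. The type and helper names here are assumptions, not part of the patch; only the `status === 'downloaded'` check and the two response shapes come from the commit message.

```typescript
// Illustration of the /models filtering rule (not the shipped Rust code).
type ModelEntry = { id: string; status?: string }
type ModelsResponse = { data: ModelEntry[] } | ModelEntry[]

// Keep only entries whose status is exactly "downloaded";
// entries without a status field are dropped as well.
const isDownloaded = (m: ModelEntry): boolean => m.status === 'downloaded'

// Hypothetical helper; handles both shapes the commit message mentions.
function keepDownloadedModels(response: ModelsResponse): ModelsResponse {
  if (Array.isArray(response)) {
    // Direct array format: [ { id, status }, ... ]
    return response.filter(isDownloaded)
  }
  // ListModelsResponseDto format: { data: [ { id, status }, ... ] }
  return { ...response, data: response.data.filter(isDownloaded) }
}

// keepDownloadedModels({ data: [{ id: 'a', status: 'downloaded' }, { id: 'b' }] })
// -> { data: [{ id: 'a', status: 'downloaded' }] }
```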
--- src-tauri/src/core/server.rs | 202 ++++++++++++++++++++++++++++++++++- 1 file changed, 199 insertions(+), 3 deletions(-) diff --git a/src-tauri/src/core/server.rs b/src-tauri/src/core/server.rs index e5a784670..f4f270106 100644 --- a/src-tauri/src/core/server.rs +++ b/src-tauri/src/core/server.rs @@ -1,6 +1,7 @@ use hyper::service::{make_service_fn, service_fn}; use hyper::{Body, Request, Response, Server, StatusCode}; use reqwest::Client; +use serde_json::Value; use std::convert::Infallible; use std::net::SocketAddr; use std::sync::LazyLock; @@ -263,6 +264,7 @@ async fn proxy_request( let original_path = req.uri().path(); let path = get_destination_path(original_path, &config.prefix); + let method = req.method().clone(); // Verify Host header (check target), but bypass for whitelisted paths let whitelisted_paths = ["/", "/openapi.json", "/favicon.ico"]; @@ -368,10 +370,11 @@ async fn proxy_request( let mut builder = Response::builder().status(status); - // Copy response headers, excluding CORS headers to avoid conflicts + // Copy response headers, excluding CORS headers and Content-Length to avoid conflicts for (name, value) in response.headers() { // Skip CORS headers from upstream to avoid duplicates - if !is_cors_header(name.as_str()) { + // Skip Content-Length header when filtering models response to avoid mismatch + if !is_cors_header(name.as_str()) && name != hyper::header::CONTENT_LENGTH { builder = builder.header(name, value); } } @@ -386,7 +389,20 @@ async fn proxy_request( // Read response body match response.bytes().await { - Ok(bytes) => Ok(builder.body(Body::from(bytes)).unwrap()), + Ok(bytes) => { + // Check if this is a /models endpoint request and filter the response + if path.contains("/models") && method == hyper::Method::GET { + match filter_models_response(&bytes) { + Ok(filtered_bytes) => Ok(builder.body(Body::from(filtered_bytes)).unwrap()), + Err(e) => { + log::warn!("Failed to filter models response: {}, returning original", e); + Ok(builder.body(Body::from(bytes)).unwrap()) + } + } + } else { + Ok(builder.body(Body::from(bytes)).unwrap()) + } + }, Err(e) => { log::error!("Failed to read response body: {}", e); let mut error_response = @@ -419,6 +435,50 @@ async fn proxy_request( } } +/// Filters models response to keep only models with status "downloaded" +fn filter_models_response(bytes: &[u8]) -> Result, Box> { + let response_text = std::str::from_utf8(bytes)?; + let mut response_json: Value = serde_json::from_str(response_text)?; + + // Check if this is a ListModelsResponseDto format with data array + if let Some(data_array) = response_json.get_mut("data") { + if let Some(models) = data_array.as_array_mut() { + // Keep only models where status == "downloaded" + models.retain(|model| { + if let Some(status) = model.get("status") { + if let Some(status_str) = status.as_str() { + status_str == "downloaded" + } else { + false // Remove models without string status + } + } else { + false // Remove models without status field + } + }); + log::debug!("Filtered models response: {} downloaded models remaining", models.len()); + } + } else if response_json.is_array() { + // Handle direct array format + if let Some(models) = response_json.as_array_mut() { + models.retain(|model| { + if let Some(status) = model.get("status") { + if let Some(status_str) = status.as_str() { + status_str == "downloaded" + } else { + false // Remove models without string status + } + } else { + false // Remove models without status field + } + }); + log::debug!("Filtered models response: 
{} downloaded models remaining", models.len()); + } + } + + let filtered_response = serde_json::to_vec(&response_json)?; + Ok(filtered_response) +} + /// Checks if a header is a CORS-related header that should be filtered out from upstream responses fn is_cors_header(header_name: &str) -> bool { let header_lower = header_name.to_lowercase(); @@ -585,3 +645,139 @@ pub async fn stop_server() -> Result<(), Box Date: Wed, 18 Jun 2025 14:54:56 +0700 Subject: [PATCH 41/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20render=20streaming=20t?= =?UTF-8?q?oken=20speed=20based=20on=20thread=20ID=20&=20assistant=20metad?= =?UTF-8?q?ata=20(#5346)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web-app/src/containers/ThreadContent.tsx | 12 ++++++++++-- web-app/src/hooks/useMessages.ts | 8 +++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 9e491bf55..076327ea6 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -363,7 +363,10 @@ export const ThreadContent = memo(
    @@ -439,7 +442,11 @@ export const ThreadContent = memo(
@@ -447,6 +454,7 @@ export const ThreadContent = memo( )} )} + {item.type === 'image_url' && image && (
()((set, get) => ({ created_at: message.created_at || Date.now(), metadata: { ...message.metadata, - assistant: currentAssistant, + assistant: { + id: currentAssistant?.id || '', + name: currentAssistant?.name || '', + avatar: currentAssistant?.avatar || '', + instructions: currentAssistant?.instructions || '', + parameters: currentAssistant?.parameters || '', + }, }, } createMessage(newMessage).then((createdMessage) => { From 6cee466f52a95be45a34bbedcc1eea457c38872e Mon Sep 17 00:00:00 2001 From: Sam Hoang Van Date: Wed, 18 Jun 2025 15:30:31 +0700 Subject: [PATCH 42/48] fix(server): add gzip decompression support for /models endpoint filtering (#5349) - Add gzip detection using magic number check (0x1f 0x8b) - Implement gzip decompression before JSON parsing - Add gzip re-compression for filtered responses - Fix "invalid utf-8 sequence" error when upstream returns gzipped content - Maintain Content-Encoding consistency for compressed responses - Add comprehensive gzip handling with flate2 library Resolves issue where filtering failed on gzip-compressed model responses. --- src-tauri/src/core/server.rs | 48 +++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/src-tauri/src/core/server.rs b/src-tauri/src/core/server.rs index f4f270106..627ec6a7c 100644 --- a/src-tauri/src/core/server.rs +++ b/src-tauri/src/core/server.rs @@ -7,6 +7,8 @@ use std::net::SocketAddr; use std::sync::LazyLock; use tokio::sync::Mutex; use tokio::task::JoinHandle; +use flate2::read::GzDecoder; +use std::io::Read; /// Server handle type for managing the proxy server lifecycle type ServerHandle = JoinHandle>>; @@ -435,9 +437,42 @@ async fn proxy_request( } } +/// Checks if the byte array starts with gzip magic number +fn is_gzip_encoded(bytes: &[u8]) -> bool { + bytes.len() >= 2 && bytes[0] == 0x1f && bytes[1] == 0x8b +} + +/// Decompresses gzip-encoded bytes +fn decompress_gzip(bytes: &[u8]) -> Result, Box> { + let mut decoder = GzDecoder::new(bytes); + let mut decompressed = Vec::new(); + decoder.read_to_end(&mut decompressed)?; + Ok(decompressed) +} + +/// Compresses bytes using gzip +fn compress_gzip(bytes: &[u8]) -> Result, Box> { + use flate2::write::GzEncoder; + use flate2::Compression; + use std::io::Write; + + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(bytes)?; + let compressed = encoder.finish()?; + Ok(compressed) +} + /// Filters models response to keep only models with status "downloaded" fn filter_models_response(bytes: &[u8]) -> Result, Box> { - let response_text = std::str::from_utf8(bytes)?; + // Try to decompress if it's gzip-encoded + let decompressed_bytes = if is_gzip_encoded(bytes) { + log::debug!("Response is gzip-encoded, decompressing..."); + decompress_gzip(bytes)? 
+ } else { + bytes.to_vec() + }; + + let response_text = std::str::from_utf8(&decompressed_bytes)?; let mut response_json: Value = serde_json::from_str(response_text)?; // Check if this is a ListModelsResponseDto format with data array @@ -475,8 +510,15 @@ fn filter_models_response(bytes: &[u8]) -> Result, Box Date: Wed, 18 Jun 2025 16:19:48 +0700 Subject: [PATCH 43/48] fix(proxy): implement true HTTP streaming for chat completions API (#5350) --- src-tauri/src/core/server.rs | 71 ++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/src-tauri/src/core/server.rs b/src-tauri/src/core/server.rs index 627ec6a7c..ee8b1cbb1 100644 --- a/src-tauri/src/core/server.rs +++ b/src-tauri/src/core/server.rs @@ -7,6 +7,7 @@ use std::net::SocketAddr; use std::sync::LazyLock; use tokio::sync::Mutex; use tokio::task::JoinHandle; +use futures_util::StreamExt; use flate2::read::GzDecoder; use std::io::Read; @@ -389,11 +390,11 @@ async fn proxy_request( &config.trusted_hosts, ); - // Read response body - match response.bytes().await { - Ok(bytes) => { - // Check if this is a /models endpoint request and filter the response - if path.contains("/models") && method == hyper::Method::GET { + // Handle streaming vs non-streaming responses + if path.contains("/models") && method == hyper::Method::GET { + // For /models endpoint, we need to buffer and filter the response + match response.bytes().await { + Ok(bytes) => { match filter_models_response(&bytes) { Ok(filtered_bytes) => Ok(builder.body(Body::from(filtered_bytes)).unwrap()), Err(e) => { @@ -401,24 +402,46 @@ async fn proxy_request( Ok(builder.body(Body::from(bytes)).unwrap()) } } - } else { - Ok(builder.body(Body::from(bytes)).unwrap()) + }, + Err(e) => { + log::error!("Failed to read response body: {}", e); + let mut error_response = + Response::builder().status(StatusCode::INTERNAL_SERVER_ERROR); + error_response = add_cors_headers_with_host_and_origin( + error_response, + &host_header, + &origin_header, + &config.trusted_hosts, + ); + Ok(error_response + .body(Body::from("Error reading upstream response")) + .unwrap()) } - }, - Err(e) => { - log::error!("Failed to read response body: {}", e); - let mut error_response = - Response::builder().status(StatusCode::INTERNAL_SERVER_ERROR); - error_response = add_cors_headers_with_host_and_origin( - error_response, - &host_header, - &origin_header, - &config.trusted_hosts, - ); - Ok(error_response - .body(Body::from("Error reading upstream response")) - .unwrap()) } + } else { + // For streaming endpoints (like chat completions), we need to collect and forward the stream + let mut stream = response.bytes_stream(); + let (mut sender, body) = hyper::Body::channel(); + + // Spawn a task to forward the stream + tokio::spawn(async move { + while let Some(chunk_result) = stream.next().await { + match chunk_result { + Ok(chunk) => { + if sender.send_data(chunk).await.is_err() { + log::debug!("Client disconnected during streaming"); + break; + } + } + Err(e) => { + log::error!("Stream error: {}", e); + break; + } + } + } + }); + + Ok(builder.body(body).unwrap()) } } Err(e) => { @@ -640,9 +663,11 @@ pub async fn start_server( trusted_hosts, }; - // Create HTTP client + // Create HTTP client with longer timeout for streaming let client = Client::builder() - .timeout(std::time::Duration::from_secs(30)) + .timeout(std::time::Duration::from_secs(300)) // 5 minutes for streaming + .pool_max_idle_per_host(10) + .pool_idle_timeout(std::time::Duration::from_secs(30)) .build()?; // 
Create service handler From 0681c6bb9ef2a72c2ab07f49add7bb366c31e22b Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Wed, 18 Jun 2025 19:58:24 +0700 Subject: [PATCH 44/48] fix: glitch toggle gpus (#5353) * fix: glitch toogle gpu * fix: Using the GPU's array index as a key for gpuLoading * enhancement: added try-finally --- web-app/src/hooks/useHardware.ts | 68 ++++++++++++++++-------- web-app/src/routes/settings/hardware.tsx | 8 +-- 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/web-app/src/hooks/useHardware.ts b/web-app/src/hooks/useHardware.ts index 7ad067cc8..16e83a7a5 100644 --- a/web-app/src/hooks/useHardware.ts +++ b/web-app/src/hooks/useHardware.ts @@ -87,8 +87,17 @@ interface HardwareStore { // Update RAM available updateRAMAvailable: (available: number) => void - // Toggle GPU activation - toggleGPUActivation: (index: number) => void + // Toggle GPU activation (async, with loading) + toggleGPUActivation: (index: number) => Promise + + // GPU loading state + gpuLoading: { [index: number]: boolean } + setGpuLoading: (index: number, loading: boolean) => void + + // Polling control + pollingPaused: boolean + pausePolling: () => void + resumePolling: () => void // Reorder GPUs reorderGPUs: (oldIndex: number, newIndex: number) => void @@ -96,8 +105,16 @@ interface HardwareStore { export const useHardware = create()( persist( - (set) => ({ + (set, get) => ({ hardwareData: defaultHardwareData, + gpuLoading: {}, + pollingPaused: false, + setGpuLoading: (index, loading) => + set((state) => ({ + gpuLoading: { ...state.gpuLoading, [state.hardwareData.gpus[index].uuid]: loading }, + })), + pausePolling: () => set({ pollingPaused: true }), + resumePolling: () => set({ pollingPaused: false }), setCPU: (cpu) => set((state) => ({ @@ -172,25 +189,34 @@ export const useHardware = create()( }, })), - toggleGPUActivation: (index) => { - set((state) => { - const newGPUs = [...state.hardwareData.gpus] - if (index >= 0 && index < newGPUs.length) { - newGPUs[index] = { - ...newGPUs[index], - activated: !newGPUs[index].activated, + toggleGPUActivation: async (index) => { + const { pausePolling, setGpuLoading, resumePolling } = get(); + pausePolling(); + setGpuLoading(index, true); + try { + await new Promise((resolve) => setTimeout(resolve, 200)); // Simulate async, replace with real API if needed + set((state) => { + const newGPUs = [...state.hardwareData.gpus]; + if (index >= 0 && index < newGPUs.length) { + newGPUs[index] = { + ...newGPUs[index], + activated: !newGPUs[index].activated, + }; } - } - setActiveGpus({ - gpus: newGPUs.filter((e) => e.activated).map((e) => parseInt(e.id)), - }) - return { - hardwareData: { - ...state.hardwareData, - gpus: newGPUs, - }, - } - }) + setActiveGpus({ + gpus: newGPUs.filter((e) => e.activated).map((e) => parseInt(e.id)), + }); + return { + hardwareData: { + ...state.hardwareData, + gpus: newGPUs, + }, + }; + }); + } finally { + setGpuLoading(index, false); + setTimeout(resumePolling, 1000); // Resume polling after 1s + } }, reorderGPUs: (oldIndex, newIndex) => diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index beab425fe..23f4eafef 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -49,7 +49,7 @@ function SortableGPUItem({ gpu, index }: { gpu: GPU; index: number }) { isDragging, } = useSortable({ id: gpu.id || index }) - const { toggleGPUActivation } = useHardware() + const { toggleGPUActivation, gpuLoading } = useHardware() const style = { 
transform: CSS.Transform.toString(transform), @@ -78,6 +78,7 @@ function SortableGPUItem({ gpu, index }: { gpu: GPU; index: number }) {
toggleGPUActivation(index)} />
@@ -122,6 +123,7 @@ function Hardware() { updateCPUUsage, updateRAMAvailable, reorderGPUs, + pollingPaused, } = useHardware() const { vulkanEnabled, setVulkanEnabled } = useVulkan() @@ -155,16 +157,16 @@ function Hardware() { } useEffect(() => { + if (pollingPaused) return; const intervalId = setInterval(() => { getHardwareInfo().then((data) => { - setHardwareData(data as unknown as HardwareData) updateCPUUsage(data.cpu.usage) updateRAMAvailable(data.ram.available) }) }, 5000) return () => clearInterval(intervalId) - }, [setHardwareData, updateCPUUsage, updateRAMAvailable]) + }, [setHardwareData, updateCPUUsage, updateRAMAvailable, pollingPaused]) const handleClickSystemMonitor = async () => { try { From c6cd37d91631a006e1073744a8f1fedaa3ee13ca Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Wed, 18 Jun 2025 20:28:30 +0700 Subject: [PATCH 45/48] fix: built in models capabilities (#5354) --- web-app/src/services/providers.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web-app/src/services/providers.ts b/web-app/src/services/providers.ts index 6bd2b63f0..517d2c5f0 100644 --- a/web-app/src/services/providers.ts +++ b/web-app/src/services/providers.ts @@ -13,6 +13,7 @@ import { import { modelSettings } from '@/lib/predefined' import { fetchModels } from './models' import { ExtensionManager } from '@/lib/extension' +import { isProd } from '@/lib/version' export const getProviders = async (): Promise => { const engines = !localStorage.getItem('migration_completed') @@ -65,7 +66,7 @@ export const getProviders = async (): Promise => { ].filter(Boolean) as string[] return { ...(modelManifest ?? { id: model, name: model }), - capabilities, + ...(!isProd ? { capabilities } : {}), } as Model }) } From 5b60116d218c15a49b6a5ee6eab3dcc1a16ee727 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Wed, 18 Jun 2025 21:48:18 +0700 Subject: [PATCH 46/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20setting=20provider=20h?= =?UTF-8?q?ide=20model=20capabilities=20(#5355)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: setting provider hide model capabilities * 🐛fix: hide tools icon on dropdown model providers * fix: stop server on app close or reload * ✨enhancement: reset heading class --------- Co-authored-by: Louis --- web-app/src/containers/DropdownModelProvider.tsx | 3 ++- web-app/src/index.css | 9 +++++++++ web-app/src/routes/__root.tsx | 8 ++++++++ web-app/src/routes/hub.tsx | 2 +- web-app/src/routes/settings/providers/$providerName.tsx | 4 +++- web-app/src/services/providers.ts | 3 +-- 6 files changed, 24 insertions(+), 5 deletions(-) diff --git a/web-app/src/containers/DropdownModelProvider.tsx b/web-app/src/containers/DropdownModelProvider.tsx index 6d5d24155..5b125282a 100644 --- a/web-app/src/containers/DropdownModelProvider.tsx +++ b/web-app/src/containers/DropdownModelProvider.tsx @@ -16,6 +16,7 @@ import { ModelSetting } from '@/containers/ModelSetting' import ProvidersAvatar from '@/containers/ProvidersAvatar' import { Fzf } from 'fzf' import { localStorageKey } from '@/constants/localStorage' +import { isProd } from '@/lib/version' type DropdownModelProviderProps = { model?: ThreadModel @@ -400,7 +401,7 @@ const DropdownModelProvider = ({ />
-                    {capabilities.length > 0 && (
+                    {!isProd && capabilities.length > 0 && (
diff --git a/web-app/src/index.css b/web-app/src/index.css index 185cb0612..d8ae284e9 100644 --- a/web-app/src/index.css +++ b/web-app/src/index.css @@ -79,6 +79,15 @@ ::-ms-reveal { display: none; } + + .reset-heading { + :is(h1, h2, h3, h4, h5, h6) { + font-weight: 600; + font-size: 14px !important; + margin-top: 0 !important; + margin-bottom: 0.5em; + } + } } @layer utilities { diff --git a/web-app/src/routes/__root.tsx b/web-app/src/routes/__root.tsx index 67e88ed90..6f6099cbb 100644 --- a/web-app/src/routes/__root.tsx +++ b/web-app/src/routes/__root.tsx @@ -18,6 +18,7 @@ import { AnalyticProvider } from '@/providers/AnalyticProvider' import { useLeftPanel } from '@/hooks/useLeftPanel' import { cn } from '@/lib/utils' import ToolApproval from '@/containers/dialogs/ToolApproval' +import { useEffect } from 'react' export const Route = createRootRoute({ component: RootLayout, @@ -82,6 +83,13 @@ function RootLayout() { router.location.pathname === route.systemMonitor || router.location.pathname === route.appLogs + useEffect(() => { + return () => { + // This is to attempt to stop the local API server when the app is closed or reloaded. + window.core?.api?.stopServer() + } + }, []) + return ( diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index 42147d0f8..3bca8649f 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -495,7 +495,7 @@ function Hub() { } actions={ diff --git a/web-app/src/services/providers.ts b/web-app/src/services/providers.ts index 517d2c5f0..6bd2b63f0 100644 --- a/web-app/src/services/providers.ts +++ b/web-app/src/services/providers.ts @@ -13,7 +13,6 @@ import { import { modelSettings } from '@/lib/predefined' import { fetchModels } from './models' import { ExtensionManager } from '@/lib/extension' -import { isProd } from '@/lib/version' export const getProviders = async (): Promise => { const engines = !localStorage.getItem('migration_completed') @@ -66,7 +65,7 @@ export const getProviders = async (): Promise => { ].filter(Boolean) as string[] return { ...(modelManifest ?? { id: model, name: model }), - ...(!isProd ? 
{ capabilities } : {}), + capabilities, } as Model }) } From 22396111be16228092e133c34407e00f8d83c2e8 Mon Sep 17 00:00:00 2001 From: Louis Date: Thu, 19 Jun 2025 00:12:03 +0700 Subject: [PATCH 47/48] fix: stop api server on page unload (#5356) * fix: stop api server on page unload * fix: check api server status on reload * refactor: api server state * fix: should not pop the guard --- src-tauri/src/core/cmd.rs | 38 +++++-- src-tauri/src/core/server.rs | 100 +++++++++++------- src-tauri/src/core/state.rs | 5 + src-tauri/src/lib.rs | 2 + web-app/src/routes/__root.tsx | 8 -- .../src/routes/settings/local-api-server.tsx | 14 ++- 6 files changed, 109 insertions(+), 58 deletions(-) diff --git a/src-tauri/src/core/cmd.rs b/src-tauri/src/core/cmd.rs index 4a48e63d3..4b4463d12 100644 --- a/src-tauri/src/core/cmd.rs +++ b/src-tauri/src/core/cmd.rs @@ -348,23 +348,41 @@ pub async fn start_server( api_key: String, trusted_hosts: Vec, ) -> Result { - let auth_token = app - .state::() - .app_token - .clone() - .unwrap_or_default(); - server::start_server(host, port, prefix, auth_token, api_key, trusted_hosts) - .await - .map_err(|e| e.to_string())?; + let state = app.state::(); + let auth_token = state.app_token.clone().unwrap_or_default(); + let server_handle = state.server_handle.clone(); + + server::start_server( + server_handle, + host, + port, + prefix, + auth_token, + api_key, + trusted_hosts, + ) + .await + .map_err(|e| e.to_string())?; Ok(true) } #[tauri::command] -pub async fn stop_server() -> Result<(), String> { - server::stop_server().await.map_err(|e| e.to_string())?; +pub async fn stop_server(state: State<'_, AppState>) -> Result<(), String> { + let server_handle = state.server_handle.clone(); + + server::stop_server(server_handle) + .await + .map_err(|e| e.to_string())?; Ok(()) } +#[tauri::command] +pub async fn get_server_status(state: State<'_, AppState>) -> Result { + let server_handle = state.server_handle.clone(); + + Ok(server::is_server_running(server_handle).await) +} + #[tauri::command] pub async fn read_logs(app: AppHandle) -> Result { let log_path = get_jan_data_folder_path(app).join("logs").join("app.log"); diff --git a/src-tauri/src/core/server.rs b/src-tauri/src/core/server.rs index ee8b1cbb1..6da4ebf9b 100644 --- a/src-tauri/src/core/server.rs +++ b/src-tauri/src/core/server.rs @@ -1,21 +1,16 @@ +use flate2::read::GzDecoder; +use futures_util::StreamExt; use hyper::service::{make_service_fn, service_fn}; use hyper::{Body, Request, Response, Server, StatusCode}; use reqwest::Client; use serde_json::Value; use std::convert::Infallible; -use std::net::SocketAddr; -use std::sync::LazyLock; -use tokio::sync::Mutex; -use tokio::task::JoinHandle; -use futures_util::StreamExt; -use flate2::read::GzDecoder; use std::io::Read; +use std::net::SocketAddr; +use std::sync::Arc; +use tokio::sync::Mutex; -/// Server handle type for managing the proxy server lifecycle -type ServerHandle = JoinHandle>>; - -/// Global singleton for the current server instance -static SERVER_HANDLE: LazyLock>> = LazyLock::new(|| Mutex::new(None)); +use crate::core::state::ServerHandle; /// Configuration for the proxy server #[derive(Clone)] @@ -272,7 +267,7 @@ async fn proxy_request( // Verify Host header (check target), but bypass for whitelisted paths let whitelisted_paths = ["/", "/openapi.json", "/favicon.ico"]; let is_whitelisted_path = whitelisted_paths.contains(&path.as_str()); - + if !is_whitelisted_path { if !host_header.is_empty() { if !is_valid_host(&host_header, &config.trusted_hosts) { @@ -333,7 
+328,10 @@ async fn proxy_request( .unwrap()); } } else if is_whitelisted_path { - log::debug!("Bypassing authorization check for whitelisted path: {}", path); + log::debug!( + "Bypassing authorization check for whitelisted path: {}", + path + ); } // Block access to /configs endpoint @@ -394,13 +392,14 @@ async fn proxy_request( if path.contains("/models") && method == hyper::Method::GET { // For /models endpoint, we need to buffer and filter the response match response.bytes().await { - Ok(bytes) => { - match filter_models_response(&bytes) { - Ok(filtered_bytes) => Ok(builder.body(Body::from(filtered_bytes)).unwrap()), - Err(e) => { - log::warn!("Failed to filter models response: {}, returning original", e); - Ok(builder.body(Body::from(bytes)).unwrap()) - } + Ok(bytes) => match filter_models_response(&bytes) { + Ok(filtered_bytes) => Ok(builder.body(Body::from(filtered_bytes)).unwrap()), + Err(e) => { + log::warn!( + "Failed to filter models response: {}, returning original", + e + ); + Ok(builder.body(Body::from(bytes)).unwrap()) } }, Err(e) => { @@ -422,7 +421,7 @@ async fn proxy_request( // For streaming endpoints (like chat completions), we need to collect and forward the stream let mut stream = response.bytes_stream(); let (mut sender, body) = hyper::Body::channel(); - + // Spawn a task to forward the stream tokio::spawn(async move { while let Some(chunk_result) = stream.next().await { @@ -440,7 +439,7 @@ async fn proxy_request( } } }); - + Ok(builder.body(body).unwrap()) } } @@ -478,7 +477,7 @@ fn compress_gzip(bytes: &[u8]) -> Result, Box Result, Box Result, Box> { +fn filter_models_response( + bytes: &[u8], +) -> Result, Box> { // Try to decompress if it's gzip-encoded let decompressed_bytes = if is_gzip_encoded(bytes) { log::debug!("Response is gzip-encoded, decompressing..."); @@ -494,10 +495,10 @@ fn filter_models_response(bytes: &[u8]) -> Result, Box Result, Box Result, Box bool { }) } +pub async fn is_server_running(server_handle: Arc>>) -> bool { + let handle_guard = server_handle.lock().await; + + if handle_guard.is_some() { + true + } else { + false + } +} + /// Starts the proxy server pub async fn start_server( + server_handle: Arc>>, host: String, port: u16, prefix: String, @@ -644,7 +662,7 @@ pub async fn start_server( trusted_hosts: Vec, ) -> Result> { // Check if server is already running - let mut handle_guard = SERVER_HANDLE.lock().await; + let mut handle_guard = server_handle.lock().await; if handle_guard.is_some() { return Err("Server is already running".into()); } @@ -687,7 +705,7 @@ pub async fn start_server( log::info!("Proxy server started on http://{}", addr); // Spawn server task - let server_handle = tokio::spawn(async move { + let server_task = tokio::spawn(async move { if let Err(e) = server.await { log::error!("Server error: {}", e); return Err(Box::new(e) as Box); @@ -695,16 +713,20 @@ pub async fn start_server( Ok(()) }); - *handle_guard = Some(server_handle); + *handle_guard = Some(server_task); Ok(true) } /// Stops the currently running proxy server -pub async fn stop_server() -> Result<(), Box> { - let mut handle_guard = SERVER_HANDLE.lock().await; +pub async fn stop_server( + server_handle: Arc>>, +) -> Result<(), Box> { + let mut handle_guard = server_handle.lock().await; if let Some(handle) = handle_guard.take() { handle.abort(); + // remove the handle to prevent future use + *handle_guard = None; log::info!("Proxy server stopped"); } else { log::debug!("No server was running"); @@ -746,10 +768,10 @@ mod tests { let data = 
filtered_response["data"].as_array().unwrap(); assert_eq!(data.len(), 1); // Should have 1 model (only model1 with "downloaded" status) - + // Verify only model1 (with "downloaded" status) is kept assert!(data.iter().any(|model| model["id"] == "model1")); - + // Verify model2 and model3 are filtered out assert!(!data.iter().any(|model| model["id"] == "model2")); assert!(!data.iter().any(|model| model["id"] == "model3")); @@ -838,11 +860,11 @@ mod tests { let data = filtered_response["data"].as_array().unwrap(); assert_eq!(data.len(), 2); // Should have 2 models (model1 and model3 with "downloaded" status) - + // Verify only models with "downloaded" status are kept assert!(data.iter().any(|model| model["id"] == "model1")); assert!(data.iter().any(|model| model["id"] == "model3")); - + // Verify other models are filtered out assert!(!data.iter().any(|model| model["id"] == "model2")); assert!(!data.iter().any(|model| model["id"] == "model4")); diff --git a/src-tauri/src/core/state.rs b/src-tauri/src/core/state.rs index cb6a5d3fa..9957ba92e 100644 --- a/src-tauri/src/core/state.rs +++ b/src-tauri/src/core/state.rs @@ -4,6 +4,10 @@ use crate::core::utils::download::DownloadManagerState; use rand::{distributions::Alphanumeric, Rng}; use rmcp::{service::RunningService, RoleClient}; use tokio::sync::Mutex; +use tokio::task::JoinHandle; + +/// Server handle type for managing the proxy server lifecycle +pub type ServerHandle = JoinHandle>>; #[derive(Default)] pub struct AppState { @@ -12,6 +16,7 @@ pub struct AppState { pub download_manager: Arc>, pub cortex_restart_count: Arc>, pub cortex_killed_intentionally: Arc>, + pub server_handle: Arc>>, } pub fn generate_app_token() -> String { rand::thread_rng() diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 076984106..4ed6ecee7 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -55,6 +55,7 @@ pub fn run() { core::cmd::app_token, core::cmd::start_server, core::cmd::stop_server, + core::cmd::get_server_status, core::cmd::read_logs, core::cmd::change_app_data_folder, core::cmd::reset_cortex_restart_count, @@ -92,6 +93,7 @@ pub fn run() { download_manager: Arc::new(Mutex::new(DownloadManagerState::default())), cortex_restart_count: Arc::new(Mutex::new(0)), cortex_killed_intentionally: Arc::new(Mutex::new(false)), + server_handle: Arc::new(Mutex::new(None)), }) .setup(|app| { app.handle().plugin( diff --git a/web-app/src/routes/__root.tsx b/web-app/src/routes/__root.tsx index 6f6099cbb..67e88ed90 100644 --- a/web-app/src/routes/__root.tsx +++ b/web-app/src/routes/__root.tsx @@ -18,7 +18,6 @@ import { AnalyticProvider } from '@/providers/AnalyticProvider' import { useLeftPanel } from '@/hooks/useLeftPanel' import { cn } from '@/lib/utils' import ToolApproval from '@/containers/dialogs/ToolApproval' -import { useEffect } from 'react' export const Route = createRootRoute({ component: RootLayout, @@ -83,13 +82,6 @@ function RootLayout() { router.location.pathname === route.systemMonitor || router.location.pathname === route.appLogs - useEffect(() => { - return () => { - // This is to attempt to stop the local API server when the app is closed or reloaded. 
- window.core?.api?.stopServer() - } - }, []) - return ( diff --git a/web-app/src/routes/settings/local-api-server.tsx b/web-app/src/routes/settings/local-api-server.tsx index dd7561be5..94f577074 100644 --- a/web-app/src/routes/settings/local-api-server.tsx +++ b/web-app/src/routes/settings/local-api-server.tsx @@ -17,7 +17,8 @@ import { windowKey } from '@/constants/windows' import { IconLogs } from '@tabler/icons-react' import { cn } from '@/lib/utils' import { ApiKeyInput } from '@/containers/ApiKeyInput' -import { useState } from 'react' +import { useEffect, useState } from 'react' +import { invoke } from '@tauri-apps/api/core' // eslint-disable-next-line @typescript-eslint/no-explicit-any export const Route = createFileRoute(route.settings.local_api_server as any)({ @@ -44,6 +45,17 @@ function LocalAPIServer() { !apiKey || apiKey.toString().trim().length === 0 ) + useEffect(() => { + const checkServerStatus = async () => { + invoke('get_server_status').then((running) => { + if (running) { + setServerStatus('running') + } + }) + } + checkServerStatus() + }, [setServerStatus]) + const handleApiKeyValidation = (isValid: boolean) => { setIsApiKeyEmpty(!isValid) } From 67592f3f45ccd863d704bde1b60826a451b6fbd3 Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Thu, 19 Jun 2025 23:08:45 +0700 Subject: [PATCH 48/48] =?UTF-8?q?=F0=9F=90=9Bfix:=20avoid=20render=20html?= =?UTF-8?q?=20title=20thread=20(#5375)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: avoid render html title thread * chore: minor bump - tokenjs for manual adding models --------- Co-authored-by: Louis --- web-app/package.json | 2 +- web-app/src/containers/DropdownModelProvider.tsx | 11 +++-------- web-app/src/containers/ThreadList.tsx | 4 +--- web-app/src/hooks/useThreads.ts | 8 +++----- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/web-app/package.json b/web-app/package.json index ad82e5688..8b3193817 100644 --- a/web-app/package.json +++ b/web-app/package.json @@ -65,7 +65,7 @@ "remark-math": "^6.0.0", "sonner": "^2.0.3", "tailwindcss": "^4.1.4", - "token.js": "npm:token.js-fork@0.7.5", + "token.js": "npm:token.js-fork@0.7.9", "tw-animate-css": "^1.2.7", "ulidx": "^2.4.1", "unified": "^11.0.5", diff --git a/web-app/src/containers/DropdownModelProvider.tsx b/web-app/src/containers/DropdownModelProvider.tsx index 5b125282a..0747a1ad1 100644 --- a/web-app/src/containers/DropdownModelProvider.tsx +++ b/web-app/src/containers/DropdownModelProvider.tsx @@ -391,14 +391,9 @@ const DropdownModelProvider = ({ )} >
- + + {searchableModel.model.id} +
{!isProd && capabilities.length > 0 && ( diff --git a/web-app/src/containers/ThreadList.tsx b/web-app/src/containers/ThreadList.tsx index 0702c1bf9..9e3df65c1 100644 --- a/web-app/src/containers/ThreadList.tsx +++ b/web-app/src/containers/ThreadList.tsx @@ -101,9 +101,7 @@ const SortableItem = memo(({ thread }: { thread: Thread }) => { )} >
- + {thread.title || 'New Thread'}
   currentThreadId?: string
@@ -68,12 +68,10 @@ export const useThreads = create()((set, get) => ({
     return fzfResults.map(
       (result: { item: Thread; positions: Set<number> }) => {
         const thread = result.item // Fzf stores the original item here
-        // Ensure result.positions is an array, default to empty if undefined
-        const positions = Array.from(result.positions) || []
-        const highlightedTitle = highlightFzfMatch(thread.title, positions)
+
         return {
           ...thread,
-          title: highlightedTitle, // Override title with highlighted version
+          title: thread.title, // Override title with highlighted version
         }
       }
     )
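The hunk above stops storing highlight markup in `thread.title`, so search results keep the plain title and nothing HTML-like is rendered from it. If match highlighting is still wanted, it can be derived at render time from the fzf positions instead; the sketch below is one way to do that, with hypothetical names, and is not part of this patch.

```typescript
// Sketch: derive highlight segments from the plain title and fzf match
// positions, so no markup ever needs to be stored in thread.title.
function toSegments(title: string, positions: Set<number>) {
  return Array.from(title).map((char, index) => ({
    char,
    highlighted: positions.has(index),
  }))
}

// A renderer can wrap the highlighted segments itself (e.g. in a styled span),
// keeping stored titles free of HTML.
```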