From c32c9e1dcb515f8e75043d12612086739e028b71 Mon Sep 17 00:00:00 2001
From: Louis
Date: Wed, 5 Feb 2025 23:10:40 +0700
Subject: [PATCH] chore: add DeepSeek R1 Distill models to Hub (#4586)

---
 .../inference-cortex-extension/package.json   |  2 +-
 .../deepseek-r1-distill-llama-70b/model.json  | 35 +++++++++++++++++++
 .../deepseek-r1-distill-llama-8b/model.json   | 35 +++++++++++++++++++
 .../deepseek-r1-distill-qwen-1.5b/model.json  | 35 +++++++++++++++++++
 .../deepseek-r1-distill-qwen-14b/model.json   | 35 +++++++++++++++++++
 .../deepseek-r1-distill-qwen-32b/model.json   | 35 +++++++++++++++++++
 .../deepseek-r1-distill-qwen-7b/model.json    | 35 +++++++++++++++++++
 .../models/llama3.1-8b-instruct/model.json    | 10 ++----
 .../rolldown.config.mjs                       | 13 +++++++
 9 files changed, 226 insertions(+), 9 deletions(-)
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json

diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json
index f191f3071..a4558dc8f 100644
--- a/extensions/inference-cortex-extension/package.json
+++ b/extensions/inference-cortex-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.24",
+  "version": "1.0.25",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json
new file mode 100644
index 000000000..7f98b07a1
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-llama-70b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Llama 70B Q4",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
+    "ngl": 81
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["70B", "Featured"],
+    "size": 42500000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json
new file mode 100644
index 000000000..a3a075888
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-llama-8b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Llama 8B Q5",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
+    "ngl": 33
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["8B", "Featured"],
+    "size": 5730000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json
new file mode 100644
index 000000000..74b3dfc54
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-1.5b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 1.5B Q5",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
+    "ngl": 29
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["1.5B", "Featured"],
+    "size": 1290000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json
new file mode 100644
index 000000000..594ba6e41
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-14b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 14B Q4",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
+    "ngl": 49
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["14B", "Featured"],
+    "size": 8990000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json
new file mode 100644
index 000000000..6d7f5accf
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-32b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 32B Q4",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
+    "ngl": 65
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["32B", "Featured"],
+    "size": 19900000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json
new file mode 100644
index 000000000..eae53cf0e
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-7b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 7B Q5",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
+    "ngl": 29
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["7B", "Featured"],
+    "size": 5440000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
index ec9a0284b..1aeb80450 100644
--- a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
@@ -22,19 +22,13 @@
     "top_p": 0.95,
     "stream": true,
     "max_tokens": 8192,
-    "stop": [
-      "<|end_of_text|>",
-      "<|eot_id|>",
-      "<|eom_id|>"
-    ],
+    "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
     "frequency_penalty": 0,
     "presence_penalty": 0
   },
   "metadata": {
     "author": "MetaAI",
-    "tags": [
-      "8B", "Featured"
-    ],
+    "tags": ["8B", "Featured"],
     "size": 4920000000
   },
   "engine": "llama-cpp"
diff --git a/extensions/inference-cortex-extension/rolldown.config.mjs b/extensions/inference-cortex-extension/rolldown.config.mjs
index 278664d3d..aebd8ac38 100644
--- a/extensions/inference-cortex-extension/rolldown.config.mjs
+++ b/extensions/inference-cortex-extension/rolldown.config.mjs
@@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' w
 import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' }
 import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' }
 
+import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' }
+import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' }
+import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' }
+
 export default defineConfig([
   {
     input: 'src/index.ts',
@@ -106,6 +113,12 @@ export default defineConfig([
         qwen2514bJson,
         qwen2532bJson,
         qwen2572bJson,
+        deepseekR1DistillQwen_1_5b,
+        deepseekR1DistillQwen_7b,
+        deepseekR1DistillQwen_14b,
+        deepseekR1DistillQwen_32b,
+        deepseekR1DistillLlama_8b,
+        deepseekR1DistillLlama_70b,
       ]),
       NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
       SETTINGS: JSON.stringify(defaultSettingJson),