From c32c9e1dcb515f8e75043d12612086739e028b71 Mon Sep 17 00:00:00 2001
From: Louis
Date: Wed, 5 Feb 2025 23:10:40 +0700
Subject: [PATCH] chore: add DeepSeek R1 Distill models to Hub (#4586)

---
 .../inference-cortex-extension/package.json   |  2 +-
 .../deepseek-r1-distill-llama-70b/model.json  | 35 +++++++++++++++++++
 .../deepseek-r1-distill-llama-8b/model.json   | 35 +++++++++++++++++++
 .../deepseek-r1-distill-qwen-1.5b/model.json  | 35 +++++++++++++++++++
 .../deepseek-r1-distill-qwen-14b/model.json   | 35 +++++++++++++++++++
 .../deepseek-r1-distill-qwen-32b/model.json   | 35 +++++++++++++++++++
 .../deepseek-r1-distill-qwen-7b/model.json    | 35 +++++++++++++++++++
 .../models/llama3.1-8b-instruct/model.json    | 10 ++----
 .../rolldown.config.mjs                       | 13 +++++++
 9 files changed, 226 insertions(+), 9 deletions(-)
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json
 create mode 100644 extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json

diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json
index f191f3071..a4558dc8f 100644
--- a/extensions/inference-cortex-extension/package.json
+++ b/extensions/inference-cortex-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.24",
+  "version": "1.0.25",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json
new file mode 100644
index 000000000..7f98b07a1
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-llama-70b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Llama 70B Q4",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
+    "ngl": 81
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["70B", "Featured"],
+    "size": 42500000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json
new file mode 100644
index 000000000..a3a075888
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-llama-8b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Llama 8B Q5",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
+    "ngl": 33
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["8B", "Featured"],
+    "size": 5730000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json
new file mode 100644
index 000000000..74b3dfc54
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-1.5b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 1.5B Q5",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
+    "ngl": 29
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["1.5B", "Featured"],
+    "size": 1290000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json
new file mode 100644
index 000000000..594ba6e41
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-14b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 14B Q4",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
+    "ngl": 49
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["14B", "Featured"],
+    "size": 8990000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json
new file mode 100644
index 000000000..6d7f5accf
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-32b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 32B Q4",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
+    "ngl": 65
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["32B", "Featured"],
+    "size": 19900000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json
new file mode 100644
index 000000000..eae53cf0e
--- /dev/null
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
+      "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf"
+    }
+  ],
+  "id": "deepseek-r1-distill-qwen-7b",
+  "object": "model",
+  "name": "DeepSeek R1 Distill Qwen 7B Q5",
+  "version": "1.0",
+  "description": "DeepSeek-R1 is a cost-efficient, open-source model with strong agentic reasoning, multilingual capabilities, a large context window, and generalization across domains.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|User|> {prompt} <|Assistant|>",
+    "llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
+    "ngl": 29
+  },
+  "parameters": {
+    "temperature": 0.6,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 131072,
+    "stop": [],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "DeepSeek",
+    "tags": ["7B", "Featured"],
+    "size": 5440000000
+  },
+  "engine": "llama-cpp"
+}
diff --git a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
index ec9a0284b..1aeb80450 100644
--- a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
@@ -22,19 +22,13 @@
     "top_p": 0.95,
     "stream": true,
     "max_tokens": 8192,
-    "stop": [
-      "<|end_of_text|>",
-      "<|eot_id|>",
-      "<|eom_id|>"
-    ],
+    "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
     "frequency_penalty": 0,
     "presence_penalty": 0
   },
   "metadata": {
     "author": "MetaAI",
-    "tags": [
-      "8B", "Featured"
-    ],
+    "tags": ["8B", "Featured"],
     "size": 4920000000
   },
   "engine": "llama-cpp"
diff --git a/extensions/inference-cortex-extension/rolldown.config.mjs b/extensions/inference-cortex-extension/rolldown.config.mjs
index 278664d3d..aebd8ac38 100644
--- a/extensions/inference-cortex-extension/rolldown.config.mjs
+++ b/extensions/inference-cortex-extension/rolldown.config.mjs
@@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' w
 import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' }
 import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' }
 
+import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' }
+import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' }
+import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' }
+import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' }
+
 export default defineConfig([
   {
     input: 'src/index.ts',
@@ -106,6 +113,12 @@ export default defineConfig([
         qwen2514bJson,
         qwen2532bJson,
         qwen2572bJson,
+        deepseekR1DistillQwen_1_5b,
+        deepseekR1DistillQwen_7b,
+        deepseekR1DistillQwen_14b,
+        deepseekR1DistillQwen_32b,
+        deepseekR1DistillLlama_8b,
+        deepseekR1DistillLlama_70b,
       ]),
       NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
       SETTINGS: JSON.stringify(defaultSettingJson),