From 358d8f83f9405cc32df1a8d4847bf8e6b2d93bdc Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 2 Oct 2024 11:36:55 +0700 Subject: [PATCH 1/2] chore: add llama3.2 and qwen models --- .../inference-nitro-extension/package.json | 2 +- .../resources/models/command-r-34b/model.json | 4 +-- .../resources/models/gemma-2-27b/model.json | 5 ++- .../resources/models/gemma-2-2b/model.json | 5 ++- .../resources/models/gemma-2-9b/model.json | 5 ++- .../models/llama3.1-70b-instruct/model.json | 5 ++- .../models/llama3.1-8b-instruct/model.json | 5 ++- .../models/llama3.2-1b-instruct/model.json | 35 ++++++++++++++++++ .../models/llama3.2-3b-instruct/model.json | 35 ++++++++++++++++++ .../models/qwen2.5-14b-instruct/model.json | 36 +++++++++++++++++++ .../models/qwen2.5-32b-instruct/model.json | 36 +++++++++++++++++++ .../models/qwen2.5-72b-instruct/model.json | 36 +++++++++++++++++++ .../models/qwen2.5-7b-instruct/model.json | 36 +++++++++++++++++++ .../qwen2.5-coder-7b-instruct/model.json | 36 +++++++++++++++++++ .../rollup.config.ts | 14 ++++++++ .../ChatBody/OnDeviceStarterScreen/index.tsx | 5 +-- 16 files changed, 278 insertions(+), 22 deletions(-) create mode 100644 extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json create mode 100644 extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json create mode 100644 extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json create mode 100644 extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json create mode 100644 extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json create mode 100644 extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json create mode 100644 extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json diff --git a/extensions/inference-nitro-extension/package.json 
b/extensions/inference-nitro-extension/package.json index f484b4511..42c31938e 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.18", + "version": "1.0.19", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json index d29e70a17..6b166eea5 100644 --- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json +++ b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json @@ -8,7 +8,7 @@ "id": "command-r-34b", "object": "model", "name": "Command-R v01 34B Q4", - "version": "1.5", + "version": "1.6", "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.", "format": "gguf", "settings": { @@ -28,7 +28,7 @@ }, "metadata": { "author": "CohereAI", - "tags": ["34B", "Finetuned", "Featured"], + "tags": ["34B", "Finetuned"], "size": 21500000000 }, "engine": "nitro" diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json index bdf2d5c9c..4db74ac6f 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json +++ b/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json @@ -8,7 +8,7 @@ "id": "gemma-2-27b-it", "object": "model", "name": "Gemma 2 27B Q4", - "version": "1.0", + "version": "1.1", "description": "Gemma is 
built from the same technology with Google's Gemini.", "format": "gguf", "settings": { @@ -33,8 +33,7 @@ "tags": [ "27B", "Conversational", - "Text-generation", - "Featured" + "Text-generation" ], "size": 16600000000 }, diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json index 1665f76ee..d85759f9b 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json +++ b/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json @@ -8,7 +8,7 @@ "id": "gemma-2-2b-it", "object": "model", "name": "Gemma 2 2B Q4", - "version": "1.0", + "version": "1.1", "description": "Gemma is built from the same technology with Google's Gemini.", "format": "gguf", "settings": { @@ -34,8 +34,7 @@ "2B", "Tiny", "Conversational", - "Text-generation", - "Featured" + "Text-generation" ], "size": 1710000000 }, diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json index 42e7dcee2..8f6af15d9 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json +++ b/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json @@ -8,7 +8,7 @@ "id": "gemma-2-9b-it", "object": "model", "name": "Gemma 2 9B Q4", - "version": "1.0", + "version": "1.1", "description": "Gemma is built from the same technology with Google's Gemini.", "format": "gguf", "settings": { @@ -33,8 +33,7 @@ "tags": [ "9B", "Conversational", - "Text-generation", - "Featured" + "Text-generation" ], "size": 5760000000 }, diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json index 780ef8465..1f4931e11 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json +++ 
b/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json @@ -8,7 +8,7 @@ "id": "llama3.1-70b-instruct", "object": "model", "name": "Llama 3.1 70B Instruct Q4", - "version": "1.1", + "version": "1.2", "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.", "format": "gguf", "settings": { @@ -33,8 +33,7 @@ "metadata": { "author": "MetaAI", "tags": [ - "70B", - "Featured" + "70B" ], "size": 42500000000 }, diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json index 9de8555be..4b21534bc 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json +++ b/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json @@ -8,7 +8,7 @@ "id": "llama3.1-8b-instruct", "object": "model", "name": "Llama 3.1 8B Instruct Q4", - "version": "1.1", + "version": "1.2", "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.", "format": "gguf", "settings": { @@ -33,8 +33,7 @@ "metadata": { "author": "MetaAI", "tags": [ - "8B", - "Featured" + "8B" ], "size": 4920000000 }, diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json new file mode 100644 index 000000000..5be08409d --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "Llama-3.2-1B-Instruct-Q8_0.gguf", + "url": "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf" + } + ], + "id": "llama3.2-1b-instruct", + "object": "model", + "name": "Llama 3.2 1B Instruct Q8", + "version": "1.0",
+ "description": "Meta's Llama 3.2 excels at general usage situations, including chat, general world knowledge, and coding.", + "format": "gguf", + "settings": { + "ctx_len": 131072, + "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "llama_model_path": "Llama-3.2-1B-Instruct-Q8_0.gguf", + "ngl": 33 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 8192, + "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "MetaAI", + "tags": ["1B", "Featured"], + "size": 1320000000 + }, + "engine": "nitro" +} diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json new file mode 100644 index 000000000..aacb3f0f8 --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "Llama-3.2-3B-Instruct-Q8_0.gguf", + "url": "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q8_0.gguf" + } + ], + "id": "llama3.2-3b-instruct", + "object": "model", + "name": "Llama 3.2 3B Instruct Q8", + "version": "1.0", + "description": "Meta's Llama 3.2 excels at general usage situations, including chat, general world knowledge, and coding.", + "format": "gguf", + "settings": { + "ctx_len": 131072, + "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "llama_model_path": "Llama-3.2-3B-Instruct-Q8_0.gguf", + "ngl": 33 + }, + "parameters": { + "temperature": 0.7, + 
"top_p": 0.95, + "stream": true, + "max_tokens": 8192, + "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "MetaAI", + "tags": ["3B", "Featured"], + "size": 3420000000 + }, + "engine": "nitro" +} diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json new file mode 100644 index 000000000..f194b3f16 --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-14B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-14b-instruct", + "object": "model", + "name": "Qwen2.5 14B Instruct Q4", + "version": "1.0", + "description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-14B-Instruct-Q4_K_M.gguf", + "ngl": 49 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["14B", "Featured"], + "size": 8990000000 + }, + "engine": "nitro" + } + \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json new file mode 100644 index 000000000..7b9b1ab88 --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json @@ -0,0 
+1,35 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-32B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-32B-Instruct-GGUF/resolve/main/Qwen2.5-32B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-32b-instruct", + "object": "model", + "name": "Qwen2.5 32B Instruct Q4", + "version": "1.0", + "description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-32B-Instruct-Q4_K_M.gguf", + "ngl": 65 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["32B"], + "size": 19900000000 + }, + "engine": "nitro" +} diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json new file mode 100644 index 000000000..73e000406 --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-72B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-72B-Instruct-GGUF/resolve/main/Qwen2.5-72B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-72b-instruct", + "object": "model", + "name": "Qwen2.5 72B Instruct Q4", + "version": "1.0", + "description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-72B-Instruct-Q4_K_M.gguf", + "ngl": 81 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["72B"], + "size": 47400000000 + }, + "engine": "nitro" +} diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json new file mode 100644 index 000000000..2d0112bdb --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-7B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-7B-Instruct-GGUF/resolve/main/Qwen2.5-7B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-7b-instruct", + "object": "model", + "name": "Qwen2.5 7B Instruct Q4", + "version": "1.0", + "description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-7B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["7B", "Featured"], + "size": 4680000000 + }, + "engine": "nitro" +} diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json new file mode 100644 index 000000000..23b58d163 --- /dev/null +++ 
b/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json @@ -0,0 +1,35 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-7b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 7B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["7B", "Featured"], + "size": 4680000000 + }, + "engine": "nitro" +} diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts index 4e1731a09..1a8badb6f 100644 --- a/extensions/inference-nitro-extension/rollup.config.ts +++ b/extensions/inference-nitro-extension/rollup.config.ts @@ -45,6 +45,13 @@ const llama3170bJson = require('./resources/models/llama3.1-70b-instruct/model.j const gemma22bJson = require('./resources/models/gemma-2-2b/model.json') const gemma29bJson = require('./resources/models/gemma-2-9b/model.json') const gemma227bJson = require('./resources/models/gemma-2-27b/model.json') +const llama321bJson = require('./resources/models/llama3.2-1b-instruct/model.json') +const llama323bJson = require('./resources/models/llama3.2-3b-instruct/model.json') +const qwen257bJson = 
require('./resources/models/qwen2.5-7b-instruct/model.json') +const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json') +const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json') +const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json') +const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json') export default [ { @@ -97,6 +104,13 @@ export default [ gemma22bJson, gemma29bJson, gemma227bJson, + llama321bJson, + llama323bJson, + qwen257bJson, + qwen25coder7bJson, + qwen2514bJson, + qwen2532bJson, + qwen2572bJson, ]), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx index b1e9d081a..0ef9a9ba1 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx @@ -58,7 +58,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { const configuredModels = useAtomValue(configuredModelsAtom) const setMainViewState = useSetAtom(mainViewStateAtom) - const recommendModel = ['gemma-2-2b-it', 'llama3.1-8b-instruct'] + const recommendModel = ['llama3.2-1b-instruct', 'llama3.2-3b-instruct'] const featuredModel = configuredModels.filter((x) => { const manualRecommendModel = configuredModels.filter((x) => @@ -219,9 +219,6 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { >
{featModel.name}
-

- {featModel.metadata.author} -

{isDownloading ? ( From 6082959d17fcafefe08dbbc5083bdf2f69012b2d Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 2 Oct 2024 11:46:42 +0700 Subject: [PATCH 2/2] chore: update models stop token --- .../resources/models/qwen2.5-14b-instruct/model.json | 2 +- .../resources/models/qwen2.5-32b-instruct/model.json | 2 +- .../resources/models/qwen2.5-72b-instruct/model.json | 2 +- .../resources/models/qwen2.5-7b-instruct/model.json | 2 +- .../resources/models/qwen2.5-coder-7b-instruct/model.json | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json index f194b3f16..04913b874 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json +++ b/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json @@ -22,7 +22,7 @@ "top_p": 0.95, "stream": true, "max_tokens": 32768, - "stop": [], + "stop": ["<|endoftext|>", "<|im_end|>"], "frequency_penalty": 0, "presence_penalty": 0 }, diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json index 7b9b1ab88..43ba30c56 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json +++ b/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json @@ -22,7 +22,7 @@ "top_p": 0.95, "stream": true, "max_tokens": 32768, - "stop": [], + "stop": ["<|endoftext|>", "<|im_end|>"], "frequency_penalty": 0, "presence_penalty": 0 }, diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json index 73e000406..1852a0909 100644 --- 
a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json +++ b/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json @@ -22,7 +22,7 @@ "top_p": 0.95, "stream": true, "max_tokens": 32768, - "stop": [], + "stop": ["<|endoftext|>", "<|im_end|>"], "frequency_penalty": 0, "presence_penalty": 0 }, diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json index 2d0112bdb..b47511f96 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json +++ b/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json @@ -22,7 +22,7 @@ "top_p": 0.95, "stream": true, "max_tokens": 32768, - "stop": [], + "stop": ["<|endoftext|>", "<|im_end|>"], "frequency_penalty": 0, "presence_penalty": 0 }, diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json index 23b58d163..2f1080b2c 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json +++ b/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json @@ -22,7 +22,7 @@ "top_p": 0.95, "stream": true, "max_tokens": 32768, - "stop": [], + "stop": ["<|endoftext|>", "<|im_end|>"], "frequency_penalty": 0, "presence_penalty": 0 },