diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index ac3ed180a..f484b4511 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.17", + "version": "1.0.18", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json index 36fceaad2..4d825cfeb 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json +++ b/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json @@ -8,7 +8,7 @@ "id": "deepseek-coder-1.3b", "object": "model", "name": "Deepseek Coder 1.3B Instruct Q8", - "version": "1.3", + "version": "1.4", "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", "format": "gguf", "settings": { @@ -22,13 +22,13 @@ "top_p": 0.95, "stream": true, "max_tokens": 16384, - "stop": [], + "stop": ["<|EOT|>"], "frequency_penalty": 0, "presence_penalty": 0 }, "metadata": { "author": "Deepseek, The Bloke", - "tags": ["Tiny", "Foundational Model"], + "tags": ["Tiny"], "size": 1430000000 }, "engine": "nitro" diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json index 103c4cbcb..e87d6a643 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json +++ b/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json @@ -2,13 +2,13 @@ "sources": [ { "filename": "deepseek-coder-33b-instruct.Q4_K_M.gguf", - "url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q4_K_M.gguf" + "url": "https://huggingface.co/mradermacher/deepseek-coder-33b-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q4_K_M.gguf" } ], "id": "deepseek-coder-34b", "object": "model", "name": "Deepseek Coder 33B Instruct Q4", - "version": "1.3", + "version": "1.4", "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", "format": "gguf", "settings": { @@ -22,13 +22,13 @@ "top_p": 0.95, "stream": true, "max_tokens": 16384, - "stop": [], + "stop": ["<|EOT|>"], "frequency_penalty": 0, "presence_penalty": 0 }, "metadata": { - "author": "Deepseek, The Bloke", - "tags": ["34B", "Foundational Model"], + "author": "Deepseek", + "tags": ["33B"], "size": 19940000000 }, "engine": "nitro"