From cd4f72dd31a409bd2aa955c69504da9526a570ec Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 12 Nov 2024 13:46:13 +0700 Subject: [PATCH] chore: add qwen2.5-coder 14B and 32B models --- .../inference-cortex-extension/package.json | 2 +- .../qwen2.5-coder-14b-instruct/model.json | 36 +++++++++++++++++++ .../qwen2.5-coder-32b-instruct/model.json | 36 +++++++++++++++++++ .../rollup.config.ts | 4 +++ 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json create mode 100644 extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json index 5a9fc56e9..d262ad5ec 100644 --- a/extensions/inference-cortex-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.20", + "version": "1.0.21", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. 
See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json new file mode 100644 index 000000000..a445ee2db --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-14b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 14B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": ["<|endoftext|>", "<|im_end|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["14B", "Featured"], + "size": 8990000000 + }, + "engine": "llama-cpp" + } + \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json new file mode 100644 index 000000000..cffdf03df --- /dev/null +++ 
b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-32b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 32B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": ["<|endoftext|>", "<|im_end|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["32B", "Featured"], + "size": 19900000000 + }, + "engine": "llama-cpp" + } + \ No newline at end of file diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index ea873990b..d7dc8d40a 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -49,6 +49,8 @@ const llama321bJson = require('./resources/models/llama3.2-1b-instruct/model.jso n') const llama323bJson = require('./resources/models/llama3.2-3b-instruct/model.json') const qwen257bJson = require('./resources/models/qwen2.5-7b-instruct/model.json') const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json') +const qwen25coder14bJson = require('./resources/models/qwen2.5-coder-14b-instruct/model.json') +const 
qwen25coder32bJson = require('./resources/models/qwen2.5-coder-32b-instruct/model.json') const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json') const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json') const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json') @@ -108,6 +110,8 @@ export default [ llama323bJson, qwen257bJson, qwen25coder7bJson, + qwen25coder14bJson, + qwen25coder32bJson, qwen2514bJson, qwen2532bJson, qwen2572bJson,