From cd4f72dd31a409bd2aa955c69504da9526a570ec Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 12 Nov 2024 13:46:13 +0700 Subject: [PATCH] chore: add qwen2.5-coder 14B and 32B models --- .../inference-cortex-extension/package.json | 2 +- .../qwen2.5-coder-14b-instruct/model.json | 36 +++++++++++++++++++ .../qwen2.5-coder-32b-instruct/model.json | 36 +++++++++++++++++++ .../rollup.config.ts | 4 +++ 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json create mode 100644 extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json index 5a9fc56e9..d262ad5ec 100644 --- a/extensions/inference-cortex-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.20", + "version": "1.0.21", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. 
See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json new file mode 100644 index 000000000..a445ee2db --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-14b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 14B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": ["<|endoftext|>", "<|im_end|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["14B", "Featured"], + "size": 8990000000 + }, + "engine": "llama-cpp" + } + \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json new file mode 100644 index 000000000..cffdf03df --- /dev/null +++ 
b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-32b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 32B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": ["<|endoftext|>", "<|im_end|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["32B", "Featured"], + "size": 19900000000 + }, + "engine": "llama-cpp" + } + \ No newline at end of file diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index ea873990b..d7dc8d40a 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -49,6 +49,8 @@ const llama321bJson = require('./resources/models/llama3.2-1b-instruct/model.jso n') const llama323bJson = require('./resources/models/llama3.2-3b-instruct/model.json') const qwen257bJson = require('./resources/models/qwen2.5-7b-instruct/model.json') const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json') +const qwen25coder14bJson = require('./resources/models/qwen2.5-coder-14b-instruct/model.json') +const 
qwen25coder32bJson = require('./resources/models/qwen2.5-coder-32b-instruct/model.json') const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json') const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json') const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json') @@ -108,6 +110,8 @@ export default [ llama323bJson, qwen257bJson, qwen25coder7bJson, + qwen25coder14bJson, + qwen25coder32bJson, qwen2514bJson, qwen2532bJson, qwen2572bJson,