Chore: Model Hub update (#2966)

* fix: correct size

* version bump

* add: codestral 22b

* add: codestral 22b

* version bump

* upgrade to v3

* Update stop token defaults in default-model.json

confirmed with Rex

* fix: whitespace

---------

Co-authored-by: Van Pham <64197333+Van-QA@users.noreply.github.com>
Hoang Ha 2024-05-30 12:33:47 +07:00 committed by GitHub
parent b662c25007
commit bd5a0ea8ab
6 changed files with 48 additions and 10 deletions

package.json

@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.10",
+  "version": "1.0.11",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",

resources/models/codestral-22b/model.json

@@ -0,0 +1,36 @@
+{
+  "sources": [
+    {
+      "filename": "Codestral-22B-v0.1-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Codestral-22B-v0.1-GGUF/resolve/main/Codestral-22B-v0.1-Q4_K_M.gguf"
+    }
+  ],
+  "id": "codestral-22b",
+  "object": "model",
+  "name": "Codestral 22B Q4",
+  "version": "1.0",
+  "description": "Latest model from MistralAI optimized for code generation tasks.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 32000,
+    "prompt_template": "{system_message} [INST] {prompt} [/INST]",
+    "llama_model_path": "Codestral-22B-v0.1-Q4_K_M.gguf",
+    "ngl": 56
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 32000,
+    "stop": ["<endofstring>", "[/INST]"],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "MistralAI",
+    "tags": ["22B", "Finetuned", "Featured"],
+    "size": 13341237440
+  },
+  "engine": "nitro"
+}
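Side note: the prompt_template above is a plain placeholder string. A minimal sketch of how an engine could expand it; renderPrompt and its argument names are hypothetical illustrations, not part of this extension's API:

// Hypothetical helper: expands a model.json prompt_template by simple
// placeholder substitution, matching the shape of the Codestral template above.
function renderPrompt(template: string, systemMessage: string, prompt: string): string {
  return template
    .replace("{system_message}", systemMessage)
    .replace("{prompt}", prompt);
}

// Example with the Codestral 22B template from this diff:
const template = "{system_message} [INST] {prompt} [/INST]";
console.log(renderPrompt(template, "You are a coding assistant.", "Write a binary search in C."));
// -> "You are a coding assistant. [INST] Write a binary search in C. [/INST]"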

resources/models/mistral-ins-7b-q4/model.json

@@ -1,20 +1,20 @@
 {
   "sources": [
     {
-      "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
-      "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+      "filename": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF/resolve/main/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
     }
   ],
   "id": "mistral-ins-7b-q4",
   "object": "model",
   "name": "Mistral Instruct 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.",
   "format": "gguf",
   "settings": {
     "ctx_len": 32768,
-    "prompt_template": "[INST] {prompt} [/INST]",
-    "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+    "prompt_template": "{system_message} [INST] {prompt} [/INST]",
+    "llama_model_path": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
     "ngl": 32
   },
   "parameters": {

resources/models/phi3-medium/model.json

@@ -8,7 +8,7 @@
   "id": "phi3-medium",
   "object": "model",
   "name": "Phi-3 Medium",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Phi-3 Medium is Microsoft's latest SOTA model.",
   "format": "gguf",
   "settings": {
@@ -29,7 +29,7 @@
   "metadata": {
     "author": "Microsoft",
     "tags": [
-      "7B",
+      "14B",
      "Finetuned"
    ],
    "size": 8366000000

rollup.config.ts

@@ -38,6 +38,7 @@ const llama3Hermes8bJson = require('./resources/models/llama3-hermes-8b/model.json')
 const aya8bJson = require('./resources/models/aya-23-8b/model.json')
 const aya35bJson = require('./resources/models/aya-23-35b/model.json')
 const phimediumJson = require('./resources/models/phi3-medium/model.json')
+const codestralJson = require('./resources/models/codestral-22b/model.json')
 
 export default [
   {
@@ -82,7 +83,8 @@ export default [
         llama3Hermes8bJson,
         phimediumJson,
         aya8bJson,
-        aya35bJson
+        aya35bJson,
+        codestralJson
       ]),
       NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
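For context, MODELS, NODE, and DEFAULT_SETTINGS here read like build-time replacement values: each model.json registered in the MODELS list ends up baked into the bundled extension's model catalog. A minimal sketch of that wiring, assuming @rollup/plugin-replace (the plugin setup itself sits outside this diff):

// rollup.config.ts (sketch): inline the JSON-stringified model list as a
// compile-time constant; the extension then reads MODELS at runtime.
const replace = require('@rollup/plugin-replace')
const codestralJson = require('./resources/models/codestral-22b/model.json')

export default {
  input: 'src/index.ts',
  plugins: [
    replace({
      preventAssignment: true, // only rewrite reads, never assignments
      MODELS: JSON.stringify([codestralJson /* ...other model manifests */]),
    }),
  ],
}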

default-model.json

@@ -23,7 +23,7 @@
     "top_p": 0.95,
     "stream": true,
     "max_tokens": 2048,
-    "stop": ["<endofstring>"],
+    "stop": ["<|END_OF_TURN_TOKEN|>", "<end_of_turn>", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"],
     "frequency_penalty": 0,
     "presence_penalty": 0
   },
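The widened default matters because each chat template family closes a turn differently: [/INST] (Mistral), <|eot_id|> and <|end_of_text|> (Llama 3), <|im_end|> (ChatML), <|end|> (Phi-3), <end_of_turn> (Gemma), <|END_OF_TURN_TOKEN|> (Command R). A minimal sketch of how stop strings typically truncate a streamed completion; illustrative only, not cortex.cpp's actual implementation:

// Scan the accumulated output for any configured stop string and cut the
// text there, so template end-of-turn markers never leak into the reply.
function applyStops(text: string, stops: string[]): { text: string; stopped: boolean } {
  for (const stop of stops) {
    const i = text.indexOf(stop);
    if (i !== -1) return { text: text.slice(0, i), stopped: true };
  }
  return { text, stopped: false };
}

console.log(applyStops("Hello!<|im_end|> junk", ["[/INST]", "<|im_end|>"]));
// -> { text: "Hello!", stopped: true }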