Chore: Model Hub update (#2966)

* fix: correct size

* version bump

* add: codestral 22b

* add: codestral 22b

* version bump

* upgrade to v3

* Update stop tokens in default-model.json

confirmed with Rex

* fix: whitespace

---------

Co-authored-by: Van Pham <64197333+Van-QA@users.noreply.github.com>
Hoang Ha, 2024-05-30 12:33:47 +07:00, committed by GitHub
parent b662c25007
commit bd5a0ea8ab
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
6 changed files with 48 additions and 10 deletions

package.json (@janhq/inference-cortex-extension)

@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.10",
+  "version": "1.0.11",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",

resources/models/codestral-22b/model.json (new file)

@ -0,0 +1,36 @@
{
"sources": [
{
"filename": "Codestral-22B-v0.1-Q4_K_M.gguf",
"url": "https://huggingface.co/bartowski/Codestral-22B-v0.1-GGUF/resolve/main/Codestral-22B-v0.1-Q4_K_M.gguf"
}
],
"id": "codestral-22b",
"object": "model",
"name": "Codestral 22B Q4",
"version": "1.0",
"description": "Latest model from MistralAI optimized for code generation tasks.",
"format": "gguf",
"settings": {
"ctx_len": 32000,
"prompt_template": "{system_message} [INST] {prompt} [/INST]",
"llama_model_path": "Codestral-22B-v0.1-Q4_K_M.gguf",
"ngl": 56
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 32000,
"stop": ["<endofstring>, [/INST]"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI",
"tags": ["22B", "Finetuned", "Featured"],
"size": 13341237440
},
"engine": "nitro"
}
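A note on the prompt_template field above: before tokenization, the engine substitutes {system_message} and {prompt} into that string. A minimal TypeScript sketch of the substitution, where applyTemplate is a hypothetical helper (the actual expansion happens inside the extension / cortex.cpp):

// Sketch only: placeholder substitution for a model.json prompt_template.
// applyTemplate is a hypothetical helper, not the extension's real API.
function applyTemplate(template: string, systemMessage: string, prompt: string): string {
  return template
    .replace('{system_message}', systemMessage)
    .replace('{prompt}', prompt)
}

// Using the Codestral template from the file above:
const rendered = applyTemplate(
  '{system_message} [INST] {prompt} [/INST]',
  'You are a helpful coding assistant.',
  'Write a binary search in TypeScript.'
)
// rendered === 'You are a helpful coding assistant. [INST] Write a binary search in TypeScript. [/INST]'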

resources/models/mistral-ins-7b-q4/model.json

@@ -1,20 +1,20 @@
 {
   "sources": [
     {
-      "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
-      "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+      "filename": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF/resolve/main/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
     }
   ],
   "id": "mistral-ins-7b-q4",
   "object": "model",
   "name": "Mistral Instruct 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.",
   "format": "gguf",
   "settings": {
     "ctx_len": 32768,
-    "prompt_template": "[INST] {prompt} [/INST]",
-    "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+    "prompt_template": "{system_message} [INST] {prompt} [/INST]",
+    "llama_model_path": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
     "ngl": 32
   },
   "parameters": {

resources/models/phi3-medium/model.json

@@ -8,7 +8,7 @@
   "id": "phi3-medium",
   "object": "model",
   "name": "Phi-3 Medium",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Phi-3 Medium is Microsoft's latest SOTA model.",
   "format": "gguf",
   "settings": {
@@ -29,7 +29,7 @@
   "metadata": {
     "author": "Microsoft",
     "tags": [
-      "7B",
+      "14B",
       "Finetuned"
     ],
     "size": 8366000000

extension build config (registers bundled models)

@@ -38,6 +38,7 @@ const llama3Hermes8bJson = require('./resources/models/llama3-hermes-8b/model.json')
 const aya8bJson = require('./resources/models/aya-23-8b/model.json')
 const aya35bJson = require('./resources/models/aya-23-35b/model.json')
 const phimediumJson = require('./resources/models/phi3-medium/model.json')
+const codestralJson = require('./resources/models/codestral-22b/model.json')
 
 export default [
   {
@@ -82,7 +83,8 @@ export default [
       llama3Hermes8bJson,
       phimediumJson,
       aya8bJson,
-      aya35bJson
+      aya35bJson,
+      codestralJson
     ]),
     NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
     DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
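For context, MODELS here behaves like a build-time define: each model.json is require()d and the whole list is serialized to one JSON string that the bundler substitutes into the extension source. A sketch of how such a define could be consumed at runtime; the field names and the exact shape of the MODELS symbol are assumptions, not the extension's documented API:

// Sketch only: reading a build-time MODELS define at runtime.
// Assumes the bundler replaced MODELS with a JSON string literal.
declare const MODELS: string

interface ModelEntry {
  id: string
  name: string
  engine: string
}

const models: ModelEntry[] = JSON.parse(MODELS)
const codestral = models.find((m) => m.id === 'codestral-22b')
console.log(codestral?.name) // 'Codestral 22B Q4' once registered as above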

default-model.json

@@ -23,7 +23,7 @@
     "top_p": 0.95,
     "stream": true,
     "max_tokens": 2048,
-    "stop": ["<endofstring>"],
+    "stop": ["<|END_OF_TURN_TOKEN|>", "<end_of_turn>", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"],
     "frequency_penalty": 0,
     "presence_penalty": 0
   },
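The expanded stop list above is what the engine scans for while streaming: generation halts at the first matching sequence, and the match itself is typically trimmed from the returned text. A small illustrative sketch of that trimming step (not the cortex.cpp implementation):

// Sketch only: cut generated text at the earliest stop sequence.
function truncateAtStop(text: string, stops: string[]): string {
  let cut = text.length
  for (const stop of stops) {
    const i = text.indexOf(stop)
    if (i !== -1 && i < cut) cut = i
  }
  return text.slice(0, cut)
}

const stops = ['<|eot_id|>', '<|im_end|>', '[/INST]']
console.log(truncateAtStop('All done.<|im_end|>garbage', stops)) // 'All done.'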