From 1c5b6355d99bd9e090e5080589b493d55d84f8ad Mon Sep 17 00:00:00 2001
From: Louis
Date: Tue, 27 Aug 2024 16:45:19 +0700
Subject: [PATCH] chore: add llama 3.1 8B gguf model (#3468)

* chore: add llama 3.1 8B gguf model

* chore: add llama3.1 70B model

* chore: add models to rollup

* chore: fix tag

* chore: fix size

* fix: 8b model

* Chore/add gemma2 model (#3471)

* feat: add gemma 2

* feat: add gemma 2

* feat: correct ngl

---------

Co-authored-by: Van QA

* feat: add featured tag

---------

Co-authored-by: Van Pham <64197333+Van-QA@users.noreply.github.com>
Co-authored-by: Van QA
---
 .../inference-nitro-extension/package.json    |  2 +-
 .../{gemma-2b => gemma-1.1-2b}/model.json     | 12 +++---
 .../{gemma-7b => gemma-1.1-7b}/model.json     | 10 ++---
 .../resources/models/gemma-2-27b/model.json   | 42 ++++++++++++++++++
 .../resources/models/gemma-2-2b/model.json    | 43 +++++++++++++++++++
 .../resources/models/gemma-2-9b/model.json    | 42 ++++++++++++++++++
 .../models/llama3-8b-instruct/model.json      |  4 +-
 .../models/llama3.1-70b-instruct/model.json   | 42 ++++++++++++++++++
 .../models/llama3.1-8b-instruct/model.json    | 42 ++++++++++++++++++
 .../rollup.config.ts                          | 21 ++++++---
 10 files changed, 240 insertions(+), 20 deletions(-)
 rename extensions/inference-nitro-extension/resources/models/{gemma-2b => gemma-1.1-2b}/model.json (68%)
 rename extensions/inference-nitro-extension/resources/models/{gemma-7b => gemma-1.1-7b}/model.json (69%)
 create mode 100644 extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json
 create mode 100644 extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json
 create mode 100644 extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json
 create mode 100644 extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json
 create mode 100644 extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json

diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index 24c887024..7be4be69a 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.14",
+  "version": "1.0.15",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json
similarity index 68%
rename from extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
rename to extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json
index 68cff325a..56cd9c81c 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json
@@ -1,20 +1,20 @@
 {
   "sources": [
     {
-      "filename": "gemma-2b-it-q4_k_m.gguf",
-      "url": "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf"
+      "filename": "gemma-1.1-2b-it-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/gemma-1.1-2b-it-GGUF/resolve/main/gemma-1.1-2b-it-Q4_K_M.gguf"
     }
   ],
-  "id": "gemma-2b",
+  "id": "gemma-1.1-2b-it",
   "object": "model",
-  "name": "Gemma 2B Q4",
+  "name": "Gemma 1.1 2B Q4",
   "version": "1.3",
   "description": "Gemma is built from the same technology with Google's Gemini.",
   "format": "gguf",
   "settings": {
     "ctx_len": 8192,
     "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
-    "llama_model_path": "gemma-2b-it-q4_k_m.gguf",
+    "llama_model_path": "gemma-1.1-2b-it-Q4_K_M.gguf",
     "ngl": 19
   },
   "parameters": {
@@ -29,7 +29,7 @@
   "metadata": {
     "author": "Google",
     "tags": ["2B", "Finetuned", "Tiny"],
-    "size": 1500000000
+    "size": 1630000000
   },
   "engine": "nitro"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json
similarity index 69%
rename from extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
rename to extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json
index 615f1149b..5bd89b478 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json
@@ -1,20 +1,20 @@
 {
   "sources": [
     {
-      "filename": "gemma-7b-it-q4_K_M.gguf",
-      "url": "https://huggingface.co/mmnga/gemma-7b-it-gguf/resolve/main/gemma-7b-it-q4_K_M.gguf"
+      "filename": "gemma-1.1-7b-it-q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf"
     }
   ],
-  "id": "gemma-7b",
+  "id": "gemma-1.1-7b-it",
   "object": "model",
-  "name": "Gemma 7B Q4",
+  "name": "Gemma 1.1 7B Q4",
   "version": "1.2",
   "description": "Google's Gemma is built for multilingual purpose",
   "format": "gguf",
   "settings": {
     "ctx_len": 8192,
     "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
-    "llama_model_path": "gemma-7b-it-q4_K_M.gguf",
+    "llama_model_path": "gemma-1.1-7b-it-q4_K_M.gguf",
     "ngl": 29
   },
   "parameters": {
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json
new file mode 100644
index 000000000..bdf2d5c9c
--- /dev/null
+++ b/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json
@@ -0,0 +1,42 @@
+{
+  "sources": [
+    {
+      "filename": "gemma-2-27b-it-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/gemma-2-27b-it-GGUF/resolve/main/gemma-2-27b-it-Q4_K_M.gguf"
+    }
+  ],
+  "id": "gemma-2-27b-it",
+  "object": "model",
+  "name": "Gemma 2 27B Q4",
+  "version": "1.0",
+  "description": "Gemma is built from the same technology with Google's Gemini.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 8192,
+    "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model\n",
+    "llama_model_path": "gemma-2-27b-it-Q4_K_M.gguf",
+    "ngl": 47
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "stop": [
+      "<end_of_turn>"
+    ],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "Google",
+    "tags": [
+      "27B",
+      "Conversational",
+      "Text-generation",
+      "Featured"
+    ],
+    "size": 16600000000
+  },
+  "engine": "nitro"
+}
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json
new file mode 100644
index 000000000..1665f76ee
--- /dev/null
+++ b/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json
@@ -0,0 +1,43 @@
+{
+  "sources": [
+    {
+      "filename": "gemma-2-2b-it-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf"
+    }
+  ],
+  "id": "gemma-2-2b-it",
+  "object": "model",
+  "name": "Gemma 2 2B Q4",
+  "version": "1.0",
+  "description": "Gemma is built from the same technology with Google's Gemini.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 8192,
+    "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model\n",
+    "llama_model_path": "gemma-2-2b-it-Q4_K_M.gguf",
+    "ngl": 27
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "stop": [
+      "<end_of_turn>"
+    ],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "Google",
+    "tags": [
+      "2B",
+      "Tiny",
+      "Conversational",
+      "Text-generation",
+      "Featured"
+    ],
+    "size": 1710000000
+  },
+  "engine": "nitro"
+}
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json
new file mode 100644
index 000000000..42e7dcee2
--- /dev/null
+++ b/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json
@@ -0,0 +1,42 @@
+{
+  "sources": [
+    {
+      "filename": "gemma-2-9b-it-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/gemma-2-9b-it-GGUF/resolve/main/gemma-2-9b-it-Q4_K_M.gguf"
+    }
+  ],
+  "id": "gemma-2-9b-it",
+  "object": "model",
+  "name": "Gemma 2 9B Q4",
+  "version": "1.0",
+  "description": "Gemma is built from the same technology with Google's Gemini.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 8192,
+    "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model\n",
+    "llama_model_path": "gemma-2-9b-it-Q4_K_M.gguf",
+    "ngl": 43
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "stop": [
+      "<end_of_turn>"
+    ],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "Google",
+    "tags": [
+      "9B",
+      "Conversational",
+      "Text-generation",
+      "Featured"
+    ],
+    "size": 5760000000
+  },
+  "engine": "nitro"
+}
diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
index 313bf8425..ced7e1ca8 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
@@ -2,7 +2,7 @@
   "sources": [
     {
       "filename": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
-      "url": "https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
+      "url": "https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
     }
   ],
   "id": "llama3-8b-instruct",
@@ -28,7 +28,7 @@
   },
   "metadata": {
     "author": "MetaAI",
-    "tags": ["7B", "Featured"],
+    "tags": ["8B", "Featured"],
     "size": 4920000000
   },
   "engine": "nitro"
diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json
new file mode 100644
index 000000000..4d8eab7e3
--- /dev/null
+++ b/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json
@@ -0,0 +1,42 @@
+{
+  "sources": [
+    {
+      "filename": "Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf"
+    }
+  ],
+  "id": "llama3.1-70b-instruct",
+  "object": "model",
+  "name": "Llama 3.1 70B Q4 Instruct",
+  "version": "1.0",
+  "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
+    "llama_model_path": "Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf",
+    "ngl": 33
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "stop": [
+      "<|end_of_text|>",
+      "<|eot_id|>",
+      "<|eom_id|>"
+    ],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "MetaAI",
+    "tags": [
+      "70B",
+      "Featured"
+    ],
+    "size": 42500000000
+  },
+  "engine": "nitro"
+}
diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json
new file mode 100644
index 000000000..fe44b0b1c
--- /dev/null
+++ b/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json
@@ -0,0 +1,42 @@
+{
+  "sources": [
+    {
+      "filename": "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
+    }
+  ],
+  "id": "llama3.1-8b-instruct",
+  "object": "model",
+  "name": "Llama 3.1 8B Q4 Instruct",
+  "version": "1.0",
+  "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
+    "llama_model_path": "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
+    "ngl": 33
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "stop": [
+      "<|end_of_text|>",
+      "<|eot_id|>",
+      "<|eom_id|>"
+    ],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "MetaAI",
+    "tags": [
+      "8B",
+      "Featured"
+    ],
+    "size": 4920000000
+  },
+  "engine": "nitro"
+}
diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts
index 71712a4d6..fdd11f961 100644
--- a/extensions/inference-nitro-extension/rollup.config.ts
+++ b/extensions/inference-nitro-extension/rollup.config.ts
@@ -12,8 +12,8 @@ const codeninja7bJson = require('./resources/models/codeninja-1.0-7b/model.json'
 const commandr34bJson = require('./resources/models/command-r-34b/model.json')
 const deepseekCoder13bJson = require('./resources/models/deepseek-coder-1.3b/model.json')
 const deepseekCoder34bJson = require('./resources/models/deepseek-coder-34b/model.json')
-const gemma2bJson = require('./resources/models/gemma-2b/model.json')
-const gemma7bJson = require('./resources/models/gemma-7b/model.json')
+const gemma112bJson = require('./resources/models/gemma-1.1-2b/model.json')
+const gemma117bJson = require('./resources/models/gemma-1.1-7b/model.json')
 const llama2Chat70bJson = require('./resources/models/llama2-chat-70b/model.json')
 const llama2Chat7bJson = require('./resources/models/llama2-chat-7b/model.json')
 const llamacorn1bJson = require('./resources/models/llamacorn-1.1b/model.json')
@@ -40,7 +40,11 @@ const aya35bJson = require('./resources/models/aya-23-35b/model.json')
 const phimediumJson = require('./resources/models/phi3-medium/model.json')
 const codestralJson = require('./resources/models/codestral-22b/model.json')
 const qwen2Json = require('./resources/models/qwen2-7b/model.json')
-
+const llama318bJson = require('./resources/models/llama3.1-8b-instruct/model.json')
+const llama3170bJson = require('./resources/models/llama3.1-70b-instruct/model.json')
+const gemma22bJson = require('./resources/models/gemma-2-2b/model.json')
+const gemma29bJson = require('./resources/models/gemma-2-9b/model.json')
+const gemma227bJson = require('./resources/models/gemma-2-27b/model.json')

 export default [
   {
@@ -60,8 +64,8 @@
       commandr34bJson,
       deepseekCoder13bJson,
       deepseekCoder34bJson,
-      gemma2bJson,
-      gemma7bJson,
+      gemma112bJson,
+      gemma117bJson,
       llama2Chat70bJson,
       llama2Chat7bJson,
       llamacorn1bJson,
@@ -87,7 +91,12 @@
       aya8bJson,
       aya35bJson,
       codestralJson,
-      qwen2Json
+      qwen2Json,
+      llama318bJson,
+      llama3170bJson,
+      gemma22bJson,
+      gemma29bJson,
+      gemma227bJson
     ]),
     NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
     DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
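Review note, not part of the patch: a model here is only loadable when "settings.llama_model_path" names the file that was downloaded via "sources[].filename", which appears to be the class of mismatch the "fix: 8b model" commit above addresses. Below is a minimal TypeScript sketch of a check that would catch such drift before release; the script name (validate-model-json.ts), its invocation from the extension root, and its use as a CI gate are assumptions, not something this PR ships.

// validate-model-json.ts -- hypothetical sanity check, assumed to be run
// from extensions/inference-nitro-extension/. It flags any model.json whose
// settings.llama_model_path matches none of its sources[].filename entries
// (e.g. "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf" vs "...-Q4_K_M.gguf").
import { readdirSync, readFileSync } from 'node:fs'
import { join } from 'node:path'

const modelsDir = join(process.cwd(), 'resources', 'models')
let failures = 0

for (const entry of readdirSync(modelsDir, { withFileTypes: true })) {
  // Each model lives in resources/models/<id>/model.json.
  if (!entry.isDirectory()) continue
  const model = JSON.parse(
    readFileSync(join(modelsDir, entry.name, 'model.json'), 'utf8')
  )
  const filenames: string[] = (model.sources ?? []).map(
    (s: { filename: string }) => s.filename
  )
  const modelPath: string | undefined = model.settings?.llama_model_path
  if (modelPath && !filenames.includes(modelPath)) {
    console.error(
      `${model.id}: llama_model_path "${modelPath}" matches none of [${filenames.join(', ')}]`
    )
    failures += 1
  }
}

process.exit(failures > 0 ? 1 : 0)

Run with, for example, "npx tsx validate-model-json.ts"; the non-zero exit code on failure makes it suitable as a pre-release check.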