chore: add llama 3.1 8B gguf model (#3468)
* chore: add llama 3.1 8B gguf model
* chore: add llama3.1 70B model
* chore: add models to rollup
* chore: fix tag
* chore: fix size
* fix: 8b model
* Chore/add gemma2 model (#3471)
  * feat: add gemma 2
  * feat: correct ngl
* feat: add featured tag

Co-authored-by: Van Pham <64197333+Van-QA@users.noreply.github.com>
Co-authored-by: Van QA <van@jan.ai>
parent 17653460b5
commit 1c5b6355d9
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.14",
+  "version": "1.0.15",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
@@ -1,20 +1,20 @@
 {
   "sources": [
     {
-      "filename": "gemma-2b-it-q4_k_m.gguf",
-      "url": "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf"
+      "filename": "gemma-1.1-2b-it-q4_k_m.gguf",
+      "url": "https://huggingface.co/bartowski/gemma-1.1-2b-it-GGUF/resolve/main/gemma-1.1-2b-it-Q4_K_M.gguf"
     }
   ],
-  "id": "gemma-2b",
+  "id": "gemma-1.1-2b-it",
   "object": "model",
-  "name": "Gemma 2B Q4",
+  "name": "Gemma 1.1 2B Q4",
   "version": "1.3",
   "description": "Gemma is built from the same technology with Google's Gemini.",
   "format": "gguf",
   "settings": {
     "ctx_len": 8192,
     "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
-    "llama_model_path": "gemma-2b-it-q4_k_m.gguf",
+    "llama_model_path": "gemma-1.1-2b-it-Q4_K_M.gguf",
     "ngl": 19
   },
   "parameters": {
@@ -29,7 +29,7 @@
   "metadata": {
     "author": "Google",
     "tags": ["2B", "Finetuned", "Tiny"],
-    "size": 1500000000
+    "size": 1630000000
   },
   "engine": "nitro"
 }
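For orientation, the files edited above and below are Jan model manifests (model.json). A minimal TypeScript sketch of the shape these hunks imply — field names are taken from the JSON visible in this diff, not from the extension's actual type definitions, so treat it as an assumption:

// Hypothetical manifest shape, inferred from the hunks in this commit;
// the extension's real types may differ.
interface ModelManifest {
  sources: { filename: string; url: string }[] // where to download the GGUF
  id: string // unique model id, e.g. "gemma-1.1-2b-it"
  object: 'model'
  name: string // display name shown in the hub
  version: string
  description: string
  format: 'gguf'
  settings: {
    ctx_len: number // context window passed to the engine
    prompt_template: string // chat template with {prompt} / {system_message} slots
    llama_model_path: string // should match sources[].filename
    ngl: number // number of layers to offload to the GPU
  }
  parameters: {
    temperature: number
    top_p: number
    stream: boolean
    max_tokens: number
    stop: string[]
    frequency_penalty: number
    presence_penalty: number
  }
  metadata: { author: string; tags: string[]; size: number } // size in bytes
  engine: 'nitro'
}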
@@ -1,20 +1,20 @@
 {
   "sources": [
     {
-      "filename": "gemma-7b-it-q4_K_M.gguf",
-      "url": "https://huggingface.co/mmnga/gemma-7b-it-gguf/resolve/main/gemma-7b-it-q4_K_M.gguf"
+      "filename": "gemma-1.1-7b-it-q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf"
     }
   ],
-  "id": "gemma-7b",
+  "id": "gemma-1.1-7b-it",
   "object": "model",
-  "name": "Gemma 7B Q4",
+  "name": "Gemma 1.1 7B Q4",
   "version": "1.2",
   "description": "Google's Gemma is built for multilingual purpose",
   "format": "gguf",
   "settings": {
     "ctx_len": 8192,
     "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
-    "llama_model_path": "gemma-7b-it-q4_K_M.gguf",
+    "llama_model_path": "gemma-1.1-7b-it-q4_K_M.gguf",
     "ngl": 29
   },
   "parameters": {
@@ -0,0 +1,42 @@
+{
+  "sources": [
+    {
+      "filename": "gemma-2-27b-it-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/gemma-2-27b-it-GGUF/resolve/main/gemma-2-27b-it-Q4_K_M.gguf"
+    }
+  ],
+  "id": "gemma-2-27b-it",
+  "object": "model",
+  "name": "Gemma 2 27B Q4",
+  "version": "1.0",
+  "description": "Gemma is built from the same technology with Google's Gemini.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 8192,
+    "prompt_template": "<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model\n",
+    "llama_model_path": "gemma-2-27b-it-Q4_K_M.gguf",
+    "ngl": 47
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "stop": [
+      "<end_of_turn>"
+    ],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "Google",
+    "tags": [
+      "27B",
+      "Conversational",
+      "Text-generation",
+      "Featured"
+    ],
+    "size": 16600000000
+  },
+  "engine": "nitro"
+}
@@ -0,0 +1,43 @@
+{
+  "sources": [
+    {
+      "filename": "gemma-2-2b-it-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf"
+    }
+  ],
+  "id": "gemma-2-2b-it",
+  "object": "model",
+  "name": "Gemma 2 2B Q4",
+  "version": "1.0",
+  "description": "Gemma is built from the same technology with Google's Gemini.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 8192,
+    "prompt_template": "<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model\n",
+    "llama_model_path": "gemma-2-2b-it-Q4_K_M.gguf",
+    "ngl": 27
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "stop": [
+      "<end_of_turn>"
+    ],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "Google",
+    "tags": [
+      "2B",
+      "Tiny",
+      "Conversational",
+      "Text-generation",
+      "Featured"
+    ],
+    "size": 1710000000
+  },
+  "engine": "nitro"
+}
@@ -0,0 +1,42 @@
+{
+  "sources": [
+    {
+      "filename": "gemma-2-9b-it-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/gemma-2-9b-it-GGUF/resolve/main/gemma-2-9b-it-Q4_K_M.gguf"
+    }
+  ],
+  "id": "gemma-2-9b-it",
+  "object": "model",
+  "name": "Gemma 2 9B Q4",
+  "version": "1.0",
+  "description": "Gemma is built from the same technology with Google's Gemini.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 8192,
+    "prompt_template": "<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model\n",
+    "llama_model_path": "gemma-2-9b-it-Q4_K_M.gguf",
+    "ngl": 43
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "stop": [
+      "<end_of_turn>"
+    ],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "Google",
+    "tags": [
+      "9B",
+      "Conversational",
+      "Text-generation",
+      "Featured"
+    ],
+    "size": 5760000000
+  },
+  "engine": "nitro"
+}
@@ -2,7 +2,7 @@
   "sources": [
     {
       "filename": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
-      "url": "https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
+      "url": "https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
     }
   ],
   "id": "llama3-8b-instruct",
@@ -28,7 +28,7 @@
   },
   "metadata": {
     "author": "MetaAI",
-    "tags": ["7B", "Featured"],
+    "tags": ["8B", "Featured"],
     "size": 4920000000
   },
   "engine": "nitro"
@@ -0,0 +1,42 @@
+{
+  "sources": [
+    {
+      "filename": "Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf"
+    }
+  ],
+  "id": "llama3.1-70b-instruct",
+  "object": "model",
+  "name": "Llama 3.1 70B Q4 Instruct",
+  "version": "1.0",
+  "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
+    "llama_model_path": "Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf",
+    "ngl": 33
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "stop": [
+      "<|end_of_text|>",
+      "<|eot_id|>",
+      "<|eom_id|>"
+    ],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "MetaAI",
+    "tags": [
+      "70B",
+      "Featured"
+    ],
+    "size": 42500000000
+  },
+  "engine": "nitro"
+}
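The {system_message} and {prompt} placeholders in the template above are filled in at request time. A rough illustration of that substitution — not the extension's actual code, just the obvious string replacement the template format implies:

// Hedged sketch: naive placeholder substitution for the Llama 3.1 template.
function renderPrompt(template: string, systemMessage: string, prompt: string): string {
  return template
    .replace('{system_message}', systemMessage)
    .replace('{prompt}', prompt)
}

// Usage: splice messages into the manifest's template; generation then stops
// on the manifest's stop strings ("<|eot_id|>", "<|eom_id|>", "<|end_of_text|>").
const rendered = renderPrompt(
  '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n',
  'You are a helpful assistant.',
  'Hello!'
)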
@@ -0,0 +1,42 @@
+{
+  "sources": [
+    {
+      "filename": "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
+    }
+  ],
+  "id": "llama3.1-8b-instruct",
+  "object": "model",
+  "name": "Llama 3.1 8B Q4 Instruct",
+  "version": "1.0",
+  "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 131072,
+    "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
+    "llama_model_path": "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf",
+    "ngl": 33
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "stop": [
+      "<|end_of_text|>",
+      "<|eot_id|>",
+      "<|eom_id|>"
+    ],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "MetaAI",
+    "tags": [
+      "8B",
+      "Featured"
+    ],
+    "size": 4920000000
+  },
+  "engine": "nitro"
+}
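One detail worth flagging in the 8B hunk: the source filename uses "-Q4_K_M.gguf" while llama_model_path reads ".Q4_K_M.gguf" (the PR's "fix: 8b model" commit suggests this field needed correction). A small, hypothetical consistency check — not part of this PR — would catch that kind of drift between the two fields:

// Hypothetical manifest sanity check: the on-disk model path should match
// one of the declared download filenames.
function checkManifest(m: {
  sources: { filename: string }[]
  settings: { llama_model_path: string }
}): string[] {
  const errors: string[] = []
  const filenames = m.sources.map((s) => s.filename)
  if (!filenames.includes(m.settings.llama_model_path)) {
    errors.push(
      `llama_model_path "${m.settings.llama_model_path}" matches no source filename: ${filenames.join(', ')}`
    )
  }
  return errors
}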
@@ -12,8 +12,8 @@ const codeninja7bJson = require('./resources/models/codeninja-1.0-7b/model.json')
 const commandr34bJson = require('./resources/models/command-r-34b/model.json')
 const deepseekCoder13bJson = require('./resources/models/deepseek-coder-1.3b/model.json')
 const deepseekCoder34bJson = require('./resources/models/deepseek-coder-34b/model.json')
-const gemma2bJson = require('./resources/models/gemma-2b/model.json')
-const gemma7bJson = require('./resources/models/gemma-7b/model.json')
+const gemma112bJson = require('./resources/models/gemma-1.1-2b/model.json')
+const gemma117bJson = require('./resources/models/gemma-1.1-7b/model.json')
 const llama2Chat70bJson = require('./resources/models/llama2-chat-70b/model.json')
 const llama2Chat7bJson = require('./resources/models/llama2-chat-7b/model.json')
 const llamacorn1bJson = require('./resources/models/llamacorn-1.1b/model.json')
@@ -40,7 +40,11 @@ const aya35bJson = require('./resources/models/aya-23-35b/model.json')
 const phimediumJson = require('./resources/models/phi3-medium/model.json')
 const codestralJson = require('./resources/models/codestral-22b/model.json')
 const qwen2Json = require('./resources/models/qwen2-7b/model.json')
 
+const llama318bJson = require('./resources/models/llama3.1-8b-instruct/model.json')
+const llama3170bJson = require('./resources/models/llama3.1-70b-instruct/model.json')
+const gemma22bJson = require('./resources/models/gemma-2-2b/model.json')
+const gemma29bJson = require('./resources/models/gemma-2-9b/model.json')
+const gemma227bJson = require('./resources/models/gemma-2-27b/model.json')
 
 export default [
   {
@@ -60,8 +64,8 @@ export default [
       commandr34bJson,
       deepseekCoder13bJson,
      deepseekCoder34bJson,
-      gemma2bJson,
-      gemma7bJson,
+      gemma112bJson,
+      gemma117bJson,
       llama2Chat70bJson,
       llama2Chat7bJson,
       llamacorn1bJson,
@@ -87,7 +91,12 @@
       aya8bJson,
       aya35bJson,
       codestralJson,
-      qwen2Json
+      qwen2Json,
+      llama318bJson,
+      llama3170bJson,
+      gemma22bJson,
+      gemma29bJson,
+      gemma227bJson
     ]),
     NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
     DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
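For context on the rollup hunks above: the `]),` line and the JSON.stringify entries indicate the model manifests are inlined into the bundle as build-time constants via a define-style plugin. A hedged sketch of that surrounding rollup.config.ts structure — the plugin choice (@rollup/plugin-replace) and option names are assumptions; only the MODELS / NODE / DEFAULT_SETTINGS entries are visible in this commit's diff:

// Assumed surrounding context for the rollup.config.ts hunks. The const
// requires (qwen2Json, llama318bJson, ...) are the ones shown in the diff.
import replace from '@rollup/plugin-replace'

const plugins = [
  replace({
    preventAssignment: true,
    // Inlines the full model catalog as a compile-time MODELS constant.
    MODELS: JSON.stringify([
      // ...earlier manifests elided...
      qwen2Json,
      llama318bJson,
      llama3170bJson,
      gemma22bJson,
      gemma29bJson,
      gemma227bJson,
    ]),
    NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
    DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
  }),
]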