chore: add model.json for Llama 3 and bump other outdated model versions (#2773)
* chore: add model.json for Llama3 and other outdated model version * fix: consistency format * fix: correct folder id * update: bump version * add: stop words * fix: model.json * Update extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json * Update extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json Based on suggested change Co-authored-by: Nikolaus Kühn <nikolaus.kuehn@commercetools.com> --------- Co-authored-by: Van-QA <van@jan.ai> Co-authored-by: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com> Co-authored-by: Louis <louis@jan.ai> Co-authored-by: Nikolaus Kühn <nikolaus.kuehn@commercetools.com>
This commit is contained in:
parent
83e9e2b80a
commit
67db45ff3c
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "@janhq/inference-nitro-extension",
|
"name": "@janhq/inference-nitro-extension",
|
||||||
"productName": "Nitro Inference Engine",
|
"productName": "Nitro Inference Engine",
|
||||||
"version": "1.0.1",
|
"version": "1.0.2",
|
||||||
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
|
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"node": "dist/node/index.cjs.js",
|
"node": "dist/node/index.cjs.js",
|
||||||
|
|||||||
@ -8,7 +8,7 @@
|
|||||||
"id": "command-r-34b",
|
"id": "command-r-34b",
|
||||||
"object": "model",
|
"object": "model",
|
||||||
"name": "Command-R v01 34B Q4",
|
"name": "Command-R v01 34B Q4",
|
||||||
"version": "1.1",
|
"version": "1.2",
|
||||||
"description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
|
"description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
|
||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
|
|||||||
@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"sources": [
|
||||||
|
{
|
||||||
|
"filename": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
|
||||||
|
"url": "https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"id": "llama3-8b-instruct",
|
||||||
|
"object": "model",
|
||||||
|
"name": "Llama 3 8B Q4",
|
||||||
|
"version": "1.0",
|
||||||
|
"description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.",
|
||||||
|
"format": "gguf",
|
||||||
|
"settings": {
|
||||||
|
"ctx_len": 8192,
|
||||||
|
"prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
|
||||||
|
"llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"temperature": 0.7,
|
||||||
|
"top_p": 0.95,
|
||||||
|
"stream": true,
|
||||||
|
"max_tokens": 4096,
|
||||||
|
"stop": ["<|end_of_text|>","<|eot_id|>"],
|
||||||
|
"frequency_penalty": 0,
|
||||||
|
"presence_penalty": 0
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"author": "MetaAI",
|
||||||
|
"tags": ["7B", "Featured"],
|
||||||
|
"size": 4920000000
|
||||||
|
},
|
||||||
|
"engine": "nitro"
|
||||||
|
}
|
||||||
@ -8,7 +8,7 @@
|
|||||||
"id": "phind-34b",
|
"id": "phind-34b",
|
||||||
"object": "model",
|
"object": "model",
|
||||||
"name": "Phind 34B Q4",
|
"name": "Phind 34B Q4",
|
||||||
"version": "1.0",
|
"version": "1.1",
|
||||||
"description": "Phind 34B is the best Open-source coding model.",
|
"description": "Phind 34B is the best Open-source coding model.",
|
||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
|
|||||||
@ -36,6 +36,7 @@ const trinityv127bJson = require('./resources/models/trinity-v1.2-7b/model.json'
|
|||||||
const vistral7bJson = require('./resources/models/vistral-7b/model.json')
|
const vistral7bJson = require('./resources/models/vistral-7b/model.json')
|
||||||
const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.json')
|
const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.json')
|
||||||
const yi34bJson = require('./resources/models/yi-34b/model.json')
|
const yi34bJson = require('./resources/models/yi-34b/model.json')
|
||||||
|
const llama3Json = require('./resources/models/llama3-8b-instruct/model.json')
|
||||||
|
|
||||||
export default [
|
export default [
|
||||||
{
|
{
|
||||||
@ -79,6 +80,7 @@ export default [
|
|||||||
vistral7bJson,
|
vistral7bJson,
|
||||||
wizardcoder13bJson,
|
wizardcoder13bJson,
|
||||||
yi34bJson,
|
yi34bJson,
|
||||||
|
llama3Json
|
||||||
]),
|
]),
|
||||||
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
||||||
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user