diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index a756d275b..dabda9aec 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-nitro-extension", "productName": "Nitro Inference Engine", - "version": "1.0.1", + "version": "1.0.2", "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json index f564d7b23..a6827b391 100644 --- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json +++ b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json @@ -8,7 +8,7 @@ "id": "command-r-34b", "object": "model", "name": "Command-R v01 34B Q4", - "version": "1.1", + "version": "1.2", "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.", "format": "gguf", "settings": { diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json new file mode 100644 index 000000000..4dbb941ef --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json @@ -0,0 +1,34 @@ +{ + "sources": [ + { + "filename": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf" + } + ], + "id": "llama3-8b-instruct", + "object": "model", + 
"name": "Llama 3 8B Q4", + "version": "1.0", + "description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.", + "format": "gguf", + "settings": { + "ctx_len": 8192, + "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": ["<|end_of_text|>","<|eot_id|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "MetaAI", + "tags": ["8B", "Featured"], + "size": 4920000000 + }, + "engine": "nitro" +} diff --git a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json b/extensions/inference-nitro-extension/resources/models/phind-34b/model.json index aa8228bc9..6b0abe2a1 100644 --- a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json +++ b/extensions/inference-nitro-extension/resources/models/phind-34b/model.json @@ -8,7 +8,7 @@ "id": "phind-34b", "object": "model", "name": "Phind 34B Q4", - "version": "1.0", + "version": "1.1", "description": "Phind 34B is the best Open-source coding model.", "format": "gguf", "settings": { diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts index 7b2758881..497bb6466 100644 --- a/extensions/inference-nitro-extension/rollup.config.ts +++ b/extensions/inference-nitro-extension/rollup.config.ts @@ -36,6 +36,7 @@ const trinityv127bJson = require('./resources/models/trinity-v1.2-7b/model.json' const vistral7bJson = require('./resources/models/vistral-7b/model.json') const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.json') const yi34bJson = 
require('./resources/models/yi-34b/model.json') +const llama3Json = require('./resources/models/llama3-8b-instruct/model.json') export default [ { @@ -79,6 +80,7 @@ export default [ vistral7bJson, wizardcoder13bJson, yi34bJson, + llama3Json ]), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),