diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index f17d81853..1903eafef 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.9",
+  "version": "1.0.10",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json b/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
new file mode 100644
index 000000000..c70c02080
--- /dev/null
+++ b/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "aya-23-35B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/aya-23-35B-GGUF/resolve/main/aya-23-35B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "aya-23-35b",
+  "object": "model",
+  "name": "Aya 23 35B Q4",
+  "version": "1.0",
+  "description": "Aya 23 can talk up to 23 languages fluently.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 8192,
+    "prompt_template": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
+    "llama_model_path": "aya-23-35B-Q4_K_M.gguf",
+    "ngl": 40
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "frequency_penalty": 0,
+    "presence_penalty": 0,
+    "stop": ["<|END_OF_TURN_TOKEN|>"]
+  },
+  "metadata": {
+    "author": "CohereForAI",
+    "tags": ["34B", "Finetuned"],
+    "size": 21556982144
+  },
+  "engine": "nitro"
+}
diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json b/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
new file mode 100644
index 000000000..ccb9a6f7f
--- /dev/null
+++ b/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
@@ -0,0 +1,35 @@
+{
+  "sources": [
+    {
+      "filename": "aya-23-8B-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/aya-23-8B-GGUF/resolve/main/aya-23-8B-Q4_K_M.gguf"
+    }
+  ],
+  "id": "aya-23-8b",
+  "object": "model",
+  "name": "Aya 23 8B Q4",
+  "version": "1.0",
+  "description": "Aya 23 can talk up to 23 languages fluently.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 8192,
+    "prompt_template": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
+    "llama_model_path": "aya-23-8B-Q4_K_M.gguf",
+    "ngl": 32
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 8192,
+    "frequency_penalty": 0,
+    "presence_penalty": 0,
+    "stop": ["<|END_OF_TURN_TOKEN|>"]
+  },
+  "metadata": {
+    "author": "CohereForAI",
+    "tags": ["7B", "Finetuned", "Featured"],
+    "size": 5056982144
+  },
+  "engine": "nitro"
+}
diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts
index 7c3cc90d7..c28d5b64e 100644
--- a/extensions/inference-nitro-extension/rollup.config.ts
+++ b/extensions/inference-nitro-extension/rollup.config.ts
@@ -35,6 +35,9 @@ const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.json')
 const yi34bJson = require('./resources/models/yi-34b/model.json')
 const llama3Json = require('./resources/models/llama3-8b-instruct/model.json')
 const llama3Hermes8bJson = require('./resources/models/llama3-hermes-8b/model.json')
+const aya8bJson = require('./resources/models/aya-23-8b/model.json')
+const aya35bJson = require('./resources/models/aya-23-35b/model.json')
+const phimediumJson = require('./resources/models/phi3-medium/model.json')
 
 export default [
   {
@@ -76,7 +79,10 @@ export default [
         wizardcoder13bJson,
         yi34bJson,
         llama3Json,
-        llama3Hermes8bJson
+        llama3Hermes8bJson,
+        phimediumJson,
+        aya8bJson,
+        aya35bJson
       ]),
       NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
       DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
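Both new model.json files reuse Cohere's chat template via the `prompt_template` field, with `{system_prompt}` and `{prompt}` placeholders and `<|END_OF_TURN_TOKEN|>` as the stop sequence in `parameters.stop`. The snippet below is a minimal sketch of how such a template gets filled before text is handed to the engine; it is not part of this diff, and the `renderPrompt` helper is a hypothetical stand-in for whatever substitution the cortex.cpp runtime actually performs.

```ts
// Sketch only (assumption, not code from this PR): substitute the
// {system_prompt} and {prompt} placeholders of the Aya prompt_template.
// Generation is then expected to stop at <|END_OF_TURN_TOKEN|>, matching
// parameters.stop in both model.json files.
const ayaPromptTemplate =
  '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|>' +
  '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|>' +
  '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'

// Hypothetical helper name; performs a simple one-shot placeholder substitution.
function renderPrompt(template: string, systemPrompt: string, userPrompt: string): string {
  return template
    .replace('{system_prompt}', systemPrompt)
    .replace('{prompt}', userPrompt)
}

console.log(renderPrompt(ayaPromptTemplate, 'You are a helpful assistant.', 'Bonjour, comment ça va ?'))
```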