diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index 23c3ec613..21a345d6a 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.12",
+  "version": "1.0.13",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json b/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
index c89bb16cd..8c3029be0 100644
--- a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
@@ -8,7 +8,7 @@
   "id": "aya-23-35b",
   "object": "model",
   "name": "Aya 23 35B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Aya 23 can talk upto 23 languages fluently.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json b/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
index e04b5f1f7..b82cf2f39 100644
--- a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
@@ -8,7 +8,7 @@
   "id": "aya-23-8b",
   "object": "model",
   "name": "Aya 23 8B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Aya 23 can talk upto 23 languages fluently.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json b/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
index ca548369e..fb2a5f346 100644
--- a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
@@ -8,7 +8,7 @@
   "id": "codeninja-1.0-7b",
   "object": "model",
   "name": "CodeNinja 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json b/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
index 6b12329f4..f90f848dd 100644
--- a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
@@ -8,7 +8,7 @@
   "id": "codestral-22b",
   "object": "model",
   "name": "Codestral 22B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Latest model from MistralAI optimized for code generation tasks.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
index e9abfa267..d29e70a17 100644
--- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
@@ -8,7 +8,7 @@
   "id": "command-r-34b",
   "object": "model",
   "name": "Command-R v01 34B Q4",
-  "version": "1.4",
+  "version": "1.5",
   "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
index 00a11d1a5..53f7f43e9 100644
--- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
@@ -8,7 +8,7 @@
   "id": "deepseek-coder-1.3b",
   "object": "model",
   "name": "Deepseek Coder 1.3B Q8",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
index d209963f7..0a3e58b48 100644
--- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
@@ -8,7 +8,7 @@
   "id": "deepseek-coder-34b",
   "object": "model",
   "name": "Deepseek Coder 33B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
index b4665f4c0..e5ee3c239 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
@@ -8,7 +8,7 @@
   "id": "gemma-2b",
   "object": "model",
   "name": "Gemma 2B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Gemma is built from the same technology with Google's Gemini.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
index 9461a206b..615f1149b 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
@@ -8,7 +8,7 @@
   "id": "gemma-7b",
   "object": "model",
   "name": "Gemma 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Google's Gemma is built for multilingual purpose",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json b/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
index ef13dc353..0c770b189 100644
--- a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
@@ -8,7 +8,7 @@
   "id": "llama2-chat-70b",
   "object": "model",
   "name": "Llama 2 Chat 70B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Llama 2 specifically designed for a comprehensive understanding the world.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json b/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json
index 2e3d73b7c..9efd634b5 100644
--- a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json
@@ -8,7 +8,7 @@
   "id": "llama2-chat-7b",
   "object": "model",
   "name": "Llama 2 Chat 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Llama 2 specifically designed for a comprehensive understanding the world.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
index b54c78f3d..313bf8425 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
@@ -8,7 +8,7 @@
   "id": "llama3-8b-instruct",
   "object": "model",
   "name": "Llama 3 8B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json b/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json
index 9d5ab57fb..a3601c8cd 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json
@@ -8,7 +8,7 @@
   "id": "llama3-hermes-8b",
   "object": "model",
   "name": "Hermes Pro Llama 3 8B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Hermes Pro is well-designed for General chat and JSON output.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json b/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
index 9bcbc3f27..94b62ec82 100644
--- a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
@@ -8,7 +8,7 @@
   "id": "llamacorn-1.1b",
   "object": "model",
   "name": "LlamaCorn 1.1B Q8",
-  "version": "1.0",
+  "version": "1.1",
   "description": "LlamaCorn is designed to improve chat functionality from TinyLlama.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
index 972af8fb4..d223306f8 100644
--- a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
+++ b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
@@ -8,7 +8,7 @@
   "id": "mistral-ins-7b-q4",
   "object": "model",
   "name": "Mistral Instruct 7B Q4",
-  "version": "1.2",
+  "version": "1.3",
   "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json b/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json
index 991410896..10c17c310 100644
--- a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json
@@ -8,7 +8,7 @@
   "id": "noromaid-7b",
   "object": "model",
   "name": "Noromaid 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json b/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json
index 74315c8b9..e743a74c9 100644
--- a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json
@@ -8,7 +8,7 @@
   "id": "openchat-3.5-7b",
   "object": "model",
   "name": "Openchat-3.5 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "The performance of Openchat surpasses ChatGPT-3.5 and Grok-1 across various benchmarks.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json
index 451747a30..2a572db92 100644
--- a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json
@@ -8,7 +8,7 @@
   "id": "phi3-3.8b",
   "object": "model",
   "name": "Phi-3 Mini",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Phi-3 Mini is Microsoft's newest, compact model designed for mobile use.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
index 274c96180..ac83ca077 100644
--- a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
+++ b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
@@ -8,7 +8,7 @@
   "id": "phi3-medium",
   "object": "model",
   "name": "Phi-3 Medium",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Phi-3 Medium is Microsoft's latest SOTA model.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json b/extensions/inference-nitro-extension/resources/models/phind-34b/model.json
index 6a8ed507b..14099a635 100644
--- a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/phind-34b/model.json
@@ -8,7 +8,7 @@
   "id": "phind-34b",
   "object": "model",
   "name": "Phind 34B Q4",
-  "version": "1.2",
+  "version": "1.3",
   "description": "Phind 34B is the best Open-source coding model.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json b/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
index 4b8713230..85081a605 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
@@ -8,7 +8,7 @@
   "id": "qwen-7b",
   "object": "model",
   "name": "Qwen Chat 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Qwen is optimized at Chinese, ideal for everyday tasks.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json b/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json
index 38bcddce0..8939a98f3 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json
@@ -8,7 +8,7 @@
   "id": "qwen2-7b",
   "object": "model",
   "name": "Qwen 2 Instruct 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Qwen is optimized at Chinese, ideal for everyday tasks.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json b/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
index 983566adf..938e03fb7 100644
--- a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
@@ -8,7 +8,7 @@
   "id": "stable-zephyr-3b",
   "object": "model",
   "name": "Stable Zephyr 3B Q8",
-  "version": "1.0",
+  "version": "1.1",
   "description": "StableLM Zephyr 3B is a best model for low-end machine.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
index f8f85c8e9..c17d1c35e 100644
--- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
@@ -8,7 +8,7 @@
   "id": "stealth-v1.2-7b",
   "object": "model",
   "name": "Stealth 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json b/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
index 815533466..a49e79073 100644
--- a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
@@ -8,7 +8,7 @@
   "id": "tinyllama-1.1b",
   "object": "model",
   "name": "TinyLlama Chat 1.1B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
index 5159cab58..6c9aa2b89 100644
--- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
@@ -8,7 +8,7 @@
   "id": "trinity-v1.2-7b",
   "object": "model",
   "name": "Trinity-v1.2 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json b/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
index e9a255c4a..b84f2c676 100644
--- a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
@@ -8,7 +8,7 @@
   "id": "vistral-7b",
   "object": "model",
   "name": "Vistral 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Vistral 7B has a deep understanding of Vietnamese.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json b/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
index 82aa4adf2..101eedfd1 100644
--- a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
@@ -8,7 +8,7 @@
   "id": "wizardcoder-13b",
   "object": "model",
   "name": "Wizard Coder Python 13B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
   "format": "gguf",
   "settings": {
diff --git a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json b/extensions/inference-nitro-extension/resources/models/yi-34b/model.json
index 81700a046..db7df9f2d 100644
--- a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/yi-34b/model.json
@@ -8,7 +8,7 @@
   "id": "yi-34b",
   "object": "model",
   "name": "Yi 34B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.",
   "format": "gguf",
   "settings": {
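
Note: every hunk above applies the same mechanical change, bumping the "version" field of one bundled model.json by a single step, alongside the extension's own patch bump in package.json. A minimal TypeScript sketch of how such a sweep could be scripted is shown below; it is a hypothetical helper (not part of this change), the file name bump-model-versions.ts is invented, and it assumes the layout visible in the paths above (resources/models/<model-id>/model.json with two-space JSON indentation).

// bump-model-versions.ts (hypothetical helper, not included in this change)
// Bumps the "version" field of every resources/models/*/model.json,
// e.g. "1.1" -> "1.2", mirroring the hunks in the diff above.
import { readdirSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";

const modelsDir = join(
  "extensions",
  "inference-nitro-extension",
  "resources",
  "models"
);

// Increment the last numeric component of a dotted version string.
function bump(version: string): string {
  const parts = version.split(".");
  parts[parts.length - 1] = String(Number(parts[parts.length - 1]) + 1);
  return parts.join(".");
}

for (const entry of readdirSync(modelsDir, { withFileTypes: true })) {
  if (!entry.isDirectory()) continue;
  const file = join(modelsDir, entry.name, "model.json");
  const model = JSON.parse(readFileSync(file, "utf8"));
  model.version = bump(model.version);
  // Two-space indent plus a trailing newline, matching the existing files.
  writeFileSync(file, JSON.stringify(model, null, 2) + "\n");
}

Run from the repository root, e.g. with "npx tsx bump-model-versions.ts" (assumed invocation), then review the resulting diff before committing.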