diff --git a/extensions/inference-groq-extension/resources/models.json b/extensions/inference-groq-extension/resources/models.json index 81275f47c..6fce1c71b 100644 --- a/extensions/inference-groq-extension/resources/models.json +++ b/extensions/inference-groq-extension/resources/models.json @@ -69,9 +69,9 @@ ], "id": "gemma-7b-it", "object": "model", - "name": "Groq Gemma 7b Instruct", - "version": "1.1", - "description": "Groq Gemma 7b Instruct with supercharged speed!", + "name": "Groq Gemma 7B Instruct", + "version": "1.2", + "description": "Groq Gemma 7B Instruct with supercharged speed!", "format": "api", "settings": {}, "parameters": { @@ -99,9 +99,9 @@ ], "id": "mixtral-8x7b-32768", "object": "model", - "name": "Groq Mixtral 8x7b Instruct", - "version": "1.1", - "description": "Groq Mixtral 8x7b Instruct is Mixtral with supercharged speed!", + "name": "Groq Mixtral 8x7B Instruct", + "version": "1.2", + "description": "Groq Mixtral 8x7B Instruct is Mixtral with supercharged speed!", "format": "api", "settings": {}, "parameters": { diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index 1e3ea6d38..425e4b49c 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.15", + "version": "1.0.16", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. 
See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json b/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json index b82cf2f39..163373014 100644 --- a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json +++ b/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json @@ -8,7 +8,7 @@ "id": "aya-23-8b", "object": "model", "name": "Aya 23 8B Q4", - "version": "1.1", + "version": "1.2", "description": "Aya 23 can talk upto 23 languages fluently.", "format": "gguf", "settings": { @@ -28,7 +28,7 @@ }, "metadata": { "author": "CohereForAI", - "tags": ["7B", "Finetuned","Featured"], + "tags": ["8B", "Finetuned"], "size": 5056982144 }, "engine": "nitro" diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json index 53f7f43e9..36fceaad2 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json +++ b/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json @@ -7,8 +7,8 @@ ], "id": "deepseek-coder-1.3b", "object": "model", - "name": "Deepseek Coder 1.3B Q8", - "version": "1.2", + "name": "Deepseek Coder 1.3B Instruct Q8", + "version": "1.3", "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", "format": "gguf", "settings": { diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json index 0a3e58b48..103c4cbcb 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json +++ 
b/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json @@ -7,8 +7,8 @@ ], "id": "deepseek-coder-34b", "object": "model", - "name": "Deepseek Coder 33B Q4", - "version": "1.2", + "name": "Deepseek Coder 33B Instruct Q4", + "version": "1.3", "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", "format": "gguf", "settings": { diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json index a532c1dc3..b29043483 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json +++ b/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json @@ -8,7 +8,7 @@ "id": "gemma-1.1-7b-it", "object": "model", "name": "Gemma 1.1 7B Q4", - "version": "1.2", + "version": "1.3", "description": "Google's Gemma is built for multilingual purpose", "format": "gguf", "settings": { @@ -28,7 +28,7 @@ }, "metadata": { "author": "Google", - "tags": ["7B", "Finetuned", "Featured"], + "tags": ["7B", "Finetuned"], "size": 5330000000 }, "engine": "nitro" diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json index ced7e1ca8..4d84b9967 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json +++ b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json @@ -7,8 +7,8 @@ ], "id": "llama3-8b-instruct", "object": "model", - "name": "Llama 3 8B Q4", - "version": "1.2", + "name": "Llama 3 8B Instruct Q4", + "version": "1.4", "description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.", "format": "gguf", "settings": { @@ -28,7 +28,7 @@ }, "metadata": { "author": "MetaAI", - "tags": 
["8B", "Featured"], + "tags": ["8B"], "size": 4920000000 }, "engine": "nitro" diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json index 4d8eab7e3..780ef8465 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json +++ b/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json @@ -7,8 +7,8 @@ ], "id": "llama3.1-70b-instruct", "object": "model", - "name": "Llama 3.1 70B Q4 Instruct", - "version": "1.0", + "name": "Llama 3.1 70B Instruct Q4", + "version": "1.1", "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.", "format": "gguf", "settings": { diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json index fe44b0b1c..9de8555be 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json +++ b/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json @@ -7,8 +7,8 @@ ], "id": "llama3.1-8b-instruct", "object": "model", - "name": "Llama 3.1 8B Q4 Instruct", - "version": "1.0", + "name": "Llama 3.1 8B Instruct Q4", + "version": "1.1", "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.", "format": "gguf", "settings": { diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json index d223306f8..88f701466 100644 --- a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json +++ b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json @@ -7,9 +7,9 @@ ], "id": 
"mistral-ins-7b-q4", "object": "model", - "name": "Mistral Instruct 7B Q4", - "version": "1.3", - "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.", + "name": "Mistral 7B Instruct Q4", + "version": "1.5", + "description": "Mistral 7B Instruct model, specifically designed for a comprehensive understanding of the world.", "format": "gguf", "settings": { "ctx_len": 32768, @@ -28,7 +28,7 @@ }, "metadata": { "author": "MistralAI", - "tags": ["Featured", "7B", "Foundational Model"], + "tags": ["7B", "Foundational Model"], "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png" }, diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json index 2a572db92..6459b049d 100644 --- a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json +++ b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json @@ -1,20 +1,20 @@ { "sources": [ { - "url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", - "filename": "Phi-3-mini-4k-instruct-q4.gguf" + "url": "https://huggingface.co/cortexso/phi3/resolve/main/model.gguf", + "filename": "model.gguf" } ], "id": "phi3-3.8b", "object": "model", - "name": "Phi-3 Mini", - "version": "1.2", + "name": "Phi-3 Mini Instruct Q4", + "version": "1.3", "description": "Phi-3 Mini is Microsoft's newest, compact model designed for mobile use.", "format": "gguf", "settings": { "ctx_len": 4096, "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n", - "llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf", + "llama_model_path": "model.gguf", "ngl": 33 }, "parameters": { @@ -35,4 +35,4 @@ "size": 2320000000 }, "engine": "nitro" - } \ No newline at end of file +} \ No newline at end of file diff --git 
a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json index ac83ca077..50944b9fe 100644 --- a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json +++ b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json @@ -7,13 +7,13 @@ ], "id": "phi3-medium", "object": "model", - "name": "Phi-3 Medium", - "version": "1.2", + "name": "Phi-3 Medium Instruct Q4", + "version": "1.3", "description": "Phi-3 Medium is Microsoft's latest SOTA model.", "format": "gguf", "settings": { "ctx_len": 128000, - "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n", + "prompt_template": "<|user|> {prompt}<|end|><|assistant|>", "llama_model_path": "Phi-3-medium-128k-instruct-Q4_K_M.gguf", "ngl": 33 }, diff --git a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json b/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json index 8939a98f3..a7613982c 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json +++ b/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json @@ -7,8 +7,8 @@ ], "id": "qwen2-7b", "object": "model", - "name": "Qwen 2 Instruct 7B Q4", - "version": "1.1", + "name": "Qwen 2 7B Instruct Q4", + "version": "1.2", "description": "Qwen is optimized at Chinese, ideal for everyday tasks.", "format": "gguf", "settings": {