diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index 37a75c01f..7d31a1d38 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-nitro-extension", "productName": "Nitro Inference Engine", - "version": "1.0.6", + "version": "1.0.7", "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json new file mode 100644 index 000000000..6777cb6b6 --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json @@ -0,0 +1,32 @@ +{ + "sources": [ + { + "url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", + "filename": "Phi-3-mini-4k-instruct-q4.gguf" + } + ], + "id": "phi3-3.8b", + "object": "model", + "name": "Phi-3 Mini", + "version": "1.0", + "description": "Phi-3 Mini is Microsoft's newest, compact model designed for mobile use.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n", + "llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf" + }, + "parameters": { + "max_tokens": 4096, + "stop": ["<|end|>"] + }, + "metadata": { + "author": "Microsoft", + "tags": [ + "3B", + "Finetuned" + ], + "size": 2320000000 + }, + "engine": "nitro" + }