From 3c294d6a48f8da86511189f845e614ec28fd6363 Mon Sep 17 00:00:00 2001 From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com> Date: Wed, 24 Apr 2024 14:17:42 +0700 Subject: [PATCH] Chore: Add phi-3 (#2794) * add: phi-3 * chore: bump version * fix: correct model id --- .../inference-nitro-extension/package.json | 2 +- .../resources/models/phi3-3.8b/model.json | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index dabda9aec..9a98eed8c 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-nitro-extension", "productName": "Nitro Inference Engine", - "version": "1.0.2", + "version": "1.0.3", "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json new file mode 100644 index 000000000..0d789385b --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json @@ -0,0 +1,32 @@ +{ + "sources": [ + { + "url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", + "filename": "Phi-3-mini-4k-instruct-q4.gguf" + } + ], + "id": "phi3-3.8b", + "object": "model", + "name": "Phi-3 Mini", + "version": "1.0", + "description": "Phi-3 Mini is Microsoft's newest, compact model designed for mobile use.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|system|>\n{system_message}<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>\n", + "llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf" + }, + "parameters": { + "max_tokens": 4096, + "stop": ["<|end|>"] + }, + "metadata": { + "author": "Microsoft", + "tags": [ + "3B", + "Finetuned" + ], + "size": 2320000000 + }, + "engine": "nitro" + }