diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index 13db97f64..f17d81853 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.8", + "version": "1.0.9", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json new file mode 100644 index 000000000..63dda8f0a --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json @@ -0,0 +1,38 @@ +{ + "sources": [ + { + "url": "https://huggingface.co/bartowski/Phi-3-medium-128k-instruct-GGUF/resolve/main/Phi-3-medium-128k-instruct-Q4_K_M.gguf", + "filename": "Phi-3-medium-128k-instruct-Q4_K_M.gguf" + } + ], + "id": "phi3-medium", + "object": "model", + "name": "Phi-3 Medium", + "version": "1.0", + "description": "Phi-3 Medium is Microsoft's latest SOTA model.", + "format": "gguf", + "settings": { + "ctx_len": 128000, + "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n", + "llama_model_path": "Phi-3-medium-128k-instruct-Q4_K_M.gguf", + "ngl": 32 + }, + "parameters": { + "max_tokens": 128000, + "stop": ["<|end|>"], + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Microsoft", + "tags": [ + "14B", + "Finetuned" + ], + "size": 8366000000 + }, + "engine": "nitro" + } \ No newline at end of file