diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index d396778d9..13db97f64 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.7", + "version": "1.0.8", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json index 6777cb6b6..f384fd953 100644 --- a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json +++ b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json @@ -8,17 +8,23 @@ "id": "phi3-3.8b", "object": "model", "name": "Phi-3 Mini", - "version": "1.0", + "version": "1.1", "description": "Phi-3 Mini is Microsoft's newest, compact model designed for mobile use.", "format": "gguf", "settings": { "ctx_len": 4096, "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n", - "llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf" + "llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf", + "ngl": 32 }, "parameters": { "max_tokens": 4096, - "stop": ["<|end|>"] + "stop": ["<|end|>"], + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "frequency_penalty": 0, + "presence_penalty": 0 }, "metadata": { "author": "Microsoft", diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts index b0707f404..7c3cc90d7 100644 --- a/extensions/inference-nitro-extension/rollup.config.ts +++ b/extensions/inference-nitro-extension/rollup.config.ts @@ -23,6 +23,7 @@ const mistralIns7bq4Json = require('./resources/models/mistral-ins-7b-q4/model.j const mixtral8x7bInstructJson = require('./resources/models/mixtral-8x7b-instruct/model.json') const noromaid7bJson = require('./resources/models/noromaid-7b/model.json') const openchat357bJson = require('./resources/models/openchat-3.5-7b/model.json') +const phi3bJson = require('./resources/models/phi3-3.8b/model.json') const phind34bJson = require('./resources/models/phind-34b/model.json') const qwen7bJson = require('./resources/models/qwen-7b/model.json') const stableZephyr3bJson = require('./resources/models/stable-zephyr-3b/model.json') @@ -64,6 +65,7 @@ export default [ mixtral8x7bInstructJson, noromaid7bJson, openchat357bJson, + phi3bJson, phind34bJson, qwen7bJson, stableZephyr3bJson,