diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index e15b09ec0..37a75c01f 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-nitro-extension",
   "productName": "Nitro Inference Engine",
-  "version": "1.0.5",
+  "version": "1.0.6",
   "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
diff --git a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json b/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
index b82a430a1..8497aa11c 100644
--- a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 8192,
     "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
-    "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf"
+    "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf",
+    "ngl": 32
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
index 031dc88d4..fdf638d83 100644
--- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 131072,
     "prompt_template": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
-    "llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf"
+    "llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf",
+    "ngl": 40
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
index 49814adf2..f8fe7344c 100644
--- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
-    "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf"
+    "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf",
+    "ngl": 24
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
index 9d3f6a4b2..b488e6bbb 100644
--- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
-    "llama_model_path": "deepseek-coder-33b-instruct.Q4_K_M.gguf"
+    "llama_model_path": "deepseek-coder-33b-instruct.Q4_K_M.gguf",
+    "ngl": 62
   },
   "parameters": {
     "temperature": 0.7,
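Note: the recurring change across these model files is a new "ngl" entry in "settings". In Nitro this should map to llama.cpp's n_gpu_layers — the number of transformer layers offloaded to the GPU — and the values chosen appear to track each architecture's actual layer count (32 for 7B Llama-family models, 48 for CodeLlama 34B, 80 for Llama 2 70B; a value above the real count, like Mixtral's 100 below, simply offloads every layer). A minimal TypeScript sketch of the touched block; the interface name and optionality are illustrative assumptions, not Jan's actual typings:

// Illustrative typing for the "settings" blocks edited in this diff;
// the field names come from the JSON above, the interface is assumed.
interface NitroModelSettings {
  ctx_len: number           // context window, in tokens
  prompt_template: string   // uses {system_message} and {prompt} placeholders
  llama_model_path: string  // GGUF file bundled with the model
  ngl?: number              // GPU layers to offload (llama.cpp's n_gpu_layers)
}

// Mirrors the codeninja-1.0-7b hunk: a 7B Llama-family model has 32
// layers, so ngl: 32 offloads the whole model when a GPU is available.
const codeninjaSettings: NitroModelSettings = {
  ctx_len: 8192,
  prompt_template:
    'GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:',
  llama_model_path: 'codeninja-1.0-openchat-7b.Q4_K_M.gguf',
  ngl: 32,
}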
diff --git a/extensions/inference-nitro-extension/resources/models/dolphin-phi-2/model.json b/extensions/inference-nitro-extension/resources/models/dolphin-phi-2/model.json
deleted file mode 100644
index b2a837bf0..000000000
--- a/extensions/inference-nitro-extension/resources/models/dolphin-phi-2/model.json
+++ /dev/null
@@ -1,32 +0,0 @@
-{
-  "sources": [
-    {
-      "url": "https://huggingface.co/TheBloke/dolphin-2_6-phi-2-GGUF/resolve/main/dolphin-2_6-phi-2.Q8_0.gguf",
-      "filename": "dolphin-2_6-phi-2.Q8_0.gguf"
-    }
-  ],
-  "id": "dolphin-phi-2",
-  "object": "model",
-  "name": "Dolphin Phi-2 2.7B Q8",
-  "version": "1.0",
-  "description": "Dolphin Phi-2 is a good alternative for Phi-2 in chatting",
-  "format": "gguf",
-  "settings": {
-    "ctx_len": 4096,
-    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
-    "llama_model_path": "dolphin-2_6-phi-2.Q8_0.gguf"
-  },
-  "parameters": {
-    "max_tokens": 4096,
-    "stop": ["<|im_end|>"]
-  },
-  "metadata": {
-    "author": "Cognitive Computations, Microsoft",
-    "tags": [
-      "3B",
-      "Finetuned"
-    ],
-    "size": 2960000000
-  },
-  "engine": "nitro"
- }
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
index 9673b62dd..a9acb6ef8 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 8192,
     "prompt_template": "user\n{prompt}\nmodel",
-    "llama_model_path": "gemma-2b-it-q4_k_m.gguf"
+    "llama_model_path": "gemma-2b-it-q4_k_m.gguf",
+    "ngl": 18
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
index d7384a8b9..96afe7a61 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 8192,
     "prompt_template": "user\n{prompt}\nmodel",
-    "llama_model_path": "gemma-7b-it-q4_K_M.gguf"
+    "llama_model_path": "gemma-7b-it-q4_K_M.gguf",
+    "ngl": 28
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json b/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
index 34180604b..4b255c9e2 100644
--- a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 4096,
     "prompt_template": "[INST] <>\n{system_message}<>\n{prompt}[/INST]",
-    "llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf"
+    "llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf",
+    "ngl": 80
   },
   "parameters": {
     "temperature": 0.7,
"llama-2-7b-chat.Q4_K_M.gguf" + "llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf", + "ngl": 32 }, "parameters": { "temperature": 0.7, diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json index 4dbb941ef..7bed6e43c 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json +++ b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json @@ -8,19 +8,20 @@ "id": "llama3-8b-instruct", "object": "model", "name": "Llama 3 8B Q4", - "version": "1.0", + "version": "1.1", "description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.", "format": "gguf", "settings": { "ctx_len": 8192, "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", - "llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf" + "llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf", + "ngl": 32 }, "parameters": { "temperature": 0.7, "top_p": 0.95, "stream": true, - "max_tokens": 4096, + "max_tokens": 8192, "stop": ["<|end_of_text|>","<|eot_id|>"], "frequency_penalty": 0, "presence_penalty": 0 diff --git a/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json b/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json similarity index 54% rename from extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json rename to extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json index e478ff4cd..16d50b9f9 100644 --- a/extensions/inference-nitro-extension/resources/models/hermes-pro-7b/model.json +++ b/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json @@ -1,35 +1,38 @@ { "sources": [ { - "filename": "Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf", - "url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/resolve/main/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf" + "filename": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", + "url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf" } ], - "id": "hermes-pro-7b", + "id": "llama3-hermes-8b", "object": "model", - "name": "Hermes Pro 7B Q4", + "name": "Hermes Pro Llama 3 8B Q4", "version": "1.1", - "description": "Hermes Pro is superior in Roleplaying, Reasoning and Explaining problem.", + "description": "Hermes Pro is well-designed for General chat and JSON output.", "format": "gguf", "settings": { - "ctx_len": 4096, + "ctx_len": 8192, "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf" + "llama_model_path": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", + "ngl": 32 }, "parameters": { "temperature": 0.7, "top_p": 0.95, "stream": true, - "max_tokens": 4096, + "max_tokens": 8192, "stop": [], "frequency_penalty": 0, "presence_penalty": 0 }, "metadata": { "author": "NousResearch", - "tags": ["7B", "Finetuned"], - "size": 4370000000 + "tags": [ + "7B", + "Finetuned" + ], + "size": 4920000000 }, "engine": "nitro" } - diff --git a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json 
diff --git a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json b/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
index 056fb9050..b8da24e71 100644
--- a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 2048,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
-    "llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf"
+    "llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf",
+    "ngl": 22
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/miqu-70b/model.json b/extensions/inference-nitro-extension/resources/models/miqu-70b/model.json
deleted file mode 100644
index 23e110d0e..000000000
--- a/extensions/inference-nitro-extension/resources/models/miqu-70b/model.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "sources": [
-    {
-      "filename": "miqu-1-70b.q4_k_m.gguf",
-      "url": "https://huggingface.co/miqudev/miqu-1-70b/resolve/main/miqu-1-70b.q4_k_m.gguf"
-    }
-  ],
-  "id": "miqu-70b",
-  "object": "model",
-  "name": "Mistral 70B Q4",
-  "version": "1.0",
-  "description": "A leak weight of Mistral 70B model.",
-  "format": "gguf",
-  "settings": {
-    "ctx_len": 4096,
-    "prompt_template": "[INST] {prompt} [/INST]",
-    "llama_model_path": "miqu-1-70b.q4_k_m.gguf"
-  },
-  "parameters": {
-    "temperature": 0.7,
-    "top_p": 0.95,
-    "stream": true,
-    "max_tokens": 4096,
-    "frequency_penalty": 0,
-    "presence_penalty": 0
-  },
-  "metadata": {
-    "author": "miqudev",
-    "tags": ["70B", "Foundational Model"],
-    "size": 26440000000
-  },
-  "engine": "nitro"
- }
-
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
index 73a908e17..c372aa329 100644
--- a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
+++ b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 32768,
     "prompt_template": "[INST] {prompt} [/INST]",
-    "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+    "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+    "ngl": 32
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json
index 0935a25a3..4413b415c 100644
--- a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json
+++ b/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 32768,
     "prompt_template": "[INST] {prompt} [/INST]",
-    "llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
+    "llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
+    "ngl": 100
   },
   "parameters": {
     "temperature": 0.7,
"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf" + "llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf", + "ngl": 32 }, "parameters": { "temperature": 0.7, diff --git a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json b/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json index b37a470d3..94967962d 100644 --- a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json +++ b/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json @@ -14,7 +14,8 @@ "settings": { "ctx_len": 8192, "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:", - "llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf" + "llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf", + "ngl": 32 }, "parameters": { "temperature": 0.7, diff --git a/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json b/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json deleted file mode 100644 index dbbc9e0ec..000000000 --- a/extensions/inference-nitro-extension/resources/models/openhermes-neural-7b/model.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "sources": [ - { - "filename": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf", - "url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf" - } - ], - "id": "openhermes-neural-7b", - "object": "model", - "name": "OpenHermes Neural 7B Q4", - "version": "1.1", - "description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Intel, Jan", - "tags": ["7B", "Merged"], - "size": 4370000000, - "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/openhermes-neural-7b/cover.png" - }, - "engine": "nitro" -} diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json deleted file mode 100644 index 0d789385b..000000000 --- a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "sources": [ - { - "url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", - "filename": "Phi-3-mini-4k-instruct-q4.gguf" - } - ], - "id": "phi3-3.8b", - "object": "model", - "name": "Phi-3 Mini", - "version": "1.0", - "description": "Phi-3 Mini is Microsoft's newest, compact model designed for mobile use.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|system|>\n{system_message}<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>\n", - "llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf" - }, - "parameters": { - "max_tokens": 4096, - "stop": ["<|end|>"] - }, - "metadata": { - "author": "Microsoft", - "tags": [ - "3B", - "Finetuned" - ], - "size": 2320000000 - }, - "engine": "nitro" - } diff 
diff --git a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json b/extensions/inference-nitro-extension/resources/models/phind-34b/model.json
index f79f4e2d0..f96fb4a49 100644
--- a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/phind-34b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 16384,
     "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
-    "llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf"
+    "llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf",
+    "ngl": 48
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json b/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
index 6c0f642e3..202221bd7 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
-    "llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf"
+    "llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf",
+    "ngl": 32
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json b/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
index 1e789bf07..81bf4306c 100644
--- a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 4096,
     "prompt_template": "<|user|>\n{prompt}<|endoftext|>\n<|assistant|>",
-    "llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf"
+    "llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf",
+    "ngl": 32
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
index 574b4d893..2848931bb 100644
--- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
-    "llama_model_path": "stealth-v1.3.Q4_K_M.gguf"
+    "llama_model_path": "stealth-v1.3.Q4_K_M.gguf",
+    "ngl": 32
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json b/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
index 6a9187fa5..443ee7dcd 100644
--- a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 4096,
     "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
-    "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+    "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
+    "ngl": 22
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
index 9c988e19a..1a98ddb2e 100644
--- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
-    "llama_model_path": "trinity-v1.2.Q4_K_M.gguf"
+    "llama_model_path": "trinity-v1.2.Q4_K_M.gguf",
+    "ngl": 32
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json b/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
index d435fb784..978f8cf54 100644
--- a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 32768,
     "prompt_template": "[INST] <>\n{system_message}\n<>\n{prompt} [/INST]",
-    "llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf"
+    "llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf",
+    "ngl": 32
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json b/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
index b7e84bf88..5e77faa14 100644
--- a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
-    "llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf"
+    "llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf",
+    "ngl": 40
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json b/extensions/inference-nitro-extension/resources/models/yi-34b/model.json
index 4bc9b0ba1..637eec453 100644
--- a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/yi-34b/model.json
@@ -14,7 +14,8 @@
   "settings": {
     "ctx_len": 4096,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
-    "llama_model_path": "yi-34b-chat.Q4_K_M.gguf"
+    "llama_model_path": "yi-34b-chat.Q4_K_M.gguf",
+    "ngl": 60
  },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts
index 497bb6466..f1d3eb32f 100644
--- a/extensions/inference-nitro-extension/rollup.config.ts
+++ b/extensions/inference-nitro-extension/rollup.config.ts
@@ -12,21 +12,17 @@ const codeninja7bJson = require('./resources/models/codeninja-1.0-7b/model.json'
 const commandr34bJson = require('./resources/models/command-r-34b/model.json')
 const deepseekCoder13bJson = require('./resources/models/deepseek-coder-1.3b/model.json')
 const deepseekCoder34bJson = require('./resources/models/deepseek-coder-34b/model.json')
-const dolphinPhi2Json = require('./resources/models/dolphin-phi-2/model.json')
 const gemma2bJson = require('./resources/models/gemma-2b/model.json')
 const gemma7bJson = require('./resources/models/gemma-7b/model.json')
-const hermesPro7bJson = require('./resources/models/hermes-pro-7b/model.json')
 const llama2Chat70bJson = require('./resources/models/llama2-chat-70b/model.json')
 const llama2Chat7bJson = require('./resources/models/llama2-chat-7b/model.json')
 const llamacorn1bJson = require('./resources/models/llamacorn-1.1b/model.json')
 const llava13bJson = require('./resources/models/llava-13b/model.json')
 const llava7bJson = require('./resources/models/llava-7b/model.json')
-const miqu70bJson = require('./resources/models/miqu-70b/model.json')
 const mistralIns7bq4Json = require('./resources/models/mistral-ins-7b-q4/model.json')
 const mixtral8x7bInstructJson = require('./resources/models/mixtral-8x7b-instruct/model.json')
 const noromaid7bJson = require('./resources/models/noromaid-7b/model.json')
 const openchat357bJson = require('./resources/models/openchat-3.5-7b/model.json')
-const openhermesNeural7bJson = require('./resources/models/openhermes-neural-7b/model.json')
 const phind34bJson = require('./resources/models/phind-34b/model.json')
 const qwen7bJson = require('./resources/models/qwen-7b/model.json')
 const stableZephyr3bJson = require('./resources/models/stable-zephyr-3b/model.json')
@@ -37,6 +33,7 @@ const vistral7bJson = require('./resources/models/vistral-7b/model.json')
 const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.json')
 const yi34bJson = require('./resources/models/yi-34b/model.json')
 const llama3Json = require('./resources/models/llama3-8b-instruct/model.json')
+const llama3Hermes8bJson = require('./resources/models/llama3-hermes-8b/model.json')
 
 export default [
   {
@@ -56,21 +53,17 @@ export default [
           commandr34bJson,
           deepseekCoder13bJson,
           deepseekCoder34bJson,
-          dolphinPhi2Json,
           gemma2bJson,
           gemma7bJson,
-          hermesPro7bJson,
           llama2Chat70bJson,
           llama2Chat7bJson,
           llamacorn1bJson,
           llava13bJson,
           llava7bJson,
-          miqu70bJson,
           mistralIns7bq4Json,
           mixtral8x7bInstructJson,
           noromaid7bJson,
           openchat357bJson,
-          openhermesNeural7bJson,
           phind34bJson,
           qwen7bJson,
           stableZephyr3bJson,
@@ -80,7 +73,8 @@
           vistral7bJson,
           wizardcoder13bJson,
           yi34bJson,
-          llama3Json
+          llama3Json,
+          llama3Hermes8bJson
         ]),
         NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
         DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
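Because rollup inlines the model list statically via JSON.stringify, a model.json that is missing from (or stale in) this file silently drops out of the bundle. A hypothetical maintenance check, not part of this diff, that walks resources/models and warns when a nitro model still lacks the new ngl setting:

// check-ngl.ts — hypothetical helper; the path assumes it runs from the
// extension root. Warns for nitro models whose settings omit "ngl".
import { readdirSync, readFileSync } from 'fs'
import { join } from 'path'

const modelsDir = join(__dirname, 'resources', 'models')
for (const entry of readdirSync(modelsDir)) {
  const model = JSON.parse(
    readFileSync(join(modelsDir, entry, 'model.json'), 'utf8')
  )
  if (model.engine === 'nitro' && model.settings?.ngl === undefined) {
    console.warn(`${model.id}: settings.ngl is missing`)
  }
}

Note that a warning here is advisory rather than fatal: multimodal entries such as llava-7b and llava-13b remain in the bundle above and may legitimately configure GPU offload differently.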