From 71a707aa77d5e5509e31d849e34d2910577b0857 Mon Sep 17 00:00:00 2001
From: Hoang Ha <64120343+hahuyhoang411@users.noreply.github.com>
Date: Fri, 21 Jun 2024 14:34:38 +0700
Subject: [PATCH] adjust correct ngl number (#3081)

---
 .../resources/models/aya-23-35b/model.json          | 2 +-
 .../resources/models/aya-23-8b/model.json           | 2 +-
 .../resources/models/codeninja-1.0-7b/model.json    | 2 +-
 .../resources/models/codestral-22b/model.json       | 2 +-
 .../resources/models/command-r-34b/model.json       | 2 +-
 .../resources/models/deepseek-coder-1.3b/model.json | 2 +-
 .../resources/models/deepseek-coder-34b/model.json  | 2 +-
 .../resources/models/gemma-2b/model.json            | 2 +-
 .../resources/models/gemma-7b/model.json            | 2 +-
 .../resources/models/llama2-chat-70b/model.json     | 2 +-
 .../resources/models/llama2-chat-7b/model.json      | 2 +-
 .../resources/models/llama3-8b-instruct/model.json  | 2 +-
 .../resources/models/llama3-hermes-8b/model.json    | 2 +-
 .../resources/models/llamacorn-1.1b/model.json      | 2 +-
 .../resources/models/mistral-ins-7b-q4/model.json   | 2 +-
 .../resources/models/noromaid-7b/model.json         | 2 +-
 .../resources/models/openchat-3.5-7b/model.json     | 2 +-
 .../resources/models/phi3-3.8b/model.json           | 2 +-
 .../resources/models/phi3-medium/model.json         | 2 +-
 .../resources/models/phind-34b/model.json           | 2 +-
 .../resources/models/qwen-7b/model.json             | 2 +-
 .../resources/models/qwen2-7b/model.json            | 2 +-
 .../resources/models/stable-zephyr-3b/model.json    | 2 +-
 .../resources/models/stealth-v1.2-7b/model.json     | 2 +-
 .../resources/models/tinyllama-1.1b/model.json      | 2 +-
 .../resources/models/trinity-v1.2-7b/model.json     | 2 +-
 .../resources/models/vistral-7b/model.json          | 2 +-
 .../resources/models/wizardcoder-13b/model.json     | 2 +-
 .../resources/models/yi-34b/model.json              | 2 +-
 29 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json b/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
index c70c02080..c89bb16cd 100644
--- a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
     "llama_model_path": "aya-23-35B-Q4_K_M.gguf",
-    "ngl": 40
+    "ngl": 41
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json b/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
index ccb9a6f7f..e04b5f1f7 100644
--- a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
     "llama_model_path": "aya-23-8B-Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json b/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
index 8497aa11c..ca548369e 100644
--- a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
     "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json b/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
index 8e026e340..6b12329f4 100644
--- a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 32000,
     "prompt_template": "{system_message} [INST] {prompt} [/INST]",
     "llama_model_path": "Codestral-22B-v0.1-Q4_K_M.gguf",
-    "ngl": 56
+    "ngl": 57
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
index fdf638d83..e9abfa267 100644
--- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 131072,
     "prompt_template": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
     "llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf",
-    "ngl": 40
+    "ngl": 41
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
index f8fe7344c..00a11d1a5 100644
--- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
     "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf",
-    "ngl": 24
+    "ngl": 25
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
index b488e6bbb..d209963f7 100644
--- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
     "llama_model_path": "deepseek-coder-33b-instruct.Q4_K_M.gguf",
-    "ngl": 62
+    "ngl": 63
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
index a9acb6ef8..b4665f4c0 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/gemma-2b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
     "llama_model_path": "gemma-2b-it-q4_k_m.gguf",
-    "ngl": 18
+    "ngl": 19
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json b/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
index 96afe7a61..9461a206b 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/gemma-7b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
     "llama_model_path": "gemma-7b-it-q4_K_M.gguf",
-    "ngl": 28
+    "ngl": 29
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json b/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
index 4b255c9e2..ef13dc353 100644
--- a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
     "llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf",
-    "ngl": 80
+    "ngl": 81
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json b/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json
index b7d3eeb80..2e3d73b7c 100644
--- a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
     "llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
index 7bed6e43c..b54c78f3d 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
     "llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json b/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json
index 16d50b9f9..9d5ab57fb 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json b/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
index b8da24e71..9bcbc3f27 100644
--- a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 2048,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf",
-    "ngl": 22
+    "ngl": 23
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
index 21dcea865..972af8fb4 100644
--- a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
+++ b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "{system_message} [INST] {prompt} [/INST]",
     "llama_model_path": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json b/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json
index aa39b62c2..991410896 100644
--- a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json b/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json
index 94967962d..74315c8b9 100644
--- a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 8192,
     "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
     "llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json
index f384fd953..451747a30 100644
--- a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
     "llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "max_tokens": 4096,
diff --git a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
index 8f5bfa1c3..274c96180 100644
--- a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
+++ b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 128000,
     "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
     "llama_model_path": "Phi-3-medium-128k-instruct-Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "max_tokens": 128000,
diff --git a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json b/extensions/inference-nitro-extension/resources/models/phind-34b/model.json
index f96fb4a49..6a8ed507b 100644
--- a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/phind-34b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 16384,
     "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
     "llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf",
-    "ngl": 48
+    "ngl": 49
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json b/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
index 202221bd7..4b8713230 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json b/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json
index 39343575c..38bcddce0 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "Qwen2-7B-Instruct-Q4_K_M.gguf",
-    "ngl": 28
+    "ngl": 29
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json b/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
index 81bf4306c..983566adf 100644
--- a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "<|user|>\n{prompt}<|endoftext|>\n<|assistant|>",
     "llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
index 2848931bb..f8f85c8e9 100644
--- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "stealth-v1.3.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json b/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
index 443ee7dcd..815533466 100644
--- a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
     "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
-    "ngl": 22
+    "ngl": 23
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
index 1a98ddb2e..5159cab58 100644
--- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "trinity-v1.2.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json b/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
index 978f8cf54..e9a255c4a 100644
--- a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 32768,
     "prompt_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt} [/INST]",
     "llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf",
-    "ngl": 32
+    "ngl": 33
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json b/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
index 5e77faa14..82aa4adf2 100644
--- a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
     "llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf",
-    "ngl": 40
+    "ngl": 41
   },
   "parameters": {
     "temperature": 0.7,
diff --git a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json b/extensions/inference-nitro-extension/resources/models/yi-34b/model.json
index 637eec453..81700a046 100644
--- a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json
+++ b/extensions/inference-nitro-extension/resources/models/yi-34b/model.json
@@ -15,7 +15,7 @@
     "ctx_len": 4096,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "yi-34b-chat.Q4_K_M.gguf",
-    "ngl": 60
+    "ngl": 61
   },
   "parameters": {
     "temperature": 0.7,