Feat: Correct context length for models (#2867)
* fix: correct ctx
* version bump
* fix: correct ctxlen
* fix: correct ctxlen
* version bump
* fix: correct ctx + q4
* fix: correct ctxlen
* fix: correct ctx
* fix: correct ctx
* fix: correct ctx len
* fix: correct ctx
* fix: correct ctx
* fix: correct ctx
* fix: correct ctx
* fix: correct ctx
* fix: correct ctx
* fix: correct ctx
* fix: correct ctx
* version bump
parent d2266405cc
commit 2008aae100
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-nitro-extension",
   "productName": "Nitro Inference Engine",
-  "version": "1.0.4",
+  "version": "1.0.5",
   "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",

@@ -8,11 +8,11 @@
   "id": "codeninja-1.0-7b",
   "object": "model",
   "name": "CodeNinja 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 8192,
     "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
     "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 8192,
     "frequency_penalty": 0,
     "presence_penalty": 0
   },
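
The same two-field change repeats in every model file below: settings.ctx_len and parameters.max_tokens are raised together, since a completion budget larger than the context window is never usable. A minimal consistency-check sketch — the models/ directory layout and file discovery here are assumptions for illustration, not part of this PR:

// check-ctx.ts — a consistency-check sketch, not part of this commit.
// Walks an assumed models/ directory and flags any model.json whose
// parameters.max_tokens exceeds settings.ctx_len.
import * as fs from "fs";
import * as path from "path";

function* modelFiles(dir: string): Generator<string> {
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const p = path.join(dir, entry.name);
    if (entry.isDirectory()) yield* modelFiles(p);
    else if (entry.name === "model.json") yield p;
  }
}

for (const file of modelFiles("models")) {
  const m = JSON.parse(fs.readFileSync(file, "utf8"));
  const ctx = m.settings?.ctx_len ?? Infinity;
  const max = m.parameters?.max_tokens ?? 0;
  if (max > ctx) console.warn(`${file}: max_tokens ${max} > ctx_len ${ctx}`);
}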

@@ -8,11 +8,11 @@
   "id": "command-r-34b",
   "object": "model",
   "name": "Command-R v01 34B Q4",
-  "version": "1.3",
+  "version": "1.4",
   "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 131072,
     "prompt_template": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
     "llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 131072,
     "stop": [],
     "frequency_penalty": 0,
     "presence_penalty": 0

@@ -8,11 +8,11 @@
   "id": "deepseek-coder-1.3b",
   "object": "model",
   "name": "Deepseek Coder 1.3B Q8",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
     "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 16384,
     "stop": [],
     "frequency_penalty": 0,
     "presence_penalty": 0

@@ -1,26 +1,26 @@
 {
   "sources": [
     {
-      "filename": "deepseek-coder-33b-instruct.Q5_K_M.gguf",
-      "url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf"
+      "filename": "deepseek-coder-33b-instruct.Q4_K_M.gguf",
+      "url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q4_K_M.gguf"
     }
   ],
   "id": "deepseek-coder-34b",
   "object": "model",
-  "name": "Deepseek Coder 33B Q5",
-  "version": "1.0",
+  "name": "Deepseek Coder 33B Q4",
+  "version": "1.1",
   "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
-    "llama_model_path": "deepseek-coder-33b-instruct.Q5_K_M.gguf"
+    "llama_model_path": "deepseek-coder-33b-instruct.Q4_K_M.gguf"
   },
   "parameters": {
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 16384,
     "stop": [],
     "frequency_penalty": 0,
     "presence_penalty": 0
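
Besides the context fix, this file swaps the 33B download from Q5_K_M to Q4_K_M quantization, trading a little fidelity for a noticeably smaller download and memory footprint. A back-of-envelope sketch — the bits-per-weight figures are approximate llama.cpp numbers and the parameter count is nominal, so treat the output as an estimate only:

// Rough GGUF size estimate at two quantization levels (assumed figures).
const params = 33e9; // nominal "33B"
const bitsPerWeight = { Q4_K_M: 4.85, Q5_K_M: 5.69 }; // approximate
for (const [quant, bpw] of Object.entries(bitsPerWeight)) {
  console.log(`${quant}: ~${((params * bpw) / 8 / 1e9).toFixed(1)} GB`);
}
// Prints roughly: Q4_K_M: ~20.0 GB, Q5_K_M: ~23.5 GB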

@@ -8,11 +8,11 @@
   "id": "gemma-2b",
   "object": "model",
   "name": "Gemma 2B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Gemma is built from the same technology with Google's Gemini.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 8192,
     "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
     "llama_model_path": "gemma-2b-it-q4_k_m.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 8192,
     "stop": [],
     "frequency_penalty": 0,
     "presence_penalty": 0

@@ -8,11 +8,11 @@
   "id": "gemma-7b",
   "object": "model",
   "name": "Gemma 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Google's Gemma is built for multilingual purpose",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 8192,
     "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
     "llama_model_path": "gemma-7b-it-q4_K_M.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 8192,
     "stop": [],
     "frequency_penalty": 0,
     "presence_penalty": 0
@@ -8,11 +8,11 @@
   "id": "mistral-ins-7b-q4",
   "object": "model",
   "name": "Mistral Instruct 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 32768,
     "prompt_template": "[INST] {prompt} [/INST]",
     "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
   },
@@ -20,8 +20,8 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
-    "stop": [],
+    "max_tokens": 32768,
+    "stop": ["[/INST]"],
     "frequency_penalty": 0,
     "presence_penalty": 0
   },
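
Note that this file also adds "[/INST]" as a stop sequence, so generation halts before the model drifts into writing a new instruction turn. A client-side sketch of what a stop string does — engines normally enforce "stop" server-side and end generation at that point, so this is only a fallback illustration:

// Truncate a completion at the first occurrence of any stop sequence.
function applyStops(text: string, stops: string[]): string {
  let cut = text.length;
  for (const s of stops) {
    const i = text.indexOf(s);
    if (i !== -1 && i < cut) cut = i;
  }
  return text.slice(0, cut);
}

// Prints "The answer is 4. " — everything from "[/INST]" onward is dropped.
console.log(applyStops("The answer is 4. [/INST] extra turn", ["[/INST]"]));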

@@ -8,11 +8,11 @@
   "id": "mixtral-8x7b-instruct",
   "object": "model",
   "name": "Mixtral 8x7B Instruct Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 32768,
     "prompt_template": "[INST] {prompt} [/INST]",
     "llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 32768,
     "frequency_penalty": 0,
     "presence_penalty": 0
   },

@@ -8,11 +8,11 @@
   "id": "noromaid-7b",
   "object": "model",
   "name": "Noromaid 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 32768,
     "stop": [],
     "frequency_penalty": 0,
     "presence_penalty": 0

@@ -8,11 +8,11 @@
   "id": "openchat-3.5-7b",
   "object": "model",
   "name": "Openchat-3.5 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "The performance of Openchat surpasses ChatGPT-3.5 and Grok-1 across various benchmarks.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 8192,
     "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
     "llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 8192,
     "stop": ["<|end_of_turn|>"],
     "frequency_penalty": 0,
     "presence_penalty": 0

@@ -8,11 +8,11 @@
   "id": "phind-34b",
   "object": "model",
   "name": "Phind 34B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Phind 34B is the best Open-source coding model.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 16384,
     "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
     "llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 16384,
     "stop": [],
     "frequency_penalty": 0,
     "presence_penalty": 0

@@ -8,11 +8,11 @@
   "id": "qwen-7b",
   "object": "model",
   "name": "Qwen Chat 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Qwen is optimized at Chinese, ideal for everyday tasks.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 32768,
     "stop": [],
     "frequency_penalty": 0,
     "presence_penalty": 0

@@ -12,7 +12,7 @@
   "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "stealth-v1.3.Q4_K_M.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 32768,
     "frequency_penalty": 0,
     "presence_penalty": 0
   },

@@ -12,7 +12,7 @@
   "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 32768,
     "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
     "llama_model_path": "trinity-v1.2.Q4_K_M.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 32768,
     "frequency_penalty": 0,
     "presence_penalty": 0
   },

@@ -8,11 +8,11 @@
   "id": "vistral-7b",
   "object": "model",
   "name": "Vistral 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Vistral 7B has a deep understanding of Vietnamese.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 32768,
     "prompt_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt} [/INST]",
     "llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 32768,
     "stop": [],
     "frequency_penalty": 0,
     "presence_penalty": 0

@@ -12,7 +12,7 @@
   "description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 16384,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:",
     "llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf"
   },
@@ -20,7 +20,7 @@
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": true,
-    "max_tokens": 4096,
+    "max_tokens": 16384,
     "stop": [],
     "frequency_penalty": 0,
     "presence_penalty": 0
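
Taken together, the new ctx_len values generally track each base model's trained context window: 8192 for the OpenChat- and Gemma-based models, 16384 for the CodeLlama- and Deepseek-based coder models, 32768 for the Mistral family, and 131072 for Command-R. Should a config ever overstate the window, a defensive clamp at engine launch is cheap — effectiveCtxLen below is a hypothetical helper, not Nitro's actual API; trainedCtx would come from GGUF metadata (the llama.context_length key):

// Hypothetical guard at engine launch: never request more context
// than the model was trained on, whatever model.json says.
function effectiveCtxLen(requested: number, trainedCtx: number): number {
  return Math.min(requested, trainedCtx);
}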