diff --git a/models/capybara-34b/model.json b/models/capybara-34b/model.json
index d2da8d002..7c669bbb7 100644
--- a/models/capybara-34b/model.json
+++ b/models/capybara-34b/model.json
@@ -7,13 +7,13 @@
"description": "Nous Capybara 34B, a variant of the Yi-34B model, is the first Nous model with a 200K context length, trained for three epochs on the innovative Capybara dataset.",
"format": "gguf",
"settings": {
- "ctx_len": 4096,
+ "ctx_len": 2048,
"system_prompt": "",
"user_prompt": "USER: ",
"ai_prompt": "ASSISTANT: "
},
"parameters": {
- "max_tokens": 4096
+ "max_tokens": 2048
},
"metadata": {
"author": "NousResearch, The Bloke",
diff --git a/models/deepseek-coder-1.3b/model.json b/models/deepseek-coder-1.3b/model.json
index 3ee705d20..dac90423d 100644
--- a/models/deepseek-coder-1.3b/model.json
+++ b/models/deepseek-coder-1.3b/model.json
@@ -7,13 +7,13 @@
"description": "",
"format": "gguf",
"settings": {
- "ctx_len": 4096,
+ "ctx_len": 2048,
"system_prompt": "",
"user_prompt": "",
"ai_prompt": ""
},
"parameters": {
- "max_tokens": 4096
+ "max_tokens": 2048
},
"metadata": {
"author": "deepseek, The Bloke",
diff --git a/models/llama2-chat-7b-q4/model.json b/models/llama2-chat-7b-q4/model.json
new file mode 100644
index 000000000..be302d144
--- /dev/null
+++ b/models/llama2-chat-7b-q4/model.json
@@ -0,0 +1,23 @@
+{
+ "source_url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf",
+ "id": "llama2-chat-7b-q4",
+ "object": "model",
+ "name": "Llama 2 Chat 7B Q4",
+ "version": "1.0",
+ "description": "This is a 4-bit quantized version of Meta AI's Llama 2 Chat 7b model.",
+ "format": "gguf",
+ "settings": {
+ "ctx_len": 2048,
+ "system_prompt": "[INST] <>\n",
+ "user_prompt": "<>\n",
+ "ai_prompt": "[/INST]"
+ },
+ "parameters": {
+ "max_tokens": 2048
+ },
+ "metadata": {
+ "author": "MetaAI, The Bloke",
+ "tags": ["Foundational Model", "General", "Code"],
+ "size": 4080000000
+ }
+}
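
The settings block above is the standard Llama 2 chat template: the system message sits between <<SYS>> and <</SYS>> inside an [INST] block, and the model's reply follows [/INST]. A minimal sketch of how the three fields could be joined around the message text; build_prompt is hypothetical, assuming the runtime does a plain concatenation:

# Template fragments copied from the model.json above.
SYSTEM_PROMPT = "[INST] <<SYS>>\n"
USER_PROMPT = "<</SYS>>\n"
AI_PROMPT = "[/INST]"

def build_prompt(system_message: str, user_message: str) -> str:
    # Wrap the system message in the <<SYS>> block, then append the
    # user message and close the instruction with [/INST].
    return (SYSTEM_PROMPT + system_message + "\n"
            + USER_PROMPT + user_message + " " + AI_PROMPT)

print(build_prompt("You are a helpful assistant.", "Explain GGUF in one line."))
# [INST] <<SYS>>
# You are a helpful assistant.
# <</SYS>>
# Explain GGUF in one line. [/INST]
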
diff --git a/models/llama2-chat-7b-q5/model.json b/models/llama2-chat-7b-q5/model.json
new file mode 100644
index 000000000..8a93327d6
--- /dev/null
+++ b/models/llama2-chat-7b-q5/model.json
@@ -0,0 +1,23 @@
+{
+ "source_url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf",
+ "id": "llama2-chat-7b-q5",
+ "object": "model",
+ "name": "Llama 2 Chat 7B Q5",
+ "version": "1.0",
+ "description": "This is a 5-bit quantized version of Meta AI's Llama 2 Chat 7b model.",
+ "format": "gguf",
+ "settings": {
+ "ctx_len": 2048,
+ "system_prompt": "[INST] <>\n",
+ "user_prompt": "<>\n",
+ "ai_prompt": "[/INST]"
+ },
+ "parameters": {
+ "max_tokens": 2048
+ },
+ "metadata": {
+ "author": "MetaAI, The Bloke",
+ "tags": ["Foundational Model", "General", "Code"],
+ "size": 4780000000
+ }
+}
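
The two new manifests differ only in quantization level, source file, and size, so copy-paste slips are easy to make. A quick consistency pass over every model.json can catch them; this is a hedged sketch, with the required-key set inferred from the files in this diff rather than from a published schema:

import json
import pathlib

REQUIRED_KEYS = {"source_url", "id", "object", "name", "version",
                 "description", "format", "settings", "parameters", "metadata"}

def check_manifest(path: pathlib.Path) -> list[str]:
    # Report missing top-level keys and an over-large completion budget.
    data = json.loads(path.read_text())
    problems = [f"missing key: {key}" for key in sorted(REQUIRED_KEYS - data.keys())]
    settings = data.get("settings", {})
    parameters = data.get("parameters", {})
    if parameters.get("max_tokens", 0) > settings.get("ctx_len", 0):
        problems.append("max_tokens exceeds ctx_len")
    return problems

for manifest in sorted(pathlib.Path("models").glob("*/model.json")):
    for problem in check_manifest(manifest):
        print(f"{manifest}: {problem}")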