Add Llama 2 Chat 7B models (Q4, Q5); lower ctx_len and max_tokens to 2048 for Capybara 34B and DeepSeek Coder 1.3B

2023-12-04 12:26:50 +07:00 · 2023-12-04 12:26:50 +07:00 · 6a54587319
commit 6a54587319
parent e76233113d
4 changed files with 52 additions and 4 deletions
--- a/models/capybara-34b/model.json
+++ b/models/capybara-34b/model.json
@@ -7,13 +7,13 @@
    "description": "Nous Capybara 34B, a variant of the Yi-34B model, is the first Nous model with a 200K context length, trained for three epochs on the innovative Capybara dataset.",
    "format": "gguf",
    "settings": {
-      "ctx_len": 4096,
+      "ctx_len": 2048,
      "system_prompt": "",
      "user_prompt": "USER: ",
      "ai_prompt": "ASSISTANT: "
    },
    "parameters": {
-      "max_tokens": 4096
+      "max_tokens": 2048
    },
    "metadata": {
      "author": "NousResearch, The Bloke",
--- a/models/deepseek-coder-1.3b/model.json
+++ b/models/deepseek-coder-1.3b/model.json
@@ -7,13 +7,13 @@
    "description": "",
    "format": "gguf",
    "settings": {
-      "ctx_len": 4096,
+      "ctx_len": 2048,
      "system_prompt": "",
      "user_prompt": "",
      "ai_prompt": ""
    },
    "parameters": {
-      "max_tokens": 4096
+      "max_tokens": 2048
    },
    "metadata": {
      "author": "deepseek, The Bloke",
--- a/models/llama2-chat-7b-q4/model.json
+++ b/models/llama2-chat-7b-q4/model.json
@@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf",
+    "id": "llama2-chat-7b-q4",
+    "object": "model",
+    "name": "Llama 2 Chat 7B Q4",
+    "version": "1.0",
+    "description": "This is a 4-bit quantized version of Meta AI's Llama 2 Chat 7B model.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 2048,
+      "system_prompt": "[INST] <<SYS>>\n",
+      "user_prompt": "<</SYS>>\n",
+      "ai_prompt": "[/INST]"
+    },
+    "parameters": {
+      "max_tokens": 2048
+    },
+    "metadata": {
+      "author": "MetaAI, The Bloke",
+      "tags": ["Foundational Model", "General", "Code"],
+      "size": 4080000000
+    }
+}
+
--- a/models/llama2-chat-7b-q5/model.json
+++ b/models/llama2-chat-7b-q5/model.json
@@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf",
+    "id": "llama2-chat-7b-q5",
+    "object": "model",
+    "name": "Llama 2 Chat 7B Q5",
+    "version": "1.0",
+    "description": "This is a 5-bit quantized version of Meta AI's Llama 2 Chat 7B model.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 2048,
+      "system_prompt": "[INST] <<SYS>>\n",
+      "user_prompt": "<</SYS>>\n",
+      "ai_prompt": "[/INST]"
+    },
+    "parameters": {
+      "max_tokens": 2048
+    },
+    "metadata": {
+      "author": "MetaAI, The Bloke",
+      "tags": ["Foundational Model", "General", "Code"],
+      "size": 4780000000
+    }
+}
+