diff --git a/models/capybara-34b/model.json b/models/capybara-34b/model.json
index d2da8d002..7c669bbb7 100644
--- a/models/capybara-34b/model.json
+++ b/models/capybara-34b/model.json
@@ -7,13 +7,13 @@
   "description": "Nous Capybara 34B, a variant of the Yi-34B model, is the first Nous model with a 200K context length, trained for three epochs on the innovative Capybara dataset.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 2048,
     "system_prompt": "",
     "user_prompt": "USER: ",
     "ai_prompt": "ASSISTANT: "
   },
   "parameters": {
-    "max_tokens": 4096
+    "max_tokens": 2048
   },
   "metadata": {
     "author": "NousResearch, The Bloke",
diff --git a/models/deepseek-coder-1.3b/model.json b/models/deepseek-coder-1.3b/model.json
index 3ee705d20..dac90423d 100644
--- a/models/deepseek-coder-1.3b/model.json
+++ b/models/deepseek-coder-1.3b/model.json
@@ -7,13 +7,13 @@
   "description": "",
   "format": "gguf",
   "settings": {
-    "ctx_len": 4096,
+    "ctx_len": 2048,
     "system_prompt": "",
     "user_prompt": "",
     "ai_prompt": ""
   },
   "parameters": {
-    "max_tokens": 4096
+    "max_tokens": 2048
   },
   "metadata": {
     "author": "deepseek, The Bloke",
diff --git a/models/llama2-chat-7b-q4/model.json b/models/llama2-chat-7b-q4/model.json
new file mode 100644
index 000000000..be302d144
--- /dev/null
+++ b/models/llama2-chat-7b-q4/model.json
@@ -0,0 +1,24 @@
+{
+  "source_url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf",
+  "id": "llama2-chat-7b-q4",
+  "object": "model",
+  "name": "Llama 2 Chat 7B Q4",
+  "version": "1.0",
+  "description": "This is a 4-bit quantized version of Meta AI's Llama 2 Chat 7b model.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 2048,
+    "system_prompt": "[INST] <<SYS>>\n",
+    "user_prompt": "<</SYS>>\n",
+    "ai_prompt": "[/INST]"
+  },
+  "parameters": {
+    "max_tokens": 2048
+  },
+  "metadata": {
+    "author": "MetaAI, The Bloke",
+    "tags": ["Foundational Model", "General", "Code"],
+    "size": 4080000000
+  }
+}
+
\ No newline at end of file
diff --git a/models/llama2-chat-7b-q5/model.json b/models/llama2-chat-7b-q5/model.json
new file mode 100644
index 000000000..8a93327d6
--- /dev/null
+++ b/models/llama2-chat-7b-q5/model.json
@@ -0,0 +1,24 @@
+{
+  "source_url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf",
+  "id": "llama2-chat-7b-q5",
+  "object": "model",
+  "name": "Llama 2 Chat 7B Q5",
+  "version": "1.0",
+  "description": "This is a 5-bit quantized version of Meta AI's Llama 2 Chat 7b model.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 2048,
+    "system_prompt": "[INST] <<SYS>>\n",
+    "user_prompt": "<</SYS>>\n",
+    "ai_prompt": "[/INST]"
+  },
+  "parameters": {
+    "max_tokens": 2048
+  },
+  "metadata": {
+    "author": "MetaAI, The Bloke",
+    "tags": ["Foundational Model", "General", "Code"],
+    "size": 4780000000
+  }
+}
+
\ No newline at end of file
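
For reviewers unfamiliar with how the "settings" block is consumed, below is a minimal TypeScript sketch of how the system_prompt / user_prompt / ai_prompt fields could be stitched into a Llama 2 chat prompt. The ModelSettings interface and buildPrompt function are hypothetical illustrations, not the app's actual API, and the concatenation order is an assumption based on the field names.

// Minimal sketch only: ModelSettings mirrors the "settings" block in the
// model.json files above, but buildPrompt and its concatenation order are
// illustrative assumptions, not the app's actual implementation.
interface ModelSettings {
  ctx_len: number;        // context window the model is loaded with
  system_prompt: string;  // prefix inserted before the system message
  user_prompt: string;    // prefix inserted before the user message
  ai_prompt: string;      // marker after which the model generates
}

// Assumed assembly: system_prompt + system text + user_prompt + user text + ai_prompt,
// which for the Llama 2 files added here yields the standard chat template:
// "[INST] <<SYS>>\n{system}<</SYS>>\n{user} [/INST]"
function buildPrompt(s: ModelSettings, system: string, user: string): string {
  return `${s.system_prompt}${system}${s.user_prompt}${user} ${s.ai_prompt}`;
}

const llama2Q4: ModelSettings = {
  ctx_len: 2048,
  system_prompt: "[INST] <<SYS>>\n",
  user_prompt: "<</SYS>>\n",
  ai_prompt: "[/INST]",
};

console.log(buildPrompt(llama2Q4, "You are a helpful assistant.\n", "Hello!"));
// -> [INST] <<SYS>>
//    You are a helpful assistant.
//    <</SYS>>
//    Hello! [/INST]

One note on the values themselves: ctx_len and max_tokens move to 2048 together here, which makes sense given that generated tokens count against the context window, so max_tokens should presumably never exceed ctx_len.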