chore: pre-populate Jan's /models folder with model.jsons (#775)

* draft model.json * islm3b update * capybara 34b update * deepseek coder update * dolphin yi update * fix the maxtokens of islm * lzlv 70b update * marx3b update * mythomax 13b update * update neural chat 7b * noromaid 20b update * update openchat 7b * openhermes7b update * openorca 7b * orca 13b update * phind 34b update * rocket 3b update * starling 7b update * storytelling 70b update * tiefighter 13B * update tiefighter tags * tinyllama update * wizard coder 13b * update wizard coder 13b description * wizard coder 34b update * wizard coder minor fix * xwin 70b update * yarn 70b * yi 34b * zephyr beta 7b * neuralhermes-7b update * change path + ctxlen * update id * fix startling
2023-12-01 17:20:58 +07:00 · 2023-12-01 17:20:58 +07:00 · e6812b1247
commit e6812b1247
parent 31b2c7b309
27 changed files with 617 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,7 +2,6 @@
 .env

 # Jan inference
-models/**
 error.log
 node_modules
 *.tgz
--- a/models/capybara-34b/model.json
+++ b/models/capybara-34b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Nous-Capybara-34B-GGUF/resolve/main/nous-capybara-34b.Q5_K_M.gguf",
+    "id": "capybara-34b",
+    "object": "model",
+    "name": "Capybara 200k 34B",
+    "version": "1.0",
+    "description": "Nous Capybara 34B, a variant of the Yi-34B model, is the first Nous model with a 200K context length, trained for three epochs on the innovative Capybara dataset.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "USER: ",
+      "ai_prompt": "ASSISTANT: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "NousResearch, The Bloke",
+      "tags": ["General", "Big Context Length"],
+      "size": 24320000000
+    }
+  }
+  
--- a/models/deepseek-coder-1.3b/model.json
+++ b/models/deepseek-coder-1.3b/model.json
@ -0,0 +1,23 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-base-GGUF/resolve/main/deepseek-coder-1.3b-base.Q4_K_M.gguf",
+    "id": "deepseek-coder-1.3b",
+    "object": "model",
+    "name": "Deepseek Coder 1.3B",
+    "version": "1.0",
+    "description": "",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "",
+      "ai_prompt": ""
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "deepseek, The Bloke",
+      "tags": ["Code"],
+      "size": 870000000
+    }
+  }
--- a/models/dolphin-yi-34b/model.json
+++ b/models/dolphin-yi-34b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/dolphin-2_2-yi-34b-GGUF/resolve/main/dolphin-2_2-yi-34b.Q5_K_M.gguf",
+    "id": "dolphin-yi-34b",
+    "object": "model",
+    "name": "Dolphin Yi 34B",
+    "version": "1.0",
+    "description": "Dolphin, based on the Yi-34B model and enhanced with features like conversation and empathy, is trained on a unique dataset for advanced multi-turn conversations. Notably uncensored, it requires careful implementation of an alignment layer for ethical use.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "<|im_start|>system\n",
+      "user_prompt": "<|im_end|>\n<|im_start|>user\n",
+      "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "ehartford, The Bloke",
+      "tags": ["General Use", "Role-playing"],
+      "size": 24320000000
+    }
+  }
+  
--- a/models/islm-3b/model.json
+++ b/models/islm-3b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/UmbrellaCorp/IS-LM-3B_GGUF/resolve/main/IS-LM-Q4_K_M.gguf",
+    "id": "islm-3b",
+    "object": "model",
+    "name": "IS LM 3B",
+    "version": "1.0",
+    "description": "IS LM 3B, based on the StableLM 3B model, is specifically finetuned for economic analysis using DataForge Economics and QLoRA over three epochs, enhancing its proficiency in economic forecasting and analysis.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "USER: ",
+      "ai_prompt": "ASSISTANT: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "UmbrellaCorp, The Bloke",
+      "tags": ["General Use", "Economics"],
+      "size": 1710000000
+    }
+  }
+  
--- a/models/lzlv-70b/model.json
+++ b/models/lzlv-70b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/lzlv_70B-GGUF/resolve/main/lzlv_70b_fp16_hf.Q5_K_M.gguf",
+    "id": "lzlv-70b",
+    "object": "model",
+    "name": "Lzlv 70B",
+    "version": "1.0",
+    "description": "lzlv_70B is a sophisticated AI model designed for roleplaying and creative tasks. This merge aims to combine intelligence with creativity, seemingly outperforming its individual components in complex scenarios and creative outputs.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "USER: ",
+      "ai_prompt": "ASSISTANT: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "lizpreciatior, The Bloke",
+      "tags": ["General Use", "Role-playing"],
+      "size": 48750000000
+    }
+  }
+  
--- a/models/marx-3b/model.json
+++ b/models/marx-3b/model.json
@ -0,0 +1,23 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Marx-3B-v3-GGUF/resolve/main/marx-3b-v3.Q4_K_M.gguf",
+    "id": "marx-3b",
+    "object": "model",
+    "name": "Marx 3B",
+    "version": "1.0",
+    "description": "Marx 3B, based on the StableLM 3B model, is specifically finetuned for chatting using EverythingLM data and QLoRA over two epochs, enhancing its proficiency in understanding general knowledge.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "### HUMAN: ",
+      "ai_prompt": "### RESPONSE: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "Bohan Du, The Bloke",
+      "tags": ["General Use"],
+      "size": 1620000000
+    }
+  }  
--- a/models/mythomax-13b/model.json
+++ b/models/mythomax-13b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q5_K_M.gguf",
+    "id": "mythomax-13b",
+    "object": "model",
+    "name": "Mythomax L2 13B",
+    "version": "1.0",
+    "description": "Mythomax L2 13b, an advanced AI model derived from MythoMix, merges MythoLogic-L2's deep comprehension with Huginn's writing skills through a unique tensor merge technique, excelling in roleplaying and storytelling.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "### Instruction: ",
+      "ai_prompt": "### Response: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "Gryphe, The Bloke",
+      "tags": ["Role-playing"],
+      "size": 9230000000
+    }
+  }
+  
--- a/models/neural-chat-7b/model.json
+++ b/models/neural-chat-7b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/neural-chat-7B-v3-1-GGUF/resolve/main/neural-chat-7b-v3-1.Q4_K_M.gguf",
+    "id": "neural-chat-7b",
+    "object": "model",
+    "name": "Neural Chat 7B",
+    "version": "1.0",
+    "description": "The Neural Chat 7B model, developed on the foundation of mistralai/Mistral-7B-v0.1, has been fine-tuned using the Open-Orca/SlimOrca dataset and aligned with the Direct Preference Optimization (DPO) algorithm. It has demonstrated substantial improvements in various AI tasks and performs well on the open_llm_leaderboard.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "### System: ",
+      "user_prompt": "### User: ",
+      "ai_prompt": "### Assistant: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "Intel, The Bloke",
+      "tags": ["General Use", "Role-playing", "Big Context Length"],
+      "size": 4370000000
+    }
+  }
+  
--- a/models/neuralhermes-7b/model.json
+++ b/models/neuralhermes-7b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/NeuralHermes-2.5-Mistral-7B-GGUF/resolve/main/neuralhermes-2.5-mistral-7b.Q4_K_M.gguf",
+    "id": "neuralhermes-7b",
+    "object": "model",
+    "name": "NeuralHermes 7B",
+    "version": "1.0",
+    "description": "NeuralHermes 2.5 has been enhanced using Direct Preference Optimization. This fine-tuning, inspired by the RLHF process of Neural-chat-7b and OpenHermes-2.5-Mistral-7B, has led to improved performance across several benchmarks.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "<|im_start|>system\n",
+      "user_prompt": "<|im_end|>\n<|im_start|>user\n",
+      "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "mlabonne, The Bloke",
+      "tags": ["General Use", "Code", "Big Context Length"],
+      "size": 4370000000
+    }
+  }
+  
--- a/models/noromaid-20b/model.json
+++ b/models/noromaid-20b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Noromaid-20B-v0.1.1-GGUF/resolve/main/noromaid-20b-v0.1.1.Q4_K_M.gguf",
+    "id": "noromaid-20b",
+    "object": "model",
+    "name": "Noromaid 20B",
+    "version": "1.0",
+    "description": "The Noromaid 20b model is designed for role-playing and general use, featuring a unique touch with the no_robots dataset that enhances human-like behavior.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "### Instruction: ",
+      "ai_prompt": "### Response: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "NeverSleep, The Bloke",
+      "tags": ["Role-playing"],
+      "size": 12040000000
+    }
+  }
+  
--- a/models/openchat-7b/model.json
+++ b/models/openchat-7b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/openchat_3.5-GGUF/resolve/main/openchat_3.5.Q4_K_M.gguf",
+    "id": "openchat-7b",
+    "object": "model",
+    "name": "Open Chat 3.5 7B",
+    "version": "1.0",
+    "description": "OpenChat represents a breakthrough in the realm of open-source language models. By implementing the C-RLFT fine-tuning strategy, inspired by offline reinforcement learning, this 7B model achieves results on par with ChatGPT (March).",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "GPT4 User: ",
+      "ai_prompt": "<|end_of_turn|>\nGPT4 Assistant: " 
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "OpenChat, The Bloke",
+      "tags": ["General", "Code"],
+      "size": 4370000000
+    }
+  }
+  
--- a/models/openhermes-mistral-7b/model.json
+++ b/models/openhermes-mistral-7b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-GGUF/resolve/main/openhermes-2.5-mistral-7b.Q4_K_M.gguf",
+    "id": "openhermes-mistral-7b",
+    "object": "model",
+    "name": "Openhermes 2.5 Mistral 7B",
+    "version": "1.0",
+    "description": "The OpenHermes 2.5 Mistral 7B incorporates additional code datasets, more than a million GPT-4 generated data examples, and other high-quality open datasets. This enhancement led to significant improvement in benchmarks, highlighting its improved skill in handling code-centric tasks.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "<|im_start|>system\n",
+      "user_prompt": "<|im_end|>\n<|im_start|>user\n",
+      "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n" 
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "Teknium, The Bloke",
+      "tags": ["General", "Role-playing"],
+      "size": 4370000000
+    }
+  }
+  
--- a/models/openorca-13b/model.json
+++ b/models/openorca-13b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Orca-2-13B-GGUF/resolve/main/orca-2-13b.Q5_K_M.gguf",
+    "id": "openorca-13b",
+    "object": "model",
+    "name": "Orca 2 13B",
+    "version": "1.0",
+    "description": "Orca 2 is a finetuned version of LLAMA-2, designed primarily for single-turn responses in reasoning, reading comprehension, math problem solving, and text summarization.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "<|im_start|>system\n",
+      "user_prompt": "<|im_end|>\n<|im_start|>user\n",
+      "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "Microsoft, The Bloke",
+      "tags": ["General Use"],
+      "size": 9230000000
+    }
+  }
+  
--- a/models/openorca-7b/model.json
+++ b/models/openorca-7b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q4_K_M.gguf",
+    "id": "openorca-7b",
+    "object": "model",
+    "name": "OpenOrca 7B",
+    "version": "1.0",
+    "description": "OpenOrca 8k 7B is a model based on Mistral 7B, fine-tuned using the OpenOrca dataset. Notably ranked first on the HF Leaderboard for models under 30B, it excels in efficiency and accessibility.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "<|im_start|>system\n",
+      "user_prompt": "<|im_end|>\n<|im_start|>user\n",
+      "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "OpenOrca, The Bloke",
+      "tags": ["General", "Code"],
+      "size": 4370000000
+    }
+  }
+  
--- a/models/phind-34b/model.json
+++ b/models/phind-34b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf",
+    "id": "phind-34b",
+    "object": "model",
+    "name": "Phind 34B",
+    "version": "1.0",
+    "description": "Phind-CodeLlama-34B-v2 is an AI model fine-tuned on 1.5B tokens of high-quality programming data. It's a SOTA open-source model in coding. This multi-lingual model excels in various programming languages, including Python, C/C++, TypeScript, Java, and is designed to be steerable and user-friendly.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "### System Prompt\n",
+      "user_prompt": "### User Message\n",
+      "ai_prompt": "### Assistant\n"
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "Phind, The Bloke",
+      "tags": ["Code", "Big Context Length"],
+      "size": 24320000000
+    }
+  }
+  
--- a/models/rocket-3b/model.json
+++ b/models/rocket-3b/model.json
@ -0,0 +1,23 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/rocket-3B-GGUF/resolve/main/rocket-3b.Q4_K_M.gguf",
+    "id": "rocket-3b",
+    "object": "model",
+    "name": "Rocket 3B",
+    "version": "1.0",
+    "description": "Rocket-3B is a GPT-like model, primarily English, fine-tuned on diverse public datasets. It outperforms larger models in benchmarks, showcasing superior understanding and text generation, making it an effective chat model for its size.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "<|im_start|>system\n",
+      "user_prompt": "<|im_end|>\n<|im_start|>user\n",
+      "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "pansophic, The Bloke",
+      "tags": ["General Use"],
+      "size": 1710000000
+    }
+  }  
--- a/models/starling-7b/model.json
+++ b/models/starling-7b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf",
+    "id": "starling-7b",
+    "object": "model",
+    "name": "Starling alpha 7B",
+    "version": "1.0",
+    "description": "Starling-LM-7B-alpha is a language model finetuned with Reinforcement Learning from AI Feedback from Openchat 3.5. It stands out for its impressive performance using GPT-4 as a judge, making it one of the top-performing models in its category.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "GPT4 User: ",
+      "ai_prompt": "<|end_of_turn|>\nGPT4 Assistant: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "Berkeley-nest, The Bloke",
+      "tags": ["General", "Code"],
+      "size": 4370000000
+    }
+  }
+  
--- a/models/storytelling-70b/model.json
+++ b/models/storytelling-70b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/GOAT-70B-Storytelling-GGUF/resolve/main/goat-70b-storytelling.Q5_K_M.gguf",
+    "id": "storytelling-70b",
+    "object": "model",
+    "name": "Storytelling 70B",
+    "version": "1.0",
+    "description": "The GOAT-70B-Storytelling model is designed for autonomous story-writing, including crafting books and movie scripts. Based on the LLaMA 2 70B architecture, this model excels in generating cohesive and engaging narratives using inputs like plot outlines and character profiles.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "### USER: ",
+      "ai_prompt": "\n### ASSISTANT: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "GOAT-AI, The Bloke",
+      "tags": ["General Use", "Writing"],
+      "size": 48750000000
+    }
+  }
+  
--- a/models/tiefighter-13b/model.json
+++ b/models/tiefighter-13b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/LLaMA2-13B-Tiefighter-GGUF/resolve/main/llama2-13b-tiefighter.Q5_K_M.gguf",
+    "id": "tiefighter-13b",
+    "object": "model",
+    "name": "Tiefighter 13B",
+    "version": "1.0",
+    "description": "Tiefighter-13B is a highly creative, merged AI model achieved by combining various 'LORAs' on top of an existing merge, particularly focusing on storytelling and improvisation. This model excels in story writing, chatbots, and adventuring, and is designed to perform better with less detailed inputs, leveraging its inherent creativity.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "### Instruction: ",
+      "ai_prompt": "\n### Response: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "KoboldAI, The Bloke",
+      "tags": ["General Use", "Role-playing", "Writing"],
+      "size": 9230000000
+    }
+  }
+  
--- a/models/tinyllama-1.1b/model.json
+++ b/models/tinyllama-1.1b/model.json
@ -0,0 +1,23 @@
+{
+  "source_url": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6/resolve/main/ggml-model-q4_0.gguf",
+  "id": "tinyllama-1.1b",
+  "object": "model",
+  "name": "TinyLlama Chat 1.1B",
+  "version": "1.0",
+  "description": "The TinyLlama project, featuring a 1.1B parameter Llama model, is pretrained on an expansive 3 trillion token dataset. Its design ensures easy integration with various Llama-based open-source projects. Despite its smaller size, it efficiently utilizes lower computational and memory resources, drawing on GPT-4's analytical prowess to enhance its conversational abilities and versatility.",
+  "format": "gguf",
+  "settings": {
+      "ctx_len": 2048,
+      "system_prompt": "<|system|>\n",
+      "user_prompt": "<|user|>\n",
+      "ai_prompt": "<|assistant|>\n"
+  },
+  "parameters": {
+      "max_tokens": 2048
+  },
+  "metadata": {
+      "author": "TinyLlama",
+      "tags": ["General Use"],
+      "size": 637000000
+  }
+}
--- a/models/wizardcoder-13b/model.json
+++ b/models/wizardcoder-13b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q5_K_M.gguf",
+    "id": "wizardcoder-13b",
+    "object": "model",
+    "name": "Wizard Coder Python 13B",
+    "version": "1.0",
+    "description": "WizardCoder-Python-13B is a Python coding model comparable to major models like ChatGPT-3.5. This model, based on the Llama2 architecture, demonstrates high proficiency in specific domains like coding and mathematics.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "### Instruction:\n",
+      "ai_prompt": "### Response:\n"
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "WizardLM, The Bloke",
+      "tags": ["Code", "Big Context Length"],
+      "size": 9230000000
+    }
+  }
+  
--- a/models/wizardcoder-34b/model.json
+++ b/models/wizardcoder-34b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/WizardCoder-Python-34B-V1.0-GGUF/resolve/main/wizardcoder-python-34b-v1.0.Q5_K_M.gguf",
+    "id": "wizardcoder-34b",
+    "object": "model",
+    "name": "Wizard Coder Python 34B",
+    "version": "1.0",
+    "description": "WizardCoder-Python-34B is a Python coding model comparable to major models like ChatGPT-3.5. This model, based on the Llama2 architecture, demonstrates high proficiency in specific domains like coding and mathematics.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "### Instruction:\n",
+      "ai_prompt": "### Response:\n"
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "WizardLM, The Bloke",
+      "tags": ["Code", "Big Context Length"],
+      "size": 24320000000
+    }
+  }
+  
--- a/models/xwin-70b/model.json
+++ b/models/xwin-70b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Xwin-LM-70B-V0.1-GGUF/resolve/main/xwin-lm-70b-v0.1.Q5_K_M.gguf",
+    "id": "xwin-70b",
+    "object": "model",
+    "name": "Xwin LM 70B",
+    "version": "1.0",
+    "description": "Xwin-LM, based on Llama2 models, emphasizes alignment and exhibits advanced language understanding, text generation, and role-playing abilities.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "",
+      "user_prompt": "USER: ",
+      "ai_prompt": "ASSISTANT: "
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "Xwin-LM, The Bloke",
+      "tags": ["General Use", "Role-playing"],
+      "size": 48750000000
+    }
+  }
+  
--- a/models/yarn-70b/model.json
+++ b/models/yarn-70b/model.json
@ -0,0 +1,21 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Yarn-Llama-2-70B-32k-GGUF/resolve/main/yarn-llama-2-70b-32k.Q5_K_M.gguf",
+    "id": "yarn-70b",
+    "object": "model",
+    "name": "Yarn 32k 70B",
+    "version": "1.0",
+    "description": "Yarn-Llama-2-70b-32k is designed specifically for handling long contexts. It represents an extension of the Llama-2-70b-hf model, now supporting a 32k token context window.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "NousResearch, The Bloke",
+      "tags": ["General Use", "Big Context Length"],
+      "size": 48750000000
+    }
+  }
+  
--- a/models/yi-34b/model.json
+++ b/models/yi-34b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q5_K_M.gguf",
+    "id": "yi-34b",
+    "object": "model",
+    "name": "Yi 34B",
+    "version": "1.0",
+    "description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "<|im_start|>system\n",
+      "user_prompt": "<|im_end|>\n<|im_start|>user\n",
+      "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "01-ai, The Bloke",
+      "tags": ["General", "Role-playing", "Writing"],
+      "size": 24320000000
+    }
+  }
+  
--- a/models/zephyr-beta-7b/model.json
+++ b/models/zephyr-beta-7b/model.json
@ -0,0 +1,24 @@
+{
+    "source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_M.gguf",
+    "id": "zephyr-beta-7b",
+    "object": "model",
+    "name": "Zephyr Beta 7B",
+    "version": "1.0",
+    "description": "The Zephyr-7B-β model marks the second iteration in the Zephyr series, designed to function as an effective assistant. It has been fine-tuned from the mistralai/Mistral-7B-v0.1 base model, utilizing a combination of public and synthetic datasets with the application of Direct Preference Optimization.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 4096,
+      "system_prompt": "<|system|>\n",
+      "user_prompt": "</s>\n<|user|>\n",
+      "ai_prompt": "</s>\n<|assistant|>\n"
+    },
+    "parameters": {
+      "max_tokens": 4096
+    },
+    "metadata": {
+      "author": "HuggingFaceH4, The Bloke",
+      "tags": ["General Use", "Big Context Length"],
+      "size": 4370000000
+    }
+  }
+