{ "source_url": "https://huggingface.co/TheBloke/Yarn-Llama-2-70B-32k-GGUF/resolve/main/yarn-llama-2-70b-32k.Q5_K_M.gguf", "id": "yarn-70b", "object": "model", "name": "Yarn 32k 70B", "version": "1.0", "description": "Yarn-Llama-2-70b-32k is designed specifically for handling long contexts. It represents an extension of the Llama-2-70b-hf model, now supporting a 32k token context window.", "format": "gguf", "settings": { "ctx_len": 4096 }, "parameters": { "max_tokens": 4096 }, "metadata": { "author": "NousResearch, The Bloke", "tags": ["General Use", "Big Context Length"], "size": 48750000000 } }