jan/models/yarn-70b/model.json

{
    "source_url": "https://huggingface.co/TheBloke/Yarn-Llama-2-70B-32k-GGUF/resolve/main/yarn-llama-2-70b-32k.Q5_K_M.gguf",
    "id": "yarn-70b",
    "object": "model",
    "name": "Yarn 32k 70B",
    "version": "1.0",
    "description": "Yarn-Llama-2-70b-32k is designed specifically for handling long contexts. It represents an extension of the Llama-2-70b-hf model, now supporting a 32k token context window.",
    "format": "gguf",
    "settings": {
      "ctx_len": 4096
    },
    "parameters": {
      "max_tokens": 4096
    },
    "metadata": {
      "author": "NousResearch, The Bloke",
      "tags": ["General Use", "Big Context Length"],
      "size": 48750000000
    }
  }