[
  {
    "key": "cont_batching",
    "title": "Continuous Batching",
    "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "n_parallel",
    "title": "Parallel Operations",
    "description": "Number of prompts that can be processed simultaneously by the model.",
    "controllerType": "input",
    "controllerProps": {
      "value": "4",
      "placeholder": "4",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cpu_threads",
    "title": "CPU Threads",
    "description": "Number of CPU cores used for model processing when running without GPU.",
    "controllerType": "input",
    "controllerProps": {
      "value": "-1",
      "placeholder": "Number of CPU threads",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "threads_batch",
    "title": "Threads (Batch)",
    "description": "Number of threads for batch and prompt processing (default: same as Threads).",
    "controllerType": "input",
    "controllerProps": {
      "value": -1,
      "placeholder": "-1 (same as Threads)",
      "type": "number"
    }
  },
  {
    "key": "flash_attn",
    "title": "Flash Attention",
    "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },

  {
    "key": "caching_enabled",
    "title": "Caching",
    "description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "cache_type",
    "title": "KV Cache Type",
    "description": "Controls memory usage and precision trade-off.",
    "controllerType": "dropdown",
    "controllerProps": {
      "value": "f16",
      "options": [
        {
          "value": "q4_0",
          "name": "q4_0"
        },
        {
          "value": "q8_0",
          "name": "q8_0"
        },
        {
          "value": "f16",
          "name": "f16"
        }
      ]
    }
  },
  {
    "key": "use_mmap",
    "title": "mmap",
    "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  }
]