# OpenAPI component schemas for the models API (list/get/start/stop/download/delete).
components:
  schemas:
ListModelsResponse:
|
|
type: object
|
|
properties:
|
|
object:
|
|
type: string
|
|
enum: [list]
|
|
data:
|
|
type: array
|
|
items:
|
|
$ref: "#/components/schemas/Model"
|
|
required:
|
|
- object
|
|
- data
|
|
|
|
Model:
|
|
type: object
|
|
properties:
|
|
type:
|
|
type: string
|
|
default: "model"
|
|
description: "The type of the object."
|
|
version:
|
|
type: string
|
|
default: "1"
|
|
description: "The version number of the model."
|
|
id:
|
|
type: string
|
|
description: "Unique identifier used in chat-completions model_name, matches folder name."
|
|
example: "zephyr-7b"
|
|
name:
|
|
type: string
|
|
description: "Name of the model."
|
|
example: "Zephyr 7B"
|
|
owned_by:
|
|
type: string
|
|
description: "Compatibility field for OpenAI."
|
|
default: ""
|
|
created:
|
|
type: integer
|
|
format: int64
|
|
description: "Unix timestamp representing the creation time."
|
|
description:
|
|
type: string
|
|
description: "Description of the model."
|
|
state:
|
|
type: string
|
|
enum: [null, "downloading", "ready", "starting", "stopping"]
|
|
description: "Current state of the model."
|
|
format:
|
|
type: string
|
|
description: "State format of the model, distinct from the engine."
|
|
example: "ggufv3"
|
|
source_url:
|
|
type: string
|
|
format: uri
|
|
description: "URL to the source of the model."
|
|
example: "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
|
|
settings:
|
|
type: object
|
|
properties:
|
|
ctx_len:
|
|
type: string
|
|
description: "Context length."
|
|
example: "2048"
|
|
ngl:
|
|
type: string
|
|
description: "Number of layers."
|
|
example: "100"
|
|
embedding:
|
|
type: string
|
|
description: "Indicates if embedding is enabled."
|
|
example: "true"
|
|
n_parallel:
|
|
type: string
|
|
description: "Number of parallel processes."
|
|
example: "4"
|
|
additionalProperties: false
|
|
parameters:
|
|
type: object
|
|
properties:
|
|
temperature:
|
|
type: string
|
|
description: "Temperature setting for the model."
|
|
example: "0.7"
|
|
token_limit:
|
|
type: string
|
|
description: "Token limit for the model."
|
|
example: "2048"
|
|
top_k:
|
|
type: string
|
|
description: "Top-k setting for the model."
|
|
example: "0"
|
|
top_p:
|
|
type: string
|
|
description: "Top-p setting for the model."
|
|
example: "1"
|
|
stream:
|
|
type: string
|
|
description: "Indicates if streaming is enabled."
|
|
example: "true"
|
|
additionalProperties: false
|
|
metadata:
|
|
type: object
|
|
description: "Additional metadata."
|
|
assets:
|
|
type: array
|
|
items:
|
|
type: string
|
|
description: "List of assets related to the model."
|
|
required:
|
|
- source_url
|
|
|
|
ModelObject:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: |
|
|
"The identifier of the model."
|
|
|
|
example: "zephyr-7b"
|
|
object:
|
|
type: string
|
|
description: |
|
|
"The type of the object, indicating it's a model."
|
|
|
|
default: "model"
|
|
created:
|
|
type: integer
|
|
format: int64
|
|
description: |
|
|
"Unix timestamp representing the creation time of the model."
|
|
|
|
example: "1253935178"
|
|
owned_by:
|
|
type: string
|
|
description: |
|
|
"The entity that owns the model."
|
|
|
|
example: "_"
|
|
|
|
GetModelResponse:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: "The identifier of the model."
|
|
example: "zephyr-7b"
|
|
object:
|
|
type: string
|
|
description: "Type of the object, indicating it's a model."
|
|
default: "model"
|
|
created:
|
|
type: integer
|
|
format: int64
|
|
description: "Unix timestamp representing the creation time of the model."
|
|
owned_by:
|
|
type: string
|
|
description: "The entity that owns the model."
|
|
example: "_"
|
|
state:
|
|
type: string
|
|
enum: [not_downloaded, downloaded, running, stopped]
|
|
description: "The current state of the model."
|
|
source_url:
|
|
type: string
|
|
format: uri
|
|
description: "URL to the source of the model."
|
|
example: "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
|
|
engine_parameters:
|
|
type: object
|
|
properties:
|
|
pre_prompt:
|
|
type: string
|
|
description: "Predefined prompt used for setting up internal configurations."
|
|
default: ""
|
|
example: "Initial setup complete."
|
|
system_prompt:
|
|
type: string
|
|
description: "Prefix used for system-level prompts."
|
|
default: "SYSTEM: "
|
|
user_prompt:
|
|
type: string
|
|
description: "Prefix used for user prompts."
|
|
default: "USER: "
|
|
ai_prompt:
|
|
type: string
|
|
description: "Prefix used for assistant prompts."
|
|
default: "ASSISTANT: "
|
|
ngl:
|
|
type: integer
|
|
description: "Number of neural network layers loaded onto the GPU for acceleration."
|
|
minimum: 0
|
|
maximum: 100
|
|
default: 100
|
|
example: 100
|
|
ctx_len:
|
|
type: integer
|
|
description: "Context length for model operations, varies based on the specific model."
|
|
minimum: 128
|
|
maximum: 4096
|
|
default: 2048
|
|
example: 2048
|
|
n_parallel:
|
|
type: integer
|
|
description: "Number of parallel operations, relevant when continuous batching is enabled."
|
|
minimum: 1
|
|
maximum: 10
|
|
default: 1
|
|
example: 4
|
|
cont_batching:
|
|
type: boolean
|
|
description: "Indicates if continuous batching is used for processing."
|
|
default: false
|
|
example: false
|
|
cpu_threads:
|
|
type: integer
|
|
description: "Number of threads allocated for CPU-based inference."
|
|
minimum: 1
|
|
example: 8
|
|
embedding:
|
|
type: boolean
|
|
description: "Indicates if embedding layers are enabled in the model."
|
|
default: true
|
|
example: true
|
|
model_parameters:
|
|
type: object
|
|
properties:
|
|
ctx_len:
|
|
type: integer
|
|
description: "Maximum context length the model can handle."
|
|
minimum: 0
|
|
maximum: 4096
|
|
default: 2048
|
|
example: 2048
|
|
ngl:
|
|
type: integer
|
|
description: "Number of layers in the neural network."
|
|
minimum: 1
|
|
maximum: 100
|
|
default: 100
|
|
example: 100
|
|
embedding:
|
|
type: boolean
|
|
description: "Indicates if embedding layers are used."
|
|
default: true
|
|
example: true
|
|
n_parallel:
|
|
type: integer
|
|
description: "Number of parallel processes the model can run."
|
|
minimum: 1
|
|
maximum: 10
|
|
default: 1
|
|
example: 4
|
|
temperature:
|
|
type: number
|
|
description: "Controls randomness in model's responses. Higher values lead to more random responses."
|
|
minimum: 0.0
|
|
maximum: 2.0
|
|
default: 0.7
|
|
example: 0.7
|
|
token_limit:
|
|
type: integer
|
|
description: "Maximum number of tokens the model can generate in a single response."
|
|
minimum: 1
|
|
maximum: 4096
|
|
default: 2048
|
|
example: 2048
|
|
top_k:
|
|
type: integer
|
|
description: "Limits the model to consider only the top k most likely next tokens at each step."
|
|
minimum: 0
|
|
maximum: 100
|
|
default: 0
|
|
example: 0
|
|
top_p:
|
|
type: number
|
|
description: "Nucleus sampling parameter. The model considers the smallest set of tokens whose cumulative probability exceeds the top_p value."
|
|
minimum: 0.0
|
|
maximum: 1.0
|
|
default: 1.0
|
|
example: 1.0
|
|
|
|
metadata:
|
|
type: object
|
|
properties:
|
|
engine:
|
|
type: string
|
|
description: "The engine used by the model."
|
|
example: "llamacpp"
|
|
quantization:
|
|
type: string
|
|
description: "Quantization parameter of the model."
|
|
example: "Q3_K_L"
|
|
size:
|
|
type: string
|
|
description: "Size of the model."
|
|
example: "7B"
|
|
required:
|
|
- id
|
|
- object
|
|
- created
|
|
- owned_by
|
|
- state
|
|
- source_url
|
|
- parameters
|
|
- metadata
|
|
|
|
DeleteModelResponse:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: "The identifier of the model that was deleted."
|
|
example: "model-zephyr-7B"
|
|
object:
|
|
type: string
|
|
description: "Type of the object, indicating it's a model."
|
|
default: "model"
|
|
deleted:
|
|
type: boolean
|
|
description: "Indicates whether the model was successfully deleted."
|
|
example: true
|
|
|
|
|
|
StartModelResponse:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: "The identifier of the model that was started."
|
|
example: "model-zephyr-7B"
|
|
object:
|
|
type: string
|
|
description: "Type of the object, indicating it's a model."
|
|
default: "model"
|
|
state:
|
|
type: string
|
|
description: "The current state of the model after the start operation."
|
|
example: "running"
|
|
required:
|
|
- id
|
|
- object
|
|
- state
|
|
|
|
StopModelResponse:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: "The identifier of the model that was started."
|
|
example: "model-zephyr-7B"
|
|
object:
|
|
type: string
|
|
description: "Type of the object, indicating it's a model."
|
|
default: "model"
|
|
state:
|
|
type: string
|
|
description: "The current state of the model after the start operation."
|
|
example: "stopped"
|
|
required:
|
|
- id
|
|
- object
|
|
- state
|
|
|
|
DownloadModelResponse:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: "The identifier of the model that was started."
|
|
example: "model-zephyr-7B"
|
|
object:
|
|
type: string
|
|
description: "Type of the object, indicating it's a model."
|
|
default: "model"
|
|
state:
|
|
type: string
|
|
description: "The current state of the model after the start operation."
|
|
example: "downloaded"
|
|
|