components:
  schemas:
    ChatObject:
      type: object
      properties:
        messages:
          type: array
          items:
            type: object
          description: |
            Contains input data or prompts for the model to process
          example:
            [
              { "content": "Hello there :wave:", "role": "assistant" },
              { "content": "Can you write a long story", "role": "user" }
            ]
        stream:
          type: boolean
          default: true
          description: Enables continuous output generation, allowing for streaming of model responses
        model:
          type: string
          example: "gpt-3.5-turbo"
          description: Specifies the model being used for inference or processing tasks
        max_tokens:
          type: integer
          default: 2048
          description: The maximum number of tokens the model will generate in a single response
        stop:
          type: array
          items:
            type: string
          example: ["hello"]
          description: Defines specific tokens or phrases at which the model will stop generating further output
        frequency_penalty:
          type: number
          default: 0
          description: Adjusts the likelihood of the model repeating words or phrases in its output
        presence_penalty:
          type: number
          default: 0
          description: Influences the generation of new and varied concepts in the model's output
        temperature:
          type: number
          default: 0.7
          minimum: 0
          maximum: 1
          description: Controls the randomness of the model's output
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: Sets the probability threshold for more relevant outputs
        cache_prompt:
          type: boolean
          default: true
          description: Optimizes performance in repeated or similar requests
    ChatCompletionRequest:
      type: object
      properties:
        messages:
          type: array
          items:
            type: object
          description: |
            Contains input data or prompts for the model to process
          example:
            [
              { "content": "Hello there :wave:", "role": "assistant" },
              { "content": "Can you write a long story", "role": "user" }
            ]
        model:
          type: string
          example: model-zephyr-7B
          description: |
            Specifies the model being used for inference or processing tasks
        stream:
          type: boolean
          default: true
          description: |
            Enables continuous output generation, allowing for streaming of model responses
        max_tokens:
          type: integer
          default: 2048
          description: |
            The maximum number of tokens the model will generate in a single response
        stop:
          type: array
          items:
            type: string
          example: ["hello"]
          description: |
            Defines specific tokens or phrases at which the model will stop generating further output
        frequency_penalty:
          type: number
          default: 0
          description: |
            Adjusts the likelihood of the model repeating words or phrases in its output
        presence_penalty:
          type: number
          default: 0
          description: |
            Influences the generation of new and varied concepts in the model's output
        temperature:
          type: number
          default: 0.7
          minimum: 0
          maximum: 1
          description: |
            Controls the randomness of the model's output
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: |
            Sets the probability threshold for more relevant outputs
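    # For reference, a request body conforming to ChatCompletionRequest,
    # assembled from the example values above. The /v1/chat/completions
    # path is an assumption based on the OpenAI-compatible shape of these
    # schemas; it is not defined in this file.
    #
    #   POST /v1/chat/completions
    #   {
    #     "model": "model-zephyr-7B",
    #     "messages": [
    #       { "role": "user", "content": "Can you write a long story" }
    #     ],
    #     "stream": true,
    #     "max_tokens": 2048,
    #     "temperature": 0.7,
    #     "top_p": 0.95,
    #     "stop": ["hello"]
    #   }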
    ChatCompletionResponse:
      type: object
      description: Structure of a chat completion response
      properties:
        choices:
          type: array
          description: Array of choice objects
          items:
            type: object
            properties:
              finish_reason:
                type: string
                nullable: true
                example: null
                description: Reason for finishing the response, if applicable
              index:
                type: integer
                example: 0
                description: Index of the choice
              message:
                type: object
                properties:
                  content:
                    type: string
                    example: "Hello user. What can I help you with?"
                    description: Content of the message
                  role:
                    type: string
                    example: assistant
                    description: Role of the sender
        created:
          type: integer
          example: 1700193928
          description: Timestamp of when the response was created
        id:
          type: string
          example: ebwd2niJvJB1Q2Whyvkz
          description: Unique identifier of the response
        model:
          type: string
          nullable: true
          example: _
          description: Model used for generating the response
        object:
          type: string
          example: chat.completion
          description: Type of the response object
        system_fingerprint:
          type: string
          nullable: true
          example: _
          description: System fingerprint
        usage:
          type: object
          description: Information about the usage of tokens
          properties:
            completion_tokens:
              type: integer
              example: 500
              description: Number of tokens used for completion
            prompt_tokens:
              type: integer
              example: 33
              description: Number of tokens used in the prompt
            total_tokens:
              type: integer
              example: 533
              description: Total number of tokens used
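    # For reference, a response body conforming to ChatCompletionResponse,
    # assembled from the example values above. This is a sketch of the
    # expected shape, not output captured from a live server.
    #
    #   {
    #     "choices": [
    #       {
    #         "finish_reason": null,
    #         "index": 0,
    #         "message": {
    #           "content": "Hello user. What can I help you with?",
    #           "role": "assistant"
    #         }
    #       }
    #     ],
    #     "created": 1700193928,
    #     "id": "ebwd2niJvJB1Q2Whyvkz",
    #     "model": "_",
    #     "object": "chat.completion",
    #     "system_fingerprint": "_",
    #     "usage": {
    #       "completion_tokens": 500,
    #       "prompt_tokens": 33,
    #       "total_tokens": 533
    #     }
    #   }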