jan/docs/openapi/specs/chat.yaml
2023-12-10 11:37:51 +09:00

196 lines
6.0 KiB
YAML

components:
  schemas:
    ChatObject:
      type: object
      properties:
        messages:
          # FIX: "arrays" is not a valid OpenAPI type — must be "array"
          type: array
          description: |
            Contains input data or prompts for the model to process.
          # items schema inferred from the example entries below
          items:
            type: object
            properties:
              content:
                type: string
              role:
                type: string
          example:
            - content: "Hello there :wave:"
              role: assistant
            - content: Can you write a long story
              role: user
        stream:
          type: boolean
          default: true
          description: >-
            Enables continuous output generation, allowing for streaming of
            model responses.
        model:
          type: string
          example: gpt-3.5-turbo
          description: Specifies the model being used for inference or processing tasks.
        max_tokens:
          # FIX: token counts are whole numbers — "number" implies float
          type: integer
          default: 2048
          description: >-
            The maximum number of tokens the model will generate in a single
            response.
        stop:
          type: array
          items:
            type: string
          example:
            - hello
          description: >-
            Defines specific tokens or phrases at which the model will stop
            generating further output.
        frequency_penalty:
          type: number
          default: 0
          description: >-
            Adjusts the likelihood of the model repeating words or phrases in
            its output.
        presence_penalty:
          type: number
          default: 0
          description: >-
            Influences the generation of new and varied concepts in the model's
            output.
        temperature:
          type: number
          default: 0.7
          # FIX: "min"/"max" are not OpenAPI keywords — use minimum/maximum
          minimum: 0
          maximum: 1
          description: Controls the randomness of the model's output.
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: Set probability threshold for more relevant outputs.
        cache_prompt:
          type: boolean
          default: true
          description: Optimize performance in repeated or similar requests.
    ChatCompletionRequest:
      type: object
      properties:
        messages:
          type: array
          description: |
            Contains input data or prompts for the model to process.
          items:
            type: object
            properties:
              content:
                type: string
              role:
                type: string
          example:
            - content: "Hello there :wave:"
              role: assistant
            - content: Can you write a long story
              role: user
        model:
          type: string
          example: model-zephyr-7B
          description: |
            Specifies the model being used for inference or processing tasks.
        stream:
          type: boolean
          default: true
          description: >
            Enables continuous output generation, allowing for streaming of
            model responses.
        max_tokens:
          type: integer
          default: 2048
          description: >
            The maximum number of tokens the model will generate in a single
            response.
        stop:
          type: array
          items:
            type: string
          example:
            - hello
          description: >
            Defines specific tokens or phrases at which the model will stop
            generating further output.
        frequency_penalty:
          type: number
          default: 0
          description: >
            Adjusts the likelihood of the model repeating words or phrases in
            its output.
        presence_penalty:
          type: number
          default: 0
          description: >
            Influences the generation of new and varied concepts in the model's
            output.
        temperature:
          type: number
          default: 0.7
          minimum: 0
          maximum: 1
          description: |
            Controls the randomness of the model's output.
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: |
            Set probability threshold for more relevant outputs.
    ChatCompletionResponse:
      type: object
      description: Description of the response structure
      properties:
        choices:
          type: array
          description: Array of choice objects
          items:
            type: object
            properties:
              finish_reason:
                type: string
                nullable: true
                example: null
                description: "Reason for finishing the response, if applicable"
              index:
                type: integer
                example: 0
                description: Index of the choice
              message:
                type: object
                properties:
                  content:
                    type: string
                    example: Hello user. What can I help you with?
                    description: Content of the message
                  role:
                    type: string
                    example: assistant
                    description: Role of the sender
        created:
          type: integer
          example: 1700193928
          description: Timestamp of when the response was created
        id:
          type: string
          example: ebwd2niJvJB1Q2Whyvkz
          description: Unique identifier of the response
        model:
          type: string
          nullable: true
          example: _
          description: Model used for generating the response
        object:
          type: string
          example: chat.completion
          description: Type of the response object
        system_fingerprint:
          type: string
          nullable: true
          example: _
          description: System fingerprint
        usage:
          type: object
          description: Information about the usage of tokens
          properties:
            completion_tokens:
              type: integer
              example: 500
              description: Number of tokens used for completion
            prompt_tokens:
              type: integer
              example: 33
              description: Number of tokens used in the prompt
            total_tokens:
              type: integer
              example: 533
              description: Total number of tokens used