jan/docs/openapi/specs/chat.yaml
2023-12-10 11:37:51 +09:00

196 lines
6.0 KiB
YAML

components:
  schemas:
    ChatObject:
      type: object
      properties:
        messages:
          # FIX: "arrays" is not a valid OpenAPI type — must be "array"
          type: array
          description: |
            Contains input data or prompts for the model to process.
          # items schema inferred from the example entries below
          items:
            type: object
            properties:
              content:
                type: string
              role:
                type: string
          example:
            - content: "Hello there :wave:"
              role: assistant
            - content: Can you write a long story
              role: user
        stream:
          type: boolean
          default: true
          description: >-
            Enables continuous output generation, allowing for streaming of
            model responses.
        model:
          type: string
          example: gpt-3.5-turbo
          description: Specifies the model being used for inference or processing tasks.
        max_tokens:
          # FIX: token counts are whole numbers — "number" implies float
          type: integer
          default: 2048
          description: >-
            The maximum number of tokens the model will generate in a single
            response.
        stop:
          type: array
          items:
            type: string
          example:
            - hello
          description: >-
            Defines specific tokens or phrases at which the model will stop
            generating further output.
        frequency_penalty:
          type: number
          default: 0
          description: >-
            Adjusts the likelihood of the model repeating words or phrases in
            its output.
        presence_penalty:
          type: number
          default: 0
          description: >-
            Influences the generation of new and varied concepts in the model's
            output.
        temperature:
          type: number
          default: 0.7
          # FIX: "min"/"max" are not OpenAPI keywords — use minimum/maximum
          minimum: 0
          maximum: 1
          description: Controls the randomness of the model's output.
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: Set probability threshold for more relevant outputs.
        cache_prompt:
          type: boolean
          default: true
          description: Optimize performance in repeated or similar requests.
    ChatCompletionRequest:
      type: object
      properties:
        messages:
          type: array
          description: |
            Contains input data or prompts for the model to process.
          items:
            type: object
            properties:
              content:
                type: string
              role:
                type: string
          example:
            - content: "Hello there :wave:"
              role: assistant
            - content: Can you write a long story
              role: user
        model:
          type: string
          example: model-zephyr-7B
          description: |
            Specifies the model being used for inference or processing tasks.
        stream:
          type: boolean
          default: true
          description: >
            Enables continuous output generation, allowing for streaming of
            model responses.
        max_tokens:
          type: integer
          default: 2048
          description: >
            The maximum number of tokens the model will generate in a single
            response.
        stop:
          type: array
          items:
            type: string
          example:
            - hello
          description: >
            Defines specific tokens or phrases at which the model will stop
            generating further output.
        frequency_penalty:
          type: number
          default: 0
          description: >
            Adjusts the likelihood of the model repeating words or phrases in
            its output.
        presence_penalty:
          type: number
          default: 0
          description: >
            Influences the generation of new and varied concepts in the model's
            output.
        temperature:
          type: number
          default: 0.7
          minimum: 0
          maximum: 1
          description: |
            Controls the randomness of the model's output.
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: |
            Set probability threshold for more relevant outputs.
    ChatCompletionResponse:
      type: object
      description: Description of the response structure
      properties:
        choices:
          type: array
          description: Array of choice objects
          items:
            type: object
            properties:
              finish_reason:
                type: string
                nullable: true
                example: null
                description: "Reason for finishing the response, if applicable"
              index:
                type: integer
                example: 0
                description: Index of the choice
              message:
                type: object
                properties:
                  content:
                    type: string
                    example: Hello user. What can I help you with?
                    description: Content of the message
                  role:
                    type: string
                    example: assistant
                    description: Role of the sender
        created:
          type: integer
          example: 1700193928
          description: Timestamp of when the response was created
        id:
          type: string
          example: ebwd2niJvJB1Q2Whyvkz
          description: Unique identifier of the response
        model:
          type: string
          nullable: true
          example: _
          description: Model used for generating the response
        object:
          type: string
          example: chat.completion
          description: Type of the response object
        system_fingerprint:
          type: string
          nullable: true
          example: _
          description: System fingerprint
        usage:
          type: object
          description: Information about the usage of tokens
          properties:
            completion_tokens:
              type: integer
              example: 500
              description: Number of tokens used for completion
            prompt_tokens:
              type: integer
              example: 33
              description: Number of tokens used in the prompt
            total_tokens:
              type: integer
              example: 533
              description: Total number of tokens used