jan/docs/openapi/specs/chat.yaml
2023-11-30 10:27:04 +09:00

182 lines
5.9 KiB
YAML

# OpenAPI components section: holds the reusable schema definitions for the
# chat endpoints (ChatObject, ChatCompletionRequest, ChatCompletionResponse).
# NOTE(review): nesting indentation appears to have been lost in this copy;
# `schemas` is expected to be a child of `components` — confirm against the
# original file before publishing.
components:
schemas:
# ChatObject: chat payload schema — the conversation messages plus the
# generation/sampling settings. Carries the same fields as
# ChatCompletionRequest, with the additional `cache_prompt` flag.
ChatObject:
  type: object
  properties:
    messages:
      # `array` is the valid schema type name (`arrays` is not recognized),
      # and OpenAPI requires `items` on every array schema.
      type: array
      description: |
        Contains input data or prompts for the model to process
      # Item shape follows the example below: a content string and a role.
      items:
        type: object
        properties:
          content:
            type: string
          role:
            type: string
      example:
        - content: "Hello there :wave:"
          role: assistant
        - content: Can you write a long story
          role: user
    stream:
      type: boolean
      default: true
      description: Enables continuous output generation, allowing for streaming of model responses
    model:
      type: string
      example: "gpt-3.5-turbo"
      description: Specifies the model being used for inference or processing tasks
    max_tokens:
      type: number
      default: 2048
      description: The maximum number of tokens the model will generate in a single response
    stop:
      type: array
      items:
        type: string
      example: ["hello"]
      description: Defines specific tokens or phrases at which the model will stop generating further output
    frequency_penalty:
      type: number
      default: 0
      description: Adjusts the likelihood of the model repeating words or phrases in its output
    presence_penalty:
      type: number
      default: 0
      description: Influences the generation of new and varied concepts in the model's output
    temperature:
      type: number
      default: 0.7
      # Range keywords are `minimum`/`maximum`; `min`/`max` are not schema
      # keywords, so the bounds were never part of the schema.
      minimum: 0
      maximum: 1
      description: Controls the randomness of the model's output
    top_p:
      type: number
      default: 0.95
      minimum: 0
      maximum: 1
      description: Set probability threshold for more relevant outputs
    cache_prompt:
      type: boolean
      default: true
      description: Optimize performance in repeated or similar requests.
# ChatCompletionRequest: request body schema for a chat completion — the
# conversation messages, the target model, and the generation/sampling
# settings described per field below.
ChatCompletionRequest:
  type: object
  properties:
    messages:
      # `array` is the valid schema type name (`arrays` is not recognized),
      # and OpenAPI requires `items` on every array schema.
      type: array
      description: |
        Contains input data or prompts for the model to process
      # Item shape follows the example below: a content string and a role.
      items:
        type: object
        properties:
          content:
            type: string
          role:
            type: string
      example:
        - content: "Hello there :wave:"
          role: assistant
        - content: Can you write a long story
          role: user
    model:
      type: string
      example: model-zephyr-7B
      description: |
        Specifies the model being used for inference or processing tasks
    stream:
      type: boolean
      default: true
      description: |
        Enables continuous output generation, allowing for streaming of model responses
    max_tokens:
      type: number
      default: 2048
      description: |
        The maximum number of tokens the model will generate in a single response
    stop:
      type: array
      items:
        type: string
      example: ["hello"]
      description: |
        Defines specific tokens or phrases at which the model will stop generating further output
    frequency_penalty:
      type: number
      default: 0
      description: |
        Adjusts the likelihood of the model repeating words or phrases in its output
    presence_penalty:
      type: number
      default: 0
      description: |
        Influences the generation of new and varied concepts in the model's output
    temperature:
      type: number
      default: 0.7
      # Range keywords are `minimum`/`maximum`; `min`/`max` are not schema
      # keywords, so the bounds were never part of the schema.
      minimum: 0
      maximum: 1
      description: |
        Controls the randomness of the model's output
    top_p:
      type: number
      default: 0.95
      minimum: 0
      maximum: 1
      description: |
        Set probability threshold for more relevant outputs
# ChatCompletionResponse: response body schema. Top-level properties are the
# list of generated choices, response metadata (created, id, model, object,
# system_fingerprint) and token usage counters.
ChatCompletionResponse:
  description: Description of the response structure
  type: object
  properties:
    # --- generated output ---
    choices:
      description: Array of choice objects
      type: array
      items:
        type: object
        properties:
          finish_reason:
            description: Reason for finishing the response, if applicable
            type: string
            nullable: true
            example: null
          index:
            description: Index of the choice
            type: integer
            example: 0
          message:
            type: object
            properties:
              content:
                description: Content of the message
                type: string
                example: "Hello user. What can I help you with?"
              role:
                description: Role of the sender
                type: string
                example: "assistant"
    # --- response metadata ---
    created:
      description: Timestamp of when the response was created
      type: integer
      example: 1700193928
    id:
      description: Unique identifier of the response
      type: string
      example: "ebwd2niJvJB1Q2Whyvkz"
    model:
      description: Model used for generating the response
      type: string
      nullable: true
      example: "_"
    object:
      description: Type of the response object
      type: string
      example: "chat.completion"
    system_fingerprint:
      description: System fingerprint
      type: string
      nullable: true
      example: "_"
    # --- token accounting ---
    usage:
      description: Information about the usage of tokens
      type: object
      properties:
        completion_tokens:
          description: Number of tokens used for completion
          type: integer
          example: 500
        prompt_tokens:
          description: Number of tokens used in the prompt
          type: integer
          example: 33
        total_tokens:
          description: Total number of tokens used
          type: integer
          example: 533