components:
  schemas:
    ChatObject:
      type: object
      properties:
        messages:
          type: array
          items:
            type: object
          description: |
            Contains input data or prompts for the model to process
          example:
            [
              { "content": "Hello there :wave:", "role": "assistant" },
              { "content": "Can you write a long story", "role": "user" }
            ]
        stream:
          type: boolean
          default: true
          description: Enables continuous output generation, allowing for streaming of model responses
        model:
          type: string
          example: "gpt-3.5-turbo"
          description: Specifies the model being used for inference or processing tasks
        max_tokens:
          type: integer
          default: 2048
          description: The maximum number of tokens the model will generate in a single response
        stop:
          type: array
          items:
            type: string
          example: ["hello"]
          description: Defines specific tokens or phrases at which the model will stop generating further output
        frequency_penalty:
          type: number
          default: 0
          description: Adjusts the likelihood of the model repeating words or phrases in its output
        presence_penalty:
          type: number
          default: 0
          description: Influences the generation of new and varied concepts in the model's output
        temperature:
          type: number
          default: 0.7
          minimum: 0
          maximum: 1
          description: Controls the randomness of the model's output
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: Sets the probability threshold for more relevant outputs
        cache_prompt:
          type: boolean
          default: true
          description: Optimizes performance in repeated or similar requests
    ChatCompletionRequest:
      type: object
      properties:
        messages:
          type: array
          items:
            type: object
          description: |
            Contains input data or prompts for the model to process
          example:
            [
              { "content": "Hello there :wave:", "role": "assistant" },
              { "content": "Can you write a long story", "role": "user" }
            ]
        model:
          type: string
          example: model-zephyr-7B
          description: |
            Specifies the model being used for inference or processing tasks
        stream:
          type: boolean
          default: true
          description: |
            Enables continuous output generation, allowing for streaming of model responses
        max_tokens:
          type: integer
          default: 2048
          description: |
            The maximum number of tokens the model will generate in a single response
        stop:
          type: array
          items:
            type: string
          example: ["hello"]
          description: |
            Defines specific tokens or phrases at which the model will stop generating further output
        frequency_penalty:
          type: number
          default: 0
          description: |
            Adjusts the likelihood of the model repeating words or phrases in its output
        presence_penalty:
          type: number
          default: 0
          description: |
            Influences the generation of new and varied concepts in the model's output
        temperature:
          type: number
          default: 0.7
          minimum: 0
          maximum: 1
          description: |
            Controls the randomness of the model's output
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: |
            Sets the probability threshold for more relevant outputs
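    # For reference, a request body conforming to ChatCompletionRequest,
    # assembled from the example values above. The /v1/chat/completions
    # path is an assumption based on the OpenAI-compatible shape of these
    # schemas; it is not defined in this file.
    #
    #   POST /v1/chat/completions
    #   {
    #     "model": "model-zephyr-7B",
    #     "messages": [
    #       { "role": "user", "content": "Can you write a long story" }
    #     ],
    #     "stream": true,
    #     "max_tokens": 2048,
    #     "temperature": 0.7,
    #     "top_p": 0.95,
    #     "stop": ["hello"]
    #   }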
    ChatCompletionResponse:
      type: object
      description: Structure of a chat completion response
      properties:
        choices:
          type: array
          description: Array of choice objects
          items:
            type: object
            properties:
              finish_reason:
                type: string
                nullable: true
                example: null
                description: Reason for finishing the response, if applicable
              index:
                type: integer
                example: 0
                description: Index of the choice
              message:
                type: object
                properties:
                  content:
                    type: string
                    example: "Hello user. What can I help you with?"
                    description: Content of the message
                  role:
                    type: string
                    example: assistant
                    description: Role of the sender
        created:
          type: integer
          example: 1700193928
          description: Timestamp of when the response was created
        id:
          type: string
          example: ebwd2niJvJB1Q2Whyvkz
          description: Unique identifier of the response
        model:
          type: string
          nullable: true
          example: _
          description: Model used for generating the response
        object:
          type: string
          example: chat.completion
          description: Type of the response object
        system_fingerprint:
          type: string
          nullable: true
          example: _
          description: System fingerprint
        usage:
          type: object
          description: Information about the usage of tokens
          properties:
            completion_tokens:
              type: integer
              example: 500
              description: Number of tokens used for completion
            prompt_tokens:
              type: integer
              example: 33
              description: Number of tokens used in the prompt
            total_tokens:
              type: integer
              example: 533
              description: Total number of tokens used
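    # For reference, a response body conforming to ChatCompletionResponse,
    # assembled from the example values above. This is a sketch of the
    # expected shape, not output captured from a live server.
    #
    #   {
    #     "choices": [
    #       {
    #         "finish_reason": null,
    #         "index": 0,
    #         "message": {
    #           "content": "Hello user. What can I help you with?",
    #           "role": "assistant"
    #         }
    #       }
    #     ],
    #     "created": 1700193928,
    #     "id": "ebwd2niJvJB1Q2Whyvkz",
    #     "model": "_",
    #     "object": "chat.completion",
    #     "system_fingerprint": "_",
    #     "usage": {
    #       "completion_tokens": 500,
    #       "prompt_tokens": 33,
    #       "total_tokens": 533
    #     }
    #   }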