---
components:
  schemas:
    ChatObject:
      type: object
      properties:
        messages:
          type: array
          description: |
            Contains input data or prompts for the model to process.
          example:
            - content: 'Hello there :wave:'
              role: assistant
            - content: Can you write a long story
              role: user
        stream:
          type: boolean
          default: true
          description: >
            Enables continuous output generation, allowing for streaming of
            model responses.
        model:
          type: string
          example: gpt-3.5-turbo
          description: Specifies the model being used for inference or processing tasks.
        max_tokens:
          type: number
          default: 2048
          description: >
            The maximum number of tokens the model will generate in a single
            response.
        stop:
          type: array
          example:
            - hello
          description: >
            Defines specific tokens or phrases at which the model will stop
            generating further output.
        frequency_penalty:
          type: number
          default: 0
          description: >
            Adjusts the likelihood of the model repeating words or phrases in
            its output.
        presence_penalty:
          type: number
          default: 0
          description: >
            Influences the generation of new and varied concepts in the model's
            output.
        temperature:
          type: number
          default: 0.7
          minimum: 0
          maximum: 1
          description: Controls the randomness of the model's output.
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: Sets the probability threshold for more relevant outputs.
        cache_prompt:
          type: boolean
          default: true
          description: Optimizes performance in repeated or similar requests.
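    # Illustration only, not part of the schema: a JSON request body that
    # conforms to ChatObject, assembled from the example values and defaults
    # documented above.
    #
    #   {
    #     "messages": [
    #       { "content": "Hello there :wave:", "role": "assistant" },
    #       { "content": "Can you write a long story", "role": "user" }
    #     ],
    #     "model": "gpt-3.5-turbo",
    #     "stream": true,
    #     "max_tokens": 2048,
    #     "stop": ["hello"],
    #     "frequency_penalty": 0,
    #     "presence_penalty": 0,
    #     "temperature": 0.7,
    #     "top_p": 0.95,
    #     "cache_prompt": true
    #   }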
    ChatCompletionRequest:
      type: object
      properties:
        messages:
          type: array
          description: |
            Contains input data or prompts for the model to process.
          example:
            - content: You are a helpful assistant.
              role: system
            - content: Hello!
              role: user
        model:
          type: string
          example: tinyllama-1.1b
          description: |
            Specifies the model being used for inference or processing tasks.
        stream:
          type: boolean
          default: true
          description: >
            Enables continuous output generation, allowing for streaming of
            model responses.
        max_tokens:
          type: number
          default: 2048
          description: >
            The maximum number of tokens the model will generate in a single
            response.
        stop:
          type: array
          example:
            - hello
          description: >
            Defines specific tokens or phrases at which the model will stop
            generating further output.
        frequency_penalty:
          type: number
          default: 0
          description: >
            Adjusts the likelihood of the model repeating words or phrases in
            its output.
        presence_penalty:
          type: number
          default: 0
          description: >
            Influences the generation of new and varied concepts in the model's
            output.
        temperature:
          type: number
          default: 0.7
          minimum: 0
          maximum: 1
          description: |
            Controls the randomness of the model's output.
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: |
            Sets the probability threshold for more relevant outputs.
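    # Illustration only, not part of the schema: a JSON request body that
    # conforms to ChatCompletionRequest, assembled from the example values and
    # defaults documented above. Omitted optional fields (stop, the penalties)
    # fall back to their defaults; the target endpoint is not defined here.
    #
    #   {
    #     "messages": [
    #       { "content": "You are a helpful assistant.", "role": "system" },
    #       { "content": "Hello!", "role": "user" }
    #     ],
    #     "model": "tinyllama-1.1b",
    #     "stream": true,
    #     "max_tokens": 2048,
    #     "temperature": 0.7,
    #     "top_p": 0.95
    #   }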
    ChatCompletionResponse:
      type: object
      description: Describes the structure of a chat completion response.
      properties:
        choices:
          type: array
          description: Array of choice objects
          items:
            type: object
            properties:
              finish_reason:
                type: string
                nullable: true
                example: null
                description: Reason for finishing the response, if applicable
              index:
                type: integer
                example: 0
                description: Index of the choice
              message:
                type: object
                properties:
                  content:
                    type: string
                    example: Hello user. What can I help you with?
                    description: Content of the message
                  role:
                    type: string
                    example: assistant
                    description: Role of the sender
        created:
          type: integer
          example: 1700193928
          description: Timestamp of when the response was created
        id:
          type: string
          example: ebwd2niJvJB1Q2Whyvkz
          description: Unique identifier of the response
        model:
          type: string
          nullable: true
          example: _
          description: Model used for generating the response
        object:
          type: string
          example: chat.completion
          description: Type of the response object
        system_fingerprint:
          type: string
          nullable: true
          example: _
          description: System fingerprint
        usage:
          type: object
          description: Information about the usage of tokens
          properties:
            completion_tokens:
              type: integer
              example: 500
              description: Number of tokens used for completion
            prompt_tokens:
              type: integer
              example: 33
              description: Number of tokens used in the prompt
            total_tokens:
              type: integer
              example: 533
              description: Total number of tokens used
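    # Illustration only, not part of the schema: a JSON response body that
    # conforms to ChatCompletionResponse, assembled from the example values
    # documented above ("_" is the placeholder example the schema itself uses).
    #
    #   {
    #     "choices": [
    #       {
    #         "finish_reason": null,
    #         "index": 0,
    #         "message": {
    #           "content": "Hello user. What can I help you with?",
    #           "role": "assistant"
    #         }
    #       }
    #     ],
    #     "created": 1700193928,
    #     "id": "ebwd2niJvJB1Q2Whyvkz",
    #     "model": "_",
    #     "object": "chat.completion",
    #     "system_fingerprint": "_",
    #     "usage": {
    #       "completion_tokens": 500,
    #       "prompt_tokens": 33,
    #       "total_tokens": 533
    #     }
    #   }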