docs: add ChatCompletion

This commit is contained in:
hieu-jan 2023-11-28 06:51:48 +09:00
parent a2a0c38144
commit ed3548444c
2 changed files with 247 additions and 11 deletions

View File

@ -14,8 +14,10 @@ servers:
tags: tags:
- name: Models - name: Models
description: List and describe the various models available in the API. description: List and describe the various models available in the API.
- name: Chat Completion - name: Chat
description: Given a list of messages comprising a conversation, the model will return a response. description: |
Given a list of messages comprising a conversation, the model will return a response.
- name: Messages - name: Messages
description: | description: |
Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats). Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
@ -38,13 +40,51 @@ x-tagGroups:
- name: Endpoints - name: Endpoints
tags: tags:
- Models - Models
- Chat Completion - Chat
- name: Chat - name: Chat
tags: tags:
- Assistants - Assistants
- Messages - Messages
- Threads - Threads
paths: paths:
/chat/completions:
post:
operationId: createChatCompletion
tags:
- Chat
summary: Create chat completion
description: |
<a href = "https://platform.openai.com/docs/api-reference/chat/create"> <button style = "color: #388434"> OpenAI compatible </button></a>
and <span style = "color: #fec928"> Jan specified </span>
Creates a model response for the given chat conversation.
requestBody:
content:
application/json:
schema:
$ref: "specs/chat.yaml#/components/schemas/ChatCompletionRequest"
responses:
"200":
description: OK
content:
application/json:
schema:
$ref: "specs/chat.yaml#/components/schemas/ChatCompletionResponse"
x-codeSamples:
- lang: "curl"
source: |
curl -X POST 'http://localhost:3982/inferences/llamacpp/chat_completion' \
-H "Content-Type: application/json" \
-d '{
"llama_model_path": "/path/to/your/model.gguf",
"messages": [
{
"role": "user",
"content": "hello"
              }
]
}'
### MODELS ### MODELS
/models: /models:
get: get:
@ -54,10 +94,9 @@ paths:
summary: List models summary: List models
description: | description: |
<a href = "https://platform.openai.com/docs/api-reference/models/list"> <button style = "color: #388434"> OpenAI Compatible </button></a> <a href = "https://platform.openai.com/docs/api-reference/models/list"> <button style = "color: #388434"> OpenAI compatible </button></a>
Lists the currently available models, and provides basic information about each one such as the owner and availability. Lists the currently available models, and provides basic information about each one such as the owner and availability.
responses: responses:
"200": "200":
description: OK description: OK
@ -96,7 +135,7 @@ paths:
- Models - Models
summary: Retrieve model summary: Retrieve model
description: | description: |
<a href = "https://platform.openai.com/docs/api-reference/models/retrieve"> <button style = "color: #388434"> OpenAI Compatible </button></a> <a href = "https://platform.openai.com/docs/api-reference/models/retrieve"> <button style = "color: #388434"> OpenAI compatible </button></a>
Get a model instance, providing basic information about the model such as the owner and permissioning. Get a model instance, providing basic information about the model such as the owner and permissioning.
parameters: parameters:
@ -128,7 +167,7 @@ paths:
- Models - Models
summary: Delete model summary: Delete model
description: | description: |
<a href = "https://platform.openai.com/docs/api-reference/models/delete"> <button style = "color: #388434"> OpenAI Compatible </button></a> <a href = "https://platform.openai.com/docs/api-reference/models/delete"> <button style = "color: #388434"> OpenAI compatible </button></a>
Delete a model. Delete a model.
parameters: parameters:
@ -589,12 +628,12 @@ paths:
'write:pets': modify pets in your account 'write:pets': modify pets in your account
'read:pets': read your pets 'read:pets': read your pets
description: | description: |
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434"> OpenAI Compatible </button></a> <a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434"> OpenAI compatible </button></a>
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #fec928"> Jan </button></a> <a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #fec928"> Jan </button></a>
Returns a list of message files. Returns a list of message files.
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434">OpenAI Compatible </button></a> <a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434">OpenAI compatible </button></a>
parameters: parameters:
@ -675,7 +714,7 @@ x-webhooks:
post: post:
summary: The model object summary: The model object
description: | description: |
<a href = "https://platform.openai.com/docs/api-reference/models/object"> <button style = "color: #388434"> OpenAI Compatible </button></a> <a href = "https://platform.openai.com/docs/api-reference/models/object"> <button style = "color: #388434"> OpenAI compatible </button></a>
Describe a model offering that can be used with the API. Describe a model offering that can be used with the API.

View File

@ -0,0 +1,197 @@
components:
schemas:
ChatObject:
type: object
properties:
messages:
          type: array
description: |
Contains input data or prompts for the model to process
example:
[
{ "content": "Hello there :wave:", "role": "assistant" },
{ "content": "Can you write a long story", "role": "user" },
]
stream:
type: boolean
default: true
description: Enables continuous output generation, allowing for streaming of model responses
model:
type: string
example: "gpt-3.5-turbo"
description: Specifies the model being used for inference or processing tasks
max_tokens:
          type: integer
default: 2048
description: The maximum number of tokens the model will generate in a single response
stop:
          type: array
example: ["hello"]
description: Defines specific tokens or phrases at which the model will stop generating further output
frequency_penalty:
type: number
default: 0
description: Adjusts the likelihood of the model repeating words or phrases in its output
presence_penalty:
type: number
default: 0
description: Influences the generation of new and varied concepts in the model's output
temperature:
type: number
default: 0.7
          minimum: 0
          maximum: 1
description: Controls the randomness of the model's output
top_p:
type: number
default: 0.95
          minimum: 0
          maximum: 1
description: Set probability threshold for more relevant outputs
cache_prompt:
type: boolean
default: true
description: Optimize performance in repeated or similar requests.
ChatCompletionRequest:
type: object
properties:
messages:
          type: array
description: |
Contains input data or prompts for the model to process
<span style="color:#388434">OpenAI compatible</span>
example:
[
{ "content": "Hello there :wave:", "role": "assistant" },
{ "content": "Can you write a long story", "role": "user" },
]
model:
type: string
example: model-zephyr-7B
description: |
Specifies the model being used for inference or processing tasks
<span style="color:#388434">OpenAI compatible</span>
stream:
type: boolean
default: true
description: |
Enables continuous output generation, allowing for streaming of model responses
<span style="color:#388434">OpenAI compatible</span>
max_tokens:
          type: integer
default: 2048
description: |
The maximum number of tokens the model will generate in a single response
<span style="color:#388434">OpenAI compatible</span>
stop:
          type: array
example: ["hello"]
description: |
Defines specific tokens or phrases at which the model will stop generating further output
<span style="color:#388434">OpenAI compatible</span>
frequency_penalty:
type: number
default: 0
description: |
Adjusts the likelihood of the model repeating words or phrases in its output
<span style="color:#388434">OpenAI compatible</span>
presence_penalty:
type: number
default: 0
description: |
Influences the generation of new and varied concepts in the model's output
<span style="color:#388434">OpenAI compatible</span>
temperature:
type: number
default: 0.7
            minimum: 0
            maximum: 1
description: |
Controls the randomness of the model's output
<span style="color:#388434">OpenAI compatible</span>
top_p:
type: number
default: 0.95
            minimum: 0
            maximum: 1
description: |
Set probability threshold for more relevant outputs
<span style="color:#388434">OpenAI compatible</span>
ChatCompletionResponse:
type: object
description: Description of the response structure
properties:
choices:
type: array
description: Array of choice objects
items:
type: object
properties:
finish_reason:
type: string
nullable: true
example: null
description: Reason for finishing the response, if applicable
index:
type: integer
example: 0
description: Index of the choice
message:
type: object
properties:
content:
type: string
example: "Hello user. What can I help you with?"
description: Content of the message
role:
type: string
example: assistant
description: Role of the sender
created:
type: integer
example: 1700193928
description: Timestamp of when the response was created
id:
type: string
example: ebwd2niJvJB1Q2Whyvkz
description: Unique identifier of the response
model:
type: string
nullable: true
example: _
description: Model used for generating the response
object:
type: string
example: chat.completion
description: Type of the response object
system_fingerprint:
type: string
nullable: true
example: _
description: System fingerprint
usage:
type: object
description: Information about the usage of tokens
properties:
completion_tokens:
type: integer
example: 500
description: Number of tokens used for completion
prompt_tokens:
type: integer
example: 33
description: Number of tokens used in the prompt
total_tokens:
type: integer
example: 533
description: Total number of tokens used