docs: add ChatCompletion
This commit is contained in:
parent
a2a0c38144
commit
ed3548444c
@ -14,8 +14,10 @@ servers:
|
||||
tags:
|
||||
- name: Models
|
||||
description: List and describe the various models available in the API.
|
||||
- name: Chat Completion
|
||||
description: Given a list of messages comprising a conversation, the model will return a response.
|
||||
- name: Chat
|
||||
description: |
|
||||
Given a list of messages comprising a conversation, the model will return a response.
|
||||
|
||||
- name: Messages
|
||||
description: |
|
||||
Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
|
||||
@ -38,13 +40,51 @@ x-tagGroups:
|
||||
- name: Endpoints
|
||||
tags:
|
||||
- Models
|
||||
- Chat Completion
|
||||
- Chat
|
||||
- name: Chat
|
||||
tags:
|
||||
- Assistants
|
||||
- Messages
|
||||
- Threads
|
||||
paths:
|
||||
/chat/completions:
|
||||
post:
|
||||
operationId: createChatCompletion
|
||||
tags:
|
||||
- Chat
|
||||
summary: Create chat completion
|
||||
description: |
|
||||
<a href = "https://platform.openai.com/docs/api-reference/chat/create"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||
and <span style = "color: #fec928"> Jan specified </span>
|
||||
|
||||
Creates a model response for the given chat conversation.
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "specs/chat.yaml#/components/schemas/ChatCompletionRequest"
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "specs/chat.yaml#/components/schemas/ChatCompletionResponse"
|
||||
x-codeSamples:
|
||||
- lang: "curl"
|
||||
source: |
|
||||
curl -X POST 'http://localhost:3982/inferences/llamacpp/chat_completion' \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"llama_model_path": "/path/to/your/model.gguf",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "hello"
|
||||
}
|
||||
]
|
||||
}'
|
||||
|
||||
### MODELS
|
||||
/models:
|
||||
get:
|
||||
@ -54,10 +94,9 @@ paths:
|
||||
|
||||
summary: List models
|
||||
description: |
|
||||
<a href = "https://platform.openai.com/docs/api-reference/models/list"> <button style = "color: #388434"> OpenAI Compatible </button></a>
|
||||
<a href = "https://platform.openai.com/docs/api-reference/models/list"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||
|
||||
Lists the currently available models, and provides basic information about each one such as the owner and availability.
|
||||
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
@ -96,7 +135,7 @@ paths:
|
||||
- Models
|
||||
summary: Retrieve model
|
||||
description: |
|
||||
<a href = "https://platform.openai.com/docs/api-reference/models/retrieve"> <button style = "color: #388434"> OpenAI Compatible </button></a>
|
||||
<a href = "https://platform.openai.com/docs/api-reference/models/retrieve"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||
|
||||
Get a model instance, providing basic information about the model such as the owner and permissioning.
|
||||
parameters:
|
||||
@ -128,7 +167,7 @@ paths:
|
||||
- Models
|
||||
summary: Delete model
|
||||
description: |
|
||||
<a href = "https://platform.openai.com/docs/api-reference/models/delete"> <button style = "color: #388434"> OpenAI Compatible </button></a>
|
||||
<a href = "https://platform.openai.com/docs/api-reference/models/delete"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||
|
||||
Delete a model.
|
||||
parameters:
|
||||
@ -192,7 +231,7 @@ paths:
|
||||
summary: Stop model
|
||||
description: |
|
||||
<span style = "color: #fec928"> Jan </span>
|
||||
|
||||
|
||||
Stop an imported model.
|
||||
parameters:
|
||||
- in: path
|
||||
@ -589,12 +628,12 @@ paths:
|
||||
'write:pets': modify pets in your account
|
||||
'read:pets': read your pets
|
||||
description: |
|
||||
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434"> OpenAI Compatible </button></a>
|
||||
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #fec928"> Jan </button></a>
|
||||
|
||||
Returns a list of message files.
|
||||
|
||||
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434">OpenAI Compatible </button></a>
|
||||
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434">OpenAI compatible </button></a>
|
||||
|
||||
|
||||
parameters:
|
||||
@ -675,7 +714,7 @@ x-webhooks:
|
||||
post:
|
||||
summary: The model object
|
||||
description: |
|
||||
<a href = "https://platform.openai.com/docs/api-reference/models/object"> <button style = "color: #388434"> OpenAI Compatible </button></a>
|
||||
<a href = "https://platform.openai.com/docs/api-reference/models/object"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||
|
||||
Describe a model offering that can be used with the API.
|
||||
|
||||
|
||||
197
docs/openapi/specs/chat.yaml
Normal file
197
docs/openapi/specs/chat.yaml
Normal file
@ -0,0 +1,197 @@
|
||||
components:
|
||||
schemas:
|
||||
ChatObject:
|
||||
type: object
|
||||
properties:
|
||||
messages:
|
||||
type: array
|
||||
description: |
|
||||
Contains input data or prompts for the model to process
|
||||
example:
|
||||
[
|
||||
{ "content": "Hello there :wave:", "role": "assistant" },
|
||||
{ "content": "Can you write a long story", "role": "user" },
|
||||
]
|
||||
stream:
|
||||
type: boolean
|
||||
default: true
|
||||
description: Enables continuous output generation, allowing for streaming of model responses
|
||||
model:
|
||||
type: string
|
||||
example: "gpt-3.5-turbo"
|
||||
description: Specifies the model being used for inference or processing tasks
|
||||
max_tokens:
|
||||
type: number
|
||||
default: 2048
|
||||
description: The maximum number of tokens the model will generate in a single response
|
||||
stop:
|
||||
type: array
|
||||
example: ["hello"]
|
||||
description: Defines specific tokens or phrases at which the model will stop generating further output
|
||||
frequency_penalty:
|
||||
type: number
|
||||
default: 0
|
||||
description: Adjusts the likelihood of the model repeating words or phrases in its output
|
||||
presence_penalty:
|
||||
type: number
|
||||
default: 0
|
||||
description: Influences the generation of new and varied concepts in the model's output
|
||||
temperature:
|
||||
type: number
|
||||
default: 0.7
|
||||
minimum: 0
|
||||
maximum: 1
|
||||
description: Controls the randomness of the model's output
|
||||
top_p:
|
||||
type: number
|
||||
default: 0.95
|
||||
minimum: 0
|
||||
maximum: 1
|
||||
description: Set probability threshold for more relevant outputs
|
||||
cache_prompt:
|
||||
type: boolean
|
||||
default: true
|
||||
description: Optimize performance in repeated or similar requests.
|
||||
ChatCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
messages:
|
||||
type: array
|
||||
description: |
|
||||
Contains input data or prompts for the model to process
|
||||
|
||||
<span style="color:#388434">OpenAI compatible</span>
|
||||
example:
|
||||
[
|
||||
{ "content": "Hello there :wave:", "role": "assistant" },
|
||||
{ "content": "Can you write a long story", "role": "user" },
|
||||
]
|
||||
model:
|
||||
type: string
|
||||
example: model-zephyr-7B
|
||||
description: |
|
||||
Specifies the model being used for inference or processing tasks
|
||||
|
||||
<span style="color:#388434">OpenAI compatible</span>
|
||||
stream:
|
||||
type: boolean
|
||||
default: true
|
||||
description: |
|
||||
Enables continuous output generation, allowing for streaming of model responses
|
||||
|
||||
<span style="color:#388434">OpenAI compatible</span>
|
||||
max_tokens:
|
||||
type: number
|
||||
default: 2048
|
||||
description: |
|
||||
The maximum number of tokens the model will generate in a single response
|
||||
|
||||
<span style="color:#388434">OpenAI compatible</span>
|
||||
stop:
|
||||
type: array
|
||||
example: ["hello"]
|
||||
description: |
|
||||
Defines specific tokens or phrases at which the model will stop generating further output
|
||||
|
||||
<span style="color:#388434">OpenAI compatible</span>
|
||||
frequency_penalty:
|
||||
type: number
|
||||
default: 0
|
||||
description: |
|
||||
Adjusts the likelihood of the model repeating words or phrases in its output
|
||||
|
||||
<span style="color:#388434">OpenAI compatible</span>
|
||||
presence_penalty:
|
||||
type: number
|
||||
default: 0
|
||||
description: |
|
||||
Influences the generation of new and varied concepts in the model's output
|
||||
|
||||
<span style="color:#388434">OpenAI compatible</span>
|
||||
temperature:
|
||||
type: number
|
||||
default: 0.7
|
||||
minimum: 0
|
||||
maximum: 1
|
||||
description: |
|
||||
Controls the randomness of the model's output
|
||||
|
||||
<span style="color:#388434">OpenAI compatible</span>
|
||||
top_p:
|
||||
type: number
|
||||
default: 0.95
|
||||
minimum: 0
|
||||
maximum: 1
|
||||
description: |
|
||||
Set probability threshold for more relevant outputs
|
||||
|
||||
<span style="color:#388434">OpenAI compatible</span>
|
||||
|
||||
ChatCompletionResponse:
|
||||
type: object
|
||||
description: Description of the response structure
|
||||
properties:
|
||||
choices:
|
||||
type: array
|
||||
description: Array of choice objects
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
finish_reason:
|
||||
type: string
|
||||
nullable: true
|
||||
example: null
|
||||
description: Reason for finishing the response, if applicable
|
||||
index:
|
||||
type: integer
|
||||
example: 0
|
||||
description: Index of the choice
|
||||
message:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: string
|
||||
example: "Hello user. What can I help you with?"
|
||||
description: Content of the message
|
||||
role:
|
||||
type: string
|
||||
example: assistant
|
||||
description: Role of the sender
|
||||
created:
|
||||
type: integer
|
||||
example: 1700193928
|
||||
description: Timestamp of when the response was created
|
||||
id:
|
||||
type: string
|
||||
example: ebwd2niJvJB1Q2Whyvkz
|
||||
description: Unique identifier of the response
|
||||
model:
|
||||
type: string
|
||||
nullable: true
|
||||
example: _
|
||||
description: Model used for generating the response
|
||||
object:
|
||||
type: string
|
||||
example: chat.completion
|
||||
description: Type of the response object
|
||||
system_fingerprint:
|
||||
type: string
|
||||
nullable: true
|
||||
example: _
|
||||
description: System fingerprint
|
||||
usage:
|
||||
type: object
|
||||
description: Information about the usage of tokens
|
||||
properties:
|
||||
completion_tokens:
|
||||
type: integer
|
||||
example: 500
|
||||
description: Number of tokens used for completion
|
||||
prompt_tokens:
|
||||
type: integer
|
||||
example: 33
|
||||
description: Number of tokens used in the prompt
|
||||
total_tokens:
|
||||
type: integer
|
||||
example: 533
|
||||
description: Total number of tokens used
|
||||
Loading…
x
Reference in New Issue
Block a user