docs: add ChatCompletion
This commit is contained in:
parent
a2a0c38144
commit
ed3548444c
@ -14,8 +14,10 @@ servers:
|
|||||||
tags:
|
tags:
|
||||||
- name: Models
|
- name: Models
|
||||||
description: List and describe the various models available in the API.
|
description: List and describe the various models available in the API.
|
||||||
- name: Chat Completion
|
- name: Chat
|
||||||
description: Given a list of messages comprising a conversation, the model will return a response.
|
description: |
|
||||||
|
Given a list of messages comprising a conversation, the model will return a response.
|
||||||
|
|
||||||
- name: Messages
|
- name: Messages
|
||||||
description: |
|
description: |
|
||||||
Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
|
Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
|
||||||
@ -38,13 +40,51 @@ x-tagGroups:
|
|||||||
- name: Endpoints
|
- name: Endpoints
|
||||||
tags:
|
tags:
|
||||||
- Models
|
- Models
|
||||||
- Chat Completion
|
- Chat
|
||||||
- name: Chat
|
- name: Chat
|
||||||
tags:
|
tags:
|
||||||
- Assistants
|
- Assistants
|
||||||
- Messages
|
- Messages
|
||||||
- Threads
|
- Threads
|
||||||
paths:
|
paths:
|
||||||
|
/chat/completions:
|
||||||
|
post:
|
||||||
|
operationId: createChatCompletion
|
||||||
|
tags:
|
||||||
|
- Chat
|
||||||
|
summary: Create chat completion
|
||||||
|
description: |
|
||||||
|
<a href = "https://platform.openai.com/docs/api-reference/chat/create"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||||
|
and <span style = "color: #fec928"> Jan specified </span>
|
||||||
|
|
||||||
|
Creates a model response for the given chat conversation.
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: "specs/chat.yaml#/components/schemas/ChatCompletionRequest"
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: OK
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: "specs/chat.yaml#/components/schemas/ChatCompletionResponse"
|
||||||
|
x-codeSamples:
|
||||||
|
- lang: "curl"
|
||||||
|
source: |
|
||||||
|
curl -X POST 'http://localhost:3982/inferences/llamacpp/chat_completion' \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"llama_model_path": "/path/to/your/model.gguf",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "hello"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
|
||||||
### MODELS
|
### MODELS
|
||||||
/models:
|
/models:
|
||||||
get:
|
get:
|
||||||
@ -54,10 +94,9 @@ paths:
|
|||||||
|
|
||||||
summary: List models
|
summary: List models
|
||||||
description: |
|
description: |
|
||||||
<a href = "https://platform.openai.com/docs/api-reference/models/list"> <button style = "color: #388434"> OpenAI Compatible </button></a>
|
<a href = "https://platform.openai.com/docs/api-reference/models/list"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||||
|
|
||||||
Lists the currently available models, and provides basic information about each one such as the owner and availability.
|
Lists the currently available models, and provides basic information about each one such as the owner and availability.
|
||||||
|
|
||||||
responses:
|
responses:
|
||||||
"200":
|
"200":
|
||||||
description: OK
|
description: OK
|
||||||
@ -96,7 +135,7 @@ paths:
|
|||||||
- Models
|
- Models
|
||||||
summary: Retrieve model
|
summary: Retrieve model
|
||||||
description: |
|
description: |
|
||||||
<a href = "https://platform.openai.com/docs/api-reference/models/retrieve"> <button style = "color: #388434"> OpenAI Compatible </button></a>
|
<a href = "https://platform.openai.com/docs/api-reference/models/retrieve"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||||
|
|
||||||
Get a model instance, providing basic information about the model such as the owner and permissioning.
|
Get a model instance, providing basic information about the model such as the owner and permissioning.
|
||||||
parameters:
|
parameters:
|
||||||
@ -128,7 +167,7 @@ paths:
|
|||||||
- Models
|
- Models
|
||||||
summary: Delete model
|
summary: Delete model
|
||||||
description: |
|
description: |
|
||||||
<a href = "https://platform.openai.com/docs/api-reference/models/delete"> <button style = "color: #388434"> OpenAI Compatible </button></a>
|
<a href = "https://platform.openai.com/docs/api-reference/models/delete"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||||
|
|
||||||
Delete a model.
|
Delete a model.
|
||||||
parameters:
|
parameters:
|
||||||
@ -589,12 +628,12 @@ paths:
|
|||||||
'write:pets': modify pets in your account
|
'write:pets': modify pets in your account
|
||||||
'read:pets': read your pets
|
'read:pets': read your pets
|
||||||
description: |
|
description: |
|
||||||
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434"> OpenAI Compatible </button></a>
|
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||||
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #fec928"> Jan </button></a>
|
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #fec928"> Jan </button></a>
|
||||||
|
|
||||||
Returns a list of message files.
|
Returns a list of message files.
|
||||||
|
|
||||||
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434">OpenAI Compatible </button></a>
|
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434">OpenAI compatible </button></a>
|
||||||
|
|
||||||
|
|
||||||
parameters:
|
parameters:
|
||||||
@ -675,7 +714,7 @@ x-webhooks:
|
|||||||
post:
|
post:
|
||||||
summary: The model object
|
summary: The model object
|
||||||
description: |
|
description: |
|
||||||
<a href = "https://platform.openai.com/docs/api-reference/models/object"> <button style = "color: #388434"> OpenAI Compatible </button></a>
|
<a href = "https://platform.openai.com/docs/api-reference/models/object"> <button style = "color: #388434"> OpenAI compatible </button></a>
|
||||||
|
|
||||||
Describe a model offering that can be used with the API.
|
Describe a model offering that can be used with the API.
|
||||||
|
|
||||||
|
|||||||
197
docs/openapi/specs/chat.yaml
Normal file
197
docs/openapi/specs/chat.yaml
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
components:
|
||||||
|
schemas:
|
||||||
|
ChatObject:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
messages:
|
||||||
|
type: array
|
||||||
|
description: |
|
||||||
|
Contains input data or prompts for the model to process
|
||||||
|
example:
|
||||||
|
[
|
||||||
|
{ "content": "Hello there :wave:", "role": "assistant" },
|
||||||
|
{ "content": "Can you write a long story", "role": "user" },
|
||||||
|
]
|
||||||
|
stream:
|
||||||
|
type: boolean
|
||||||
|
default: true
|
||||||
|
description: Enables continuous output generation, allowing for streaming of model responses
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
example: "gpt-3.5-turbo"
|
||||||
|
description: Specifies the model being used for inference or processing tasks
|
||||||
|
max_tokens:
|
||||||
|
type: number
|
||||||
|
default: 2048
|
||||||
|
description: The maximum number of tokens the model will generate in a single response
|
||||||
|
stop:
|
||||||
|
type: array
|
||||||
|
example: ["hello"]
|
||||||
|
description: Defines specific tokens or phrases at which the model will stop generating further output
|
||||||
|
frequency_penalty:
|
||||||
|
type: number
|
||||||
|
default: 0
|
||||||
|
description: Adjusts the likelihood of the model repeating words or phrases in its output
|
||||||
|
presence_penalty:
|
||||||
|
type: number
|
||||||
|
default: 0
|
||||||
|
description: Influences the generation of new and varied concepts in the model's output
|
||||||
|
temperature:
|
||||||
|
type: number
|
||||||
|
default: 0.7
|
||||||
|
min: 0
|
||||||
|
max: 1
|
||||||
|
description: Controls the randomness of the model's output
|
||||||
|
top_p:
|
||||||
|
type: number
|
||||||
|
default: 0.95
|
||||||
|
min: 0
|
||||||
|
max: 1
|
||||||
|
description: Sets the probability threshold for more relevant outputs
|
||||||
|
cache_prompt:
|
||||||
|
type: boolean
|
||||||
|
default: true
|
||||||
|
description: Optimizes performance in repeated or similar requests.
|
||||||
|
ChatCompletionRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
messages:
|
||||||
|
type: array
|
||||||
|
description: |
|
||||||
|
Contains input data or prompts for the model to process
|
||||||
|
|
||||||
|
<span style="color:#388434">OpenAI compatible</span>
|
||||||
|
example:
|
||||||
|
[
|
||||||
|
{ "content": "Hello there :wave:", "role": "assistant" },
|
||||||
|
{ "content": "Can you write a long story", "role": "user" },
|
||||||
|
]
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
example: model-zephyr-7B
|
||||||
|
description: |
|
||||||
|
Specifies the model being used for inference or processing tasks
|
||||||
|
|
||||||
|
<span style="color:#388434">OpenAI compatible</span>
|
||||||
|
stream:
|
||||||
|
type: boolean
|
||||||
|
default: true
|
||||||
|
description: |
|
||||||
|
Enables continuous output generation, allowing for streaming of model responses
|
||||||
|
|
||||||
|
<span style="color:#388434">OpenAI compatible</span>
|
||||||
|
max_tokens:
|
||||||
|
type: number
|
||||||
|
default: 2048
|
||||||
|
description: |
|
||||||
|
The maximum number of tokens the model will generate in a single response
|
||||||
|
|
||||||
|
<span style="color:#388434">OpenAI compatible</span>
|
||||||
|
stop:
|
||||||
|
type: array
|
||||||
|
example: ["hello"]
|
||||||
|
description: |
|
||||||
|
Defines specific tokens or phrases at which the model will stop generating further output
|
||||||
|
|
||||||
|
<span style="color:#388434">OpenAI compatible</span>
|
||||||
|
frequency_penalty:
|
||||||
|
type: number
|
||||||
|
default: 0
|
||||||
|
description: |
|
||||||
|
Adjusts the likelihood of the model repeating words or phrases in its output
|
||||||
|
|
||||||
|
<span style="color:#388434">OpenAI compatible</span>
|
||||||
|
presence_penalty:
|
||||||
|
type: number
|
||||||
|
default: 0
|
||||||
|
description: |
|
||||||
|
Influences the generation of new and varied concepts in the model's output
|
||||||
|
|
||||||
|
<span style="color:#388434">OpenAI compatible</span>
|
||||||
|
temperature:
|
||||||
|
type: number
|
||||||
|
default: 0.7
|
||||||
|
min: 0
|
||||||
|
max: 1
|
||||||
|
description: |
|
||||||
|
Controls the randomness of the model's output
|
||||||
|
|
||||||
|
<span style="color:#388434">OpenAI compatible</span>
|
||||||
|
top_p:
|
||||||
|
type: number
|
||||||
|
default: 0.95
|
||||||
|
min: 0
|
||||||
|
max: 1
|
||||||
|
description: |
|
||||||
|
Sets the probability threshold for more relevant outputs
|
||||||
|
|
||||||
|
<span style="color:#388434">OpenAI compatible</span>
|
||||||
|
|
||||||
|
ChatCompletionResponse:
|
||||||
|
type: object
|
||||||
|
description: Description of the response structure
|
||||||
|
properties:
|
||||||
|
choices:
|
||||||
|
type: array
|
||||||
|
description: Array of choice objects
|
||||||
|
items:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
finish_reason:
|
||||||
|
type: string
|
||||||
|
nullable: true
|
||||||
|
example: null
|
||||||
|
description: Reason for finishing the response, if applicable
|
||||||
|
index:
|
||||||
|
type: integer
|
||||||
|
example: 0
|
||||||
|
description: Index of the choice
|
||||||
|
message:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
content:
|
||||||
|
type: string
|
||||||
|
example: "Hello user. What can I help you with?"
|
||||||
|
description: Content of the message
|
||||||
|
role:
|
||||||
|
type: string
|
||||||
|
example: assistant
|
||||||
|
description: Role of the sender
|
||||||
|
created:
|
||||||
|
type: integer
|
||||||
|
example: 1700193928
|
||||||
|
description: Timestamp of when the response was created
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
example: ebwd2niJvJB1Q2Whyvkz
|
||||||
|
description: Unique identifier of the response
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
nullable: true
|
||||||
|
example: _
|
||||||
|
description: Model used for generating the response
|
||||||
|
object:
|
||||||
|
type: string
|
||||||
|
example: chat.completion
|
||||||
|
description: Type of the response object
|
||||||
|
system_fingerprint:
|
||||||
|
type: string
|
||||||
|
nullable: true
|
||||||
|
example: _
|
||||||
|
description: System fingerprint
|
||||||
|
usage:
|
||||||
|
type: object
|
||||||
|
description: Information about the usage of tokens
|
||||||
|
properties:
|
||||||
|
completion_tokens:
|
||||||
|
type: integer
|
||||||
|
example: 500
|
||||||
|
description: Number of tokens used for completion
|
||||||
|
prompt_tokens:
|
||||||
|
type: integer
|
||||||
|
example: 33
|
||||||
|
description: Number of tokens used in the prompt
|
||||||
|
total_tokens:
|
||||||
|
type: integer
|
||||||
|
example: 533
|
||||||
|
description: Total number of tokens used
|
||||||
Loading…
x
Reference in New Issue
Block a user