From ed3548444c0fc0548a49ba99e9af2106f2f7d060 Mon Sep 17 00:00:00 2001
From: hieu-jan <150573299+hieu-jan@users.noreply.github.com>
Date: Tue, 28 Nov 2023 06:51:48 +0900
Subject: [PATCH] docs: add ChatCompletion
---
docs/openapi/jan.yaml | 61 +++++++++--
docs/openapi/specs/chat.yaml | 197 +++++++++++++++++++++++++++++++++++
2 files changed, 247 insertions(+), 11 deletions(-)
create mode 100644 docs/openapi/specs/chat.yaml
diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml
index 2a04467de..2eb9b815b 100644
--- a/docs/openapi/jan.yaml
+++ b/docs/openapi/jan.yaml
@@ -14,8 +14,10 @@ servers:
tags:
- name: Models
description: List and describe the various models available in the API.
- - name: Chat Completion
- description: Given a list of messages comprising a conversation, the model will return a response.
+ - name: Chat
+ description: |
+ Given a list of messages comprising a conversation, the model will return a response.
+
- name: Messages
description: |
Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
@@ -38,13 +40,51 @@ x-tagGroups:
- name: Endpoints
tags:
- Models
- - Chat Completion
+ - Chat
- name: Chat
tags:
- Assistants
- Messages
- Threads
paths:
+ /chat/completions:
+ post:
+ operationId: createChatCompletion
+ tags:
+ - Chat
+ summary: Create chat completion
+      description: |
+        Creates a model response for the given chat conversation.
+
+        Accepts both OpenAI compatible and Jan specified parameters.
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: "specs/chat.yaml#/components/schemas/ChatCompletionRequest"
+ responses:
+ "200":
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: "specs/chat.yaml#/components/schemas/ChatCompletionResponse"
+ x-codeSamples:
+ - lang: "curl"
+ source: |
+            # Assumes the Jan API server is running locally; adjust the
+            # host and port to match your setup.
+            curl -X POST 'http://localhost:1337/v1/chat/completions' \
+            -H "Content-Type: application/json" \
+            -d '{
+              "model": "model-zephyr-7B",
+              "messages": [
+                {
+                  "role": "user",
+                  "content": "hello"
+                }
+              ]
+            }'
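+        - lang: "python"
+          source: |
+            # A minimal sketch using the `requests` library; the host and
+            # port are assumptions, adjust them to your server setup.
+            import requests
+
+            payload = {
+                "model": "model-zephyr-7B",
+                "messages": [{"role": "user", "content": "hello"}],
+                "stream": False,  # request a single JSON response
+            }
+            response = requests.post(
+                "http://localhost:1337/v1/chat/completions",
+                json=payload,
+                timeout=60,
+            )
+            print(response.json()["choices"][0]["message"]["content"])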
+
### MODELS
/models:
get:
@@ -54,10 +94,9 @@ paths:
summary: List models
description: |
-
+
Lists the currently available models, and provides basic information about each one such as the owner and availability.
-
responses:
"200":
description: OK
@@ -96,7 +135,7 @@ paths:
- Models
summary: Retrieve model
description: |
-
+
Get a model instance, providing basic information about the model such as the owner and permissioning.
parameters:
@@ -128,7 +167,7 @@ paths:
- Models
summary: Delete model
description: |
-
+
Delete a model.
parameters:
@@ -192,7 +231,7 @@ paths:
summary: Stop model
description: |
Jan
-
+
Stop an imported model.
parameters:
- in: path
@@ -589,12 +628,12 @@ paths:
'write:pets': modify pets in your account
'read:pets': read your pets
description: |
-
+
Returns a list of message files.
-
+
parameters:
@@ -675,7 +714,7 @@ x-webhooks:
post:
summary: The model object
description: |
-
+
Describe a model offering that can be used with the API.
diff --git a/docs/openapi/specs/chat.yaml b/docs/openapi/specs/chat.yaml
new file mode 100644
index 000000000..7aef0cfe0
--- /dev/null
+++ b/docs/openapi/specs/chat.yaml
@@ -0,0 +1,197 @@
+components:
+ schemas:
+ ChatObject:
+ type: object
+ properties:
+ messages:
+          type: array
+          items:
+            type: object
+ description: |
+ Contains input data or prompts for the model to process
+ example:
+ [
+ { "content": "Hello there :wave:", "role": "assistant" },
+            { "content": "Can you write a long story", "role": "user" }
+ ]
+ stream:
+ type: boolean
+ default: true
+ description: Enables continuous output generation, allowing for streaming of model responses
+ model:
+ type: string
+ example: "gpt-3.5-turbo"
+ description: Specifies the model being used for inference or processing tasks
+ max_tokens:
+ type: number
+ default: 2048
+ description: The maximum number of tokens the model will generate in a single response
+ stop:
+          type: array
+          items:
+            type: string
+ example: ["hello"]
+ description: Defines specific tokens or phrases at which the model will stop generating further output
+ frequency_penalty:
+ type: number
+ default: 0
+ description: Adjusts the likelihood of the model repeating words or phrases in its output
+ presence_penalty:
+ type: number
+ default: 0
+ description: Influences the generation of new and varied concepts in the model's output
+ temperature:
+ type: number
+ default: 0.7
+          minimum: 0
+          maximum: 1
+ description: Controls the randomness of the model's output
+ top_p:
+ type: number
+ default: 0.95
+          minimum: 0
+          maximum: 1
+          description: Sets the probability threshold for more relevant outputs
+ cache_prompt:
+ type: boolean
+ default: true
+          description: Caches the prompt to optimize performance for repeated or similar requests
+ ChatCompletionRequest:
+ type: object
+ properties:
+ messages:
+          type: array
+          items:
+            type: object
+ description: |
+ Contains input data or prompts for the model to process
+
+ OpenAI compatible
+ example:
+ [
+ { "content": "Hello there :wave:", "role": "assistant" },
+            { "content": "Can you write a long story", "role": "user" }
+ ]
+ model:
+ type: string
+ example: model-zephyr-7B
+ description: |
+ Specifies the model being used for inference or processing tasks
+
+ OpenAI compatible
+ stream:
+ type: boolean
+ default: true
+ description: |
+ Enables continuous output generation, allowing for streaming of model responses
+
+ OpenAI compatible
+ max_tokens:
+ type: number
+ default: 2048
+ description: |
+ The maximum number of tokens the model will generate in a single response
+
+ OpenAI compatible
+ stop:
+          type: array
+          items:
+            type: string
+ example: ["hello"]
+ description: |
+ Defines specific tokens or phrases at which the model will stop generating further output
+
+ OpenAI compatible
+ frequency_penalty:
+ type: number
+ default: 0
+ description: |
+ Adjusts the likelihood of the model repeating words or phrases in its output
+
+ OpenAI compatible
+ presence_penalty:
+ type: number
+ default: 0
+ description: |
+ Influences the generation of new and varied concepts in the model's output
+
+ OpenAI compatible
+ temperature:
+ type: number
+ default: 0.7
+          minimum: 0
+          maximum: 1
+ description: |
+ Controls the randomness of the model's output
+
+ OpenAI compatible
+ top_p:
+ type: number
+ default: 0.95
+          minimum: 0
+          maximum: 1
+ description: |
+            Sets the probability threshold for more relevant outputs
+
+ OpenAI compatible
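+      # Illustrative request body assembled from the per-field examples and
+      # defaults above; adjust values to your own model and needs.
+      example:
+        model: model-zephyr-7B
+        messages:
+          - role: user
+            content: "Can you write a long story"
+        stream: true
+        max_tokens: 2048
+        temperature: 0.7
+        top_p: 0.95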
+
+ ChatCompletionResponse:
+ type: object
+      description: The response object returned for a chat completion request
+ properties:
+ choices:
+ type: array
+ description: Array of choice objects
+ items:
+ type: object
+ properties:
+ finish_reason:
+ type: string
+ nullable: true
+ example: null
+ description: Reason for finishing the response, if applicable
+ index:
+ type: integer
+ example: 0
+ description: Index of the choice
+ message:
+ type: object
+ properties:
+ content:
+ type: string
+ example: "Hello user. What can I help you with?"
+ description: Content of the message
+ role:
+ type: string
+ example: assistant
+ description: Role of the sender
+ created:
+ type: integer
+ example: 1700193928
+          description: Unix timestamp (in seconds) of when the response was created
+ id:
+ type: string
+ example: ebwd2niJvJB1Q2Whyvkz
+ description: Unique identifier of the response
+ model:
+ type: string
+ nullable: true
+ example: _
+ description: Model used for generating the response
+ object:
+ type: string
+ example: chat.completion
+ description: Type of the response object
+ system_fingerprint:
+ type: string
+ nullable: true
+ example: _
+ description: System fingerprint
+ usage:
+ type: object
+ description: Information about the usage of tokens
+ properties:
+ completion_tokens:
+ type: integer
+ example: 500
+ description: Number of tokens used for completion
+ prompt_tokens:
+ type: integer
+ example: 33
+ description: Number of tokens used in the prompt
+ total_tokens:
+ type: integer
+ example: 533
+ description: Total number of tokens used
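+      # Illustrative full response body assembled from the per-field examples
+      # above; real values vary by model and request.
+      example:
+        choices:
+          - finish_reason: null
+            index: 0
+            message:
+              content: "Hello user. What can I help you with?"
+              role: assistant
+        created: 1700193928
+        id: ebwd2niJvJB1Q2Whyvkz
+        model: _
+        object: chat.completion
+        system_fingerprint: _
+        usage:
+          completion_tokens: 500
+          prompt_tokens: 33
+          total_tokens: 533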