docs: add ChatCompletion

This commit is contained in:
hieu-jan 2023-11-28 06:51:48 +09:00
parent a2a0c38144
commit ed3548444c
2 changed files with 247 additions and 11 deletions


@@ -14,8 +14,10 @@ servers:
tags:
- name: Models
description: List and describe the various models available in the API.
- name: Chat Completion
description: Given a list of messages comprising a conversation, the model will return a response.
- name: Chat
description: |
Given a list of messages comprising a conversation, the model will return a response.
- name: Messages
description: |
Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
@@ -38,13 +40,51 @@ x-tagGroups:
- name: Endpoints
tags:
- Models
- Chat Completion
- Chat
- name: Chat
tags:
- Assistants
- Messages
- Threads
paths:
/chat/completions:
post:
operationId: createChatCompletion
tags:
- Chat
summary: Create chat completion
description: |
<a href = "https://platform.openai.com/docs/api-reference/chat/create"> <button style = "color: #388434"> OpenAI compatible </button></a>
and <span style = "color: #fec928"> Jan-specific </span>
Creates a model response for the given chat conversation.
requestBody:
content:
application/json:
schema:
$ref: "specs/chat.yaml#/components/schemas/ChatCompletionRequest"
responses:
"200":
description: OK
content:
application/json:
schema:
$ref: "specs/chat.yaml#/components/schemas/ChatCompletionResponse"
x-codeSamples:
- lang: "curl"
source: |
curl -X POST 'http://localhost:3982/inferences/llamacpp/chat_completion' \
-H "Content-Type: application/json" \
-d '{
"llama_model_path": "/path/to/your/model.gguf",
"messages": [
{
"role": "user",
"content": "hello"
}
]
}'
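        # NOTE: hypothetical extra code sample, not part of the original commit.
        # It mirrors the curl request above in Python; the endpoint URL and the
        # placeholder model path are carried over from that sample as assumptions.
        - lang: "python"
          source: |
            import requests

            # Same request body as the curl sample above.
            response = requests.post(
                "http://localhost:3982/inferences/llamacpp/chat_completion",
                headers={"Content-Type": "application/json"},
                json={
                    "llama_model_path": "/path/to/your/model.gguf",
                    "messages": [
                        {"role": "user", "content": "hello"}
                    ]
                },
            )
            print(response.json())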
### MODELS
/models:
get:
@@ -54,10 +94,9 @@ paths:
summary: List models
description: |
<a href = "https://platform.openai.com/docs/api-reference/models/list"> <button style = "color: #388434"> OpenAI Compatible </button></a>
<a href = "https://platform.openai.com/docs/api-reference/models/list"> <button style = "color: #388434"> OpenAI compatible </button></a>
Lists the currently available models, and provides basic information about each one such as the owner and availability.
responses:
"200":
description: OK
@@ -96,7 +135,7 @@ paths:
- Models
summary: Retrieve model
description: |
<a href = "https://platform.openai.com/docs/api-reference/models/retrieve"> <button style = "color: #388434"> OpenAI Compatible </button></a>
<a href = "https://platform.openai.com/docs/api-reference/models/retrieve"> <button style = "color: #388434"> OpenAI compatible </button></a>
Get a model instance, providing basic information about the model such as the owner and permissioning.
parameters:
@@ -128,7 +167,7 @@ paths:
- Models
summary: Delete model
description: |
<a href = "https://platform.openai.com/docs/api-reference/models/delete"> <button style = "color: #388434"> OpenAI Compatible </button></a>
<a href = "https://platform.openai.com/docs/api-reference/models/delete"> <button style = "color: #388434"> OpenAI compatible </button></a>
Delete a model.
parameters:
@@ -589,12 +628,12 @@ paths:
'write:pets': modify pets in your account
'read:pets': read your pets
description: |
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434"> OpenAI Compatible </button></a>
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434"> OpenAI compatible </button></a>
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #fec928"> Jan </button></a>
Returns a list of message files.
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434">OpenAI Compatible </button></a>
<a href = "https://platform.openai.com/docs/api-reference/messages/listMessages"> <button style = "color: #388434">OpenAI compatible </button></a>
parameters:
@@ -675,7 +714,7 @@ x-webhooks:
post:
summary: The model object
description: |
<a href = "https://platform.openai.com/docs/api-reference/models/object"> <button style = "color: #388434"> OpenAI Compatible </button></a>
<a href = "https://platform.openai.com/docs/api-reference/models/object"> <button style = "color: #388434"> OpenAI compatible </button></a>
Describe a model offering that can be used with the API.


@@ -0,0 +1,197 @@
components:
schemas:
ChatObject:
type: object
properties:
messages:
type: array
description: |
Contains input data or prompts for the model to process
example:
[
{ "content": "Hello there :wave:", "role": "assistant" },
{ "content": "Can you write a long story", "role": "user" },
]
stream:
type: boolean
default: true
description: Enables continuous output generation, allowing for streaming of model responses
model:
type: string
example: "gpt-3.5-turbo"
description: Specifies the model being used for inference or processing tasks
max_tokens:
type: number
default: 2048
description: The maximum number of tokens the model will generate in a single response
stop:
type: array
example: ["hello"]
description: Defines specific tokens or phrases at which the model will stop generating further output
frequency_penalty:
type: number
default: 0
description: Adjusts the likelihood of the model repeating words or phrases in its output
presence_penalty:
type: number
default: 0
description: Influences the generation of new and varied concepts in the model's output
temperature:
type: number
default: 0.7
minimum: 0
maximum: 1
description: Controls the randomness of the model's output
top_p:
type: number
default: 0.95
minimum: 0
maximum: 1
description: Sets the probability threshold for more relevant outputs
cache_prompt:
type: boolean
default: true
description: Caches the prompt to optimize performance for repeated or similar requests
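      # NOTE: illustrative addition, not part of the original commit: a complete
      # example assembled from the per-field examples and defaults above, to show
      # the overall shape of ChatObject.
      example:
        messages:
          - { "content": "Hello there :wave:", "role": "assistant" }
          - { "content": "Can you write a long story", "role": "user" }
        stream: true
        model: "gpt-3.5-turbo"
        max_tokens: 2048
        stop: ["hello"]
        frequency_penalty: 0
        presence_penalty: 0
        temperature: 0.7
        top_p: 0.95
        cache_prompt: true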
ChatCompletionRequest:
type: object
properties:
messages:
type: array
description: |
Contains input data or prompts for the model to process
<span style="color:#388434">OpenAI compatible</span>
example:
[
{ "content": "Hello there :wave:", "role": "assistant" },
{ "content": "Can you write a long story", "role": "user" },
]
model:
type: string
example: model-zephyr-7B
description: |
Specifies the model being used for inference or processing tasks
<span style="color:#388434">OpenAI compatible</span>
stream:
type: boolean
default: true
description: |
Enables continuous output generation, allowing for streaming of model responses
<span style="color:#388434">OpenAI compatible</span>
max_tokens:
type: number
default: 2048
description: |
The maximum number of tokens the model will generate in a single response
<span style="color:#388434">OpenAI compatible</span>
stop:
type: array
example: ["hello"]
description: |
Defines specific tokens or phrases at which the model will stop generating further output
<span style="color:#388434">OpenAI compatible</span>
frequency_penalty:
type: number
default: 0
description: |
Adjusts the likelihood of the model repeating words or phrases in its output
<span style="color:#388434">OpenAI compatible</span>
presence_penalty:
type: number
default: 0
description: |
Influences the generation of new and varied concepts in the model's output
<span style="color:#388434">OpenAI compatible</span>
temperature:
type: number
default: 0.7
minimum: 0
maximum: 1
description: |
Controls the randomness of the model's output
<span style="color:#388434">OpenAI compatible</span>
top_p:
type: number
default: 0.95
minimum: 0
maximum: 1
description: |
Sets the probability threshold for more relevant outputs
<span style="color:#388434">OpenAI compatible</span>
ChatCompletionResponse:
type: object
description: The response returned for a chat completion request
properties:
choices:
type: array
description: Array of choice objects
items:
type: object
properties:
finish_reason:
type: string
nullable: true
example: null
description: Reason for finishing the response, if applicable
index:
type: integer
example: 0
description: Index of the choice
message:
type: object
properties:
content:
type: string
example: "Hello user. What can I help you with?"
description: Content of the message
role:
type: string
example: assistant
description: Role of the sender
created:
type: integer
example: 1700193928
description: Timestamp of when the response was created
id:
type: string
example: ebwd2niJvJB1Q2Whyvkz
description: Unique identifier of the response
model:
type: string
nullable: true
example: _
description: Model used for generating the response
object:
type: string
example: chat.completion
description: Type of the response object
system_fingerprint:
type: string
nullable: true
example: _
description: System fingerprint
usage:
type: object
description: Information about the usage of tokens
properties:
completion_tokens:
type: integer
example: 500
description: Number of tokens used for completion
prompt_tokens:
type: integer
example: 33
description: Number of tokens used in the prompt
total_tokens:
type: integer
example: 533
description: Total number of tokens used
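      # NOTE: illustrative addition, not part of the original commit: a complete
      # example response assembled from the per-field examples above, to show
      # the overall shape of ChatCompletionResponse.
      example:
        choices:
          - finish_reason: null
            index: 0
            message:
              content: "Hello user. What can I help you with?"
              role: assistant
        created: 1700193928
        id: ebwd2niJvJB1Q2Whyvkz
        model: null
        object: chat.completion
        system_fingerprint: null
        usage:
          completion_tokens: 500
          prompt_tokens: 33
          total_tokens: 533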