docs: add ChatCompletion

2023-11-28 06:51:48 +09:00 · 2023-11-28 06:51:48 +09:00 · ed3548444c
commit ed3548444c
parent a2a0c38144
2 changed files with 247 additions and 11 deletions
--- a/docs/openapi/jan.yaml
+++ b/docs/openapi/jan.yaml
@ -14,8 +14,10 @@ servers:
 tags:
  - name: Models
    description: List and describe the various models available in the API.
-  - name: Chat Completion
+  - name: Chat 
-    description: Given a list of messages comprising a conversation, the model will return a response.
+    description: |
      Given a list of messages comprising a conversation, the model will return a response.
  - name: Messages
    description: |
      Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
@ -38,13 +40,51 @@ x-tagGroups:
  - name: Endpoints
    tags:
      - Models
-      - Chat Completion
+      - Chat
  - name: Chat
    tags:
      - Assistants
      - Messages
      - Threads
 paths:
  /chat/completions:
    # Chat completion endpoint: takes a ChatCompletionRequest JSON body and
    # answers 200 with a ChatCompletionResponse (both defined in specs/chat.yaml).
    post:
      operationId: createChatCompletion
      tags:
        - Chat
      summary: Create chat completion
      description: |
        <a  href = "https://platform.openai.com/docs/api-reference/chat/create">  <button style = "color: #388434"> OpenAI compatible </button></a>
        and <span style = "color: #fec928">  Jan specified </span>
        Creates a model response for the given chat conversation.
      requestBody:
        content:
          application/json:
            schema:
              $ref: "specs/chat.yaml#/components/schemas/ChatCompletionRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                $ref: "specs/chat.yaml#/components/schemas/ChatCompletionResponse"
      # NOTE(review): this sample posts to /inferences/llamacpp/chat_completion and
      # sends `llama_model_path`, which is neither this path nor a property of
      # ChatCompletionRequest - confirm whether it should target /chat/completions.
      x-codeSamples:
        - lang: "curl"
          source: |
            curl -X POST 'http://localhost:3982/inferences/llamacpp/chat_completion' \
                  -H "Content-Type: application/json" \
                  -d '{
                    "llama_model_path": "/path/to/your/model.gguf",
                    "messages": [
                      {
                        "role": "user",
                        "content": "hello"
                      }
                    ]
                  }'
  ### MODELS
  /models:
    get:
@ -54,10 +94,9 @@ paths:
      summary: List models
      description: |
-        <a  href = "https://platform.openai.com/docs/api-reference/models/list">  <button style = "color: #388434"> OpenAI Compatible </button></a>
+        <a  href = "https://platform.openai.com/docs/api-reference/models/list">  <button style = "color: #388434"> OpenAI compatible </button></a>
        Lists the currently available models, and provides basic information about each one such as the owner and availability.
      responses:
        "200":
          description: OK
@ -96,7 +135,7 @@ paths:
        - Models
      summary: Retrieve model
      description: |
-        <a  href = "https://platform.openai.com/docs/api-reference/models/retrieve">  <button style = "color: #388434"> OpenAI Compatible </button></a>
+        <a  href = "https://platform.openai.com/docs/api-reference/models/retrieve">  <button style = "color: #388434"> OpenAI compatible </button></a>
        Get a model instance, providing basic information about the model such as the owner and permissioning.
      parameters:
@ -128,7 +167,7 @@ paths:
        - Models
      summary: Delete model
      description: |
-        <a  href = "https://platform.openai.com/docs/api-reference/models/delete">  <button style = "color: #388434"> OpenAI Compatible </button></a>
+        <a  href = "https://platform.openai.com/docs/api-reference/models/delete">  <button style = "color: #388434"> OpenAI compatible </button></a>
        Delete a model.
      parameters:
@ -589,12 +628,12 @@ paths:
                'write:pets': modify pets in your account
                'read:pets': read your pets
      description: | 
-        <a  href = "https://platform.openai.com/docs/api-reference/messages/listMessages">  <button style = "color: #388434"> OpenAI Compatible </button></a>
+        <a  href = "https://platform.openai.com/docs/api-reference/messages/listMessages">  <button style = "color: #388434"> OpenAI compatible </button></a>
        <a  href = "https://platform.openai.com/docs/api-reference/messages/listMessages">  <button style = "color: #fec928"> Jan </button></a>
        Returns a list of message files.
-        <a  href = "https://platform.openai.com/docs/api-reference/messages/listMessages">  <button style = "color: #388434">OpenAI Compatible    </button></a>
+        <a  href = "https://platform.openai.com/docs/api-reference/messages/listMessages">  <button style = "color: #388434">OpenAI compatible    </button></a>
      parameters:
@ -675,7 +714,7 @@ x-webhooks:
    post:
      summary: The model object
      description: |
-        <a  href = "https://platform.openai.com/docs/api-reference/models/object">  <button style = "color: #388434"> OpenAI Compatible </button></a>
+        <a  href = "https://platform.openai.com/docs/api-reference/models/object">  <button style = "color: #388434"> OpenAI compatible </button></a>
        Describe a model offering that can be used with the API.
--- a/docs/openapi/specs/chat.yaml
+++ b/docs/openapi/specs/chat.yaml
@ -0,0 +1,197 @@
 components:
  schemas:
    # Chat parameters object: the conversation messages plus streaming,
    # model selection and sampling options.
    ChatObject:
      type: object
      properties:
        messages:
          # `array` is the valid OpenAPI type name, and array schemas must declare
          # `items`. Item shape mirrors the example below (content + role strings).
          type: array
          description: |
            Contains input data or prompts for the model to process
          items:
            type: object
            properties:
              content:
                type: string
              role:
                type: string
          example:
            [
              { "content": "Hello there :wave:", "role": "assistant" },
              { "content": "Can you write a long story", "role": "user" },
            ]
        stream:
          type: boolean
          default: true
          description: Enables continuous output generation, allowing for streaming of model responses
        model:
          type: string
          example: "gpt-3.5-turbo"
          description: Specifies the model being used for inference or processing tasks
        max_tokens:
          type: number
          default: 2048
          description: The maximum number of tokens the model will generate in a single response
        stop:
          # Each stop entry is a string, as in the example.
          type: array
          items:
            type: string
          example: ["hello"]
          description: Defines specific tokens or phrases at which the model will stop generating further output
        frequency_penalty:
          type: number
          default: 0
          description: Adjusts the likelihood of the model repeating words or phrases in its output
        presence_penalty:
          type: number
          default: 0
          description: Influences the generation of new and varied concepts in the model's output
        temperature:
          type: number
          default: 0.7
          # Bounds use the schema keywords `minimum`/`maximum`; `min`/`max` are
          # not recognized and would be ignored by validators and renderers.
          minimum: 0
          maximum: 1
          description: Controls the randomness of the model's output
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: Set probability threshold for more relevant outputs
        cache_prompt:
          type: boolean
          default: true
          description: Optimize performance in repeated or similar requests.
    # Request body for POST /chat/completions: conversation messages plus
    # model selection, streaming and sampling options.
    ChatCompletionRequest:
      type: object
      properties:
        messages:
          # `array` is the valid OpenAPI type name, and array schemas must declare
          # `items`. Item shape mirrors the example below (content + role strings).
          type: array
          description: |
            Contains input data or prompts for the model to process
            <span style="color:#388434">OpenAI compatible</span>
          items:
            type: object
            properties:
              content:
                type: string
              role:
                type: string
          example:
            [
              { "content": "Hello there :wave:", "role": "assistant" },
              { "content": "Can you write a long story", "role": "user" },
            ]
        model:
          type: string
          example: model-zephyr-7B
          description: |
            Specifies the model being used for inference or processing tasks
            <span style="color:#388434">OpenAI compatible</span>
        stream:
          type: boolean
          default: true
          description: |
            Enables continuous output generation, allowing for streaming of model responses
            <span style="color:#388434">OpenAI compatible</span>
        max_tokens:
          type: number
          default: 2048
          description: |
            The maximum number of tokens the model will generate in a single response
            <span style="color:#388434">OpenAI compatible</span>
        stop:
          # Each stop entry is a string, as in the example.
          type: array
          items:
            type: string
          example: ["hello"]
          description: |
            Defines specific tokens or phrases at which the model will stop generating further output
            <span style="color:#388434">OpenAI compatible</span>
        frequency_penalty:
          type: number
          default: 0
          description: |
            Adjusts the likelihood of the model repeating words or phrases in its output
            <span style="color:#388434">OpenAI compatible</span>
        presence_penalty:
          type: number
          default: 0
          description: |
            Influences the generation of new and varied concepts in the model's output
            <span style="color:#388434">OpenAI compatible</span>
        temperature:
          type: number
          default: 0.7
          # Bounds use the schema keywords `minimum`/`maximum`; `min`/`max` are
          # not recognized and would be ignored by validators and renderers.
          minimum: 0
          maximum: 1
          description: |
            Controls the randomness of the model's output
            <span style="color:#388434">OpenAI compatible</span>
        top_p:
          type: number
          default: 0.95
          minimum: 0
          maximum: 1
          description: |
            Set probability threshold for more relevant outputs
            <span style="color:#388434">OpenAI compatible</span>
    # Response body for a chat completion: generated choices, identifying
    # metadata for the response, and token usage counters.
    ChatCompletionResponse:
      description: Description of the response structure
      type: object
      properties:
        # One entry per generated candidate.
        choices:
          description: Array of choice objects
          type: array
          items:
            type: object
            properties:
              finish_reason:
                description: Reason for finishing the response, if applicable
                type: string
                nullable: true
                example: null
              index:
                description: Index of the choice
                type: integer
                example: 0
              message:
                type: object
                properties:
                  content:
                    description: Content of the message
                    type: string
                    example: 'Hello user. What can I help you with?'
                  role:
                    description: Role of the sender
                    type: string
                    example: 'assistant'
        created:
          description: Timestamp of when the response was created
          type: integer
          example: 1700193928
        id:
          description: Unique identifier of the response
          type: string
          example: 'ebwd2niJvJB1Q2Whyvkz'
        model:
          description: Model used for generating the response
          type: string
          nullable: true
          example: '_'
        object:
          description: Type of the response object
          type: string
          example: 'chat.completion'
        system_fingerprint:
          description: System fingerprint
          type: string
          nullable: true
          example: '_'
        # Token counters for the request/response pair.
        usage:
          description: Information about the usage of tokens
          type: object
          properties:
            completion_tokens:
              description: Number of tokens used for completion
              type: integer
              example: 500
            prompt_tokens:
              description: Number of tokens used in the prompt
              type: integer
              example: 33
            total_tokens:
              description: Total number of tokens used
              type: integer
              example: 533