docs: update API Reference page

2024-01-24 01:40:08 +09:00 · 2024-01-24 01:40:08 +09:00 · 0afdee4a98
commit 0afdee4a98
parent fe1baed116
2 changed files with 150 additions and 275 deletions
--- a/docs/openapi/jan.yaml
+++ b/docs/openapi/jan.yaml
@ -67,21 +67,32 @@ paths:
      x-codeSamples:
        - lang: cURL
          source: |
-            curl http://localhost:1337/v1/chat/completions \
+            curl -X 'POST' \
-              -H "Content-Type: application/json" \
+              'http://127.0.0.1:1337/v1/chat/completions' \
              -H 'accept: application/json' \
              -H 'Content-Type: application/json' \
              -d '{
-                "model": "tinyllama-1.1b",
+              "messages": [
-                "messages": [
+                {
-                  {
+                  "content": "You are a helpful assistant.",
-                    "role": "system",
+                  "role": "system"
-                    "content": "You are a helpful assistant."
+                },
-                  },
+                {
-                  {
+                  "content": "Hello!",
-                    "role": "user",
+                  "role": "user"
-                    "content": "Hello!"
+                }
-                  }
+              ],
-                ]
+              "model": "tinyllama-1.1b",
-              }'
+              "stream": true,
              "max_tokens": 2048,
              "stop": [
                "hello"
              ],
              "frequency_penalty": 0,
              "presence_penalty": 0,
              "temperature": 0.7,
              "top_p": 0.95
            }'
  /models:
    get:
      operationId: listModels
@ -103,7 +114,9 @@ paths:
      x-codeSamples:
        - lang: cURL
          source: |
-            curl http://localhost:1337/v1/models
+            curl -X 'GET' \
              'http://127.0.0.1:1337/v1/models' \
              -H 'accept: application/json'
  "/models/download/{model_id}":
    get:
      operationId: downloadModel
@ -131,7 +144,9 @@ paths:
      x-codeSamples:
        - lang: cURL
          source: |
-            curl -X POST http://localhost:1337/v1/models/download/{model_id}
+            curl -X 'GET' \
              'http://127.0.0.1:1337/v1/models/download/{model_id}' \
              -H 'accept: application/json'
  "/models/{model_id}":
    get:
      operationId: retrieveModel
@ -162,7 +177,9 @@ paths:
      x-codeSamples:
        - lang: cURL
          source: |
-            curl http://localhost:1337/v1/models/{model_id}
+            curl -X 'GET' \
              'http://127.0.0.1:1337/v1/models/{model_id}' \
              -H 'accept: application/json'
    delete:
      operationId: deleteModel
      tags:
@ -191,7 +208,9 @@ paths:
      x-codeSamples:
        - lang: cURL
          source: |
-            curl -X DELETE http://localhost:1337/v1/models/{model_id}
+            curl -X 'DELETE' \
              'http://127.0.0.1:1337/v1/models/{model_id}' \
              -H 'accept: application/json'
  /threads:
    post:
      operationId: createThread
--- a/docs/openapi/specs/models.yaml
+++ b/docs/openapi/specs/models.yaml
@ -18,106 +18,77 @@ components:
    Model:
      type: object
      properties:
        type:
          type: string
          default: model
          description: The type of the object.
        version:
          type: string
          default: "1"
          description: The version number of the model.
        id:
          type: string
          description: Unique identifier used in chat-completions model_name, matches
            folder name.
          example: zephyr-7b
        name:
          type: string
          description: Name of the model.
          example: Zephyr 7B
        owned_by:
          type: string
          description: Compatibility field for OpenAI.
          default: ""
        created:
          type: integer
          format: int64
          description: Unix timestamp representing the creation time.
        description:
          type: string
          description: Description of the model.
        state:
          type: string
          enum:
            - null
            - downloading
            - ready
            - starting
            - stopping
          description: Current state of the model.
        format:
          type: string
          description: State format of the model, distinct from the engine.
          example: ggufv3
        source_url:
          type: string
          format: uri
          description: URL to the source of the model.
-          example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
+          example: https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf
        id:
          type: string
          description:
            Unique identifier used in chat-completions model_name, matches
            folder name.
          example: trinity-v1.2-7b
        object:
          type: string
          example: model
        name:
          type: string
          description: Name of the model.
          example: Trinity-v1.2 7B Q4
        version:
          type: string
          default: "1.0"
          description: The version number of the model.
        description:
          type: string
          description: Description of the model.
          example: Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.
        format:
          type: string
          description: State format of the model, distinct from the engine.
          example: gguf
        settings:
          type: object
          properties:
            ctx_len:
-              type: string
+              type: integer
              description: Context length.
-              example: "2048"
+              example: 4096
-            ngl:
+            prompt_template:
              type: string
-              description: Number of layers.
+              example: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
              example: "100"
            embedding:
              type: string
              description: Indicates if embedding is enabled.
              example: "true"
            n_parallel:
              type: string
              description: Number of parallel processes.
              example: "4"
          additionalProperties: false
        parameters:
          type: object
          properties:
            temperature:
-              type: string
+              example: 0.7
              description: Temperature setting for the model.
              example: "0.7"
            token_limit:
              type: string
              description: Token limit for the model.
              example: "2048"
            top_k:
              type: string
              description: Top-k setting for the model.
              example: "0"
            top_p:
-              type: string
+              example: 0.95
              description: Top-p setting for the model.
              example: "1"
            stream:
-              type: string
+              example: true
-              description: Indicates if streaming is enabled.
+            max_tokens:
-              example: "true"
+              example: 4096
            stop:
              example: []
            frequency_penalty:
              example: 0
            presence_penalty:
              example: 0
          additionalProperties: false
        metadata:
-          type: object
+          author:
          description: Additional metadata.
        assets:
          type: array
          items:
            type: string
-          description: List of assets related to the model.
+            example: Jan
-      required:
+          tags:
-        - source_url
+            example: ["7B", "Merged", "Featured"]
          size:
            example: 4370000000
          cover:
            example: "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
        engine:
          example: nitro
    ModelObject:
      type: object
      properties:
@ -125,7 +96,7 @@ components:
          type: string
          description: |
            The identifier of the model.
-          example: zephyr-7b
+          example: trinity-v1.2-7b
        object:
          type: string
          description: |
@ -145,197 +116,82 @@ components:
    GetModelResponse:
      type: object
      properties:
        id:
          type: string
          description: The identifier of the model.
          example: zephyr-7b
        object:
          type: string
          description: Type of the object, indicating it's a model.
          default: model
        created:
          type: integer
          format: int64
          description: Unix timestamp representing the creation time of the model.
        owned_by:
          type: string
          description: The entity that owns the model.
          example: _
        state:
          type: string
          enum:
            - not_downloaded
            - downloaded
            - running
            - stopped
          description: The current state of the model.
        source_url:
          type: string
          format: uri
          description: URL to the source of the model.
-          example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
+          example: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf
-        engine_parameters:
+        id:
-          type: object
+          type: string
-          properties:
+          description:
-            pre_prompt:
+            Unique identifier used in chat-completions model_name, matches
-              type: string
+            folder name.
-              description: Predefined prompt used for setting up internal configurations.
+          example: mistral-ins-7b-q4
-              default: ""
+        object:
-              example: Initial setup complete.
+          type: string
-            system_prompt:
+          example: model
-              type: string
+        name:
-              description: Prefix used for system-level prompts.
+          type: string
-              default: "SYSTEM: "
+          description: Name of the model.
-            user_prompt:
+          example: Mistral Instruct 7B Q4
-              type: string
+        version:
-              description: Prefix used for user prompts.
+          type: string
-              default: "USER: "
+          default: "1.0"
-            ai_prompt:
+          description: The version number of the model.
-              type: string
+        description:
-              description: Prefix used for assistant prompts.
+          type: string
-              default: "ASSISTANT: "
+          description: Description of the model.
-            ngl:
+          example: Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.
-              type: integer
+        format:
-              description: Number of neural network layers loaded onto the GPU for
+          type: string
-                acceleration.
+          description: State format of the model, distinct from the engine.
-              minimum: 0
+          example: gguf
-              maximum: 100
+        settings:
              default: 100
              example: 100
            ctx_len:
              type: integer
              description: Context length for model operations, varies based on the specific
                model.
              minimum: 128
              maximum: 4096
              default: 2048
              example: 2048
            n_parallel:
              type: integer
              description: Number of parallel operations, relevant when continuous batching is
                enabled.
              minimum: 1
              maximum: 10
              default: 1
              example: 4
            cont_batching:
              type: boolean
              description: Indicates if continuous batching is used for processing.
              default: false
              example: false
            cpu_threads:
              type: integer
              description: Number of threads allocated for CPU-based inference.
              minimum: 1
              example: 8
            embedding:
              type: boolean
              description: Indicates if embedding layers are enabled in the model.
              default: true
              example: true
        model_parameters:
          type: object
          properties:
            ctx_len:
              type: integer
-              description: Maximum context length the model can handle.
+              description: Context length.
-              minimum: 0
+              example: 4096
-              maximum: 4096
+            prompt_template:
-              default: 2048
+              type: string
-              example: 2048
+              example: "[INST] {prompt} [/INST]"
-            ngl:
+          additionalProperties: false
-              type: integer
+        parameters:
-              description: Number of layers in the neural network.
+          type: object
-              minimum: 1
+          properties:
              maximum: 100
              default: 100
              example: 100
            embedding:
              type: boolean
              description: Indicates if embedding layers are used.
              default: true
              example: true
            n_parallel:
              type: integer
              description: Number of parallel processes the model can run.
              minimum: 1
              maximum: 10
              default: 1
              example: 4
            temperature:
              type: number
              description: Controls randomness in model's responses. Higher values lead to
                more random responses.
              minimum: 0
              maximum: 2
              default: 0.7
              example: 0.7
            token_limit:
              type: integer
              description: Maximum number of tokens the model can generate in a single
                response.
              minimum: 1
              maximum: 4096
              default: 2048
              example: 2048
            top_k:
              type: integer
              description: Limits the model to consider only the top k most likely next tokens
                at each step.
              minimum: 0
              maximum: 100
              default: 0
              example: 0
            top_p:
-              type: number
+              example: 0.95
-              description: Nucleus sampling parameter. The model considers the smallest set of
+            stream:
-                tokens whose cumulative probability exceeds the top_p value.
+              example: true
-              minimum: 0
+            max_tokens:
-              maximum: 1
+              example: 4096
-              default: 1
+            stop:
-              example: 1
+              example: []
            frequency_penalty:
              example: 0
            presence_penalty:
              example: 0
          additionalProperties: false
        metadata:
-          type: object
+          author:
-          properties:
+            type: string
-            engine:
+            example: MistralAI
-              type: string
+          tags:
-              description: The engine used by the model.
+            example: ["7B", "Featured", "Foundation Model"]
-              enum:
+          size:
-                - nitro
+            example: 4370000000
-                - openai
+          cover:
-                - hf_inference
+            example: "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png"
-            quantization:
+        engine:
-              type: string
+          example: nitro
              description: Quantization parameter of the model.
              example: Q3_K_L
            size:
              type: string
              description: Size of the model.
              example: 7B
      required:
        - id
        - object
        - created
        - owned_by
        - state
        - source_url
        - parameters
        - metadata
    DeleteModelResponse:
      type: object
      properties:
-        id:
+        message:
-          type: string
+          example: Not found
          description: The identifier of the model that was deleted.
          example: model-zephyr-7B
        object:
          type: string
          description: Type of the object, indicating it's a model.
          default: model
        deleted:
          type: boolean
          description: Indicates whether the model was successfully deleted.
          example: true
    StartModelResponse:
      type: object
      properties: