Merge pull request #840 from janhq/813-feat-json-schema-for-frontend-parameters

docs: add json schema for engine and model parameters
2023-12-08 16:25:41 +07:00 · 2023-12-08 16:25:41 +07:00 · ca0ad7d3b8
commit ca0ad7d3b8
parent a77e50b160 c664ab8811
1 changed file with 96 additions and 29 deletions
--- a/docs/openapi/specs/models.yaml
+++ b/docs/openapi/specs/models.yaml
@@ -169,53 +169,120 @@ components:
          format: uri
          description: "URL to the source of the model."
          example: "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
-        parameters:
+        engine_parameters:
+          type: object
+          properties:
+            pre_prompt:
+              type: string
+              description: "Predefined prompt used for setting up internal configurations."
+              default: ""
+              example: "Initial setup complete."
+            system_prompt:
+              type: string
+              description: "Prefix used for system-level prompts."
+              default: "SYSTEM: "
+            user_prompt:
+              type: string
+              description: "Prefix used for user prompts."
+              default: "USER: "
+            ai_prompt:
+              type: string
+              description: "Prefix used for assistant prompts."
+              default: "ASSISTANT: "
+            ngl:
+              type: integer
+              description: "Number of neural network layers loaded onto the GPU for acceleration."
+              minimum: 0
+              maximum: 100
+              default: 100
+              example: 100
+            ctx_len:
+              type: integer
+              description: "Context length for model operations, varies based on the specific model."
+              minimum: 128
+              maximum: 4096
+              default: 2048
+              example: 2048
+            n_parallel:
+              type: integer
+              description: "Number of parallel operations, relevant when continuous batching is enabled."
+              minimum: 1
+              maximum: 10
+              default: 1
+              example: 4
+            cont_batching:
+              type: boolean
+              description: "Indicates if continuous batching is used for processing."
+              default: false
+              example: false
+            cpu_threads:
+              type: integer
+              description: "Number of threads allocated for CPU-based inference."
+              minimum: 1
+              example: 8
+            embedding:
+              type: boolean
+              description: "Indicates if embedding layers are enabled in the model."
+              default: true
+              example: true
+        model_parameters:
          type: object
          properties:
            ctx_len:
              type: integer
-              description: "Context length."
+              description: "Maximum context length the model can handle."
+              minimum: 0
+              maximum: 4096
+              default: 2048
              example: 2048
            ngl:
              type: integer
-              description: "Number of layers."
+              description: "Number of neural network layers loaded onto the GPU for acceleration."
+              minimum: 1
+              maximum: 100
+              default: 100
              example: 100
            embedding:
              type: boolean
-              description: "Indicates if embedding is enabled."
+              description: "Indicates if embedding layers are used."
+              default: true
              example: true
            n_parallel:
              type: integer
-              description: "Number of parallel processes."
+              description: "Number of parallel processes the model can run."
+              minimum: 1
+              maximum: 10
+              default: 1
              example: 4
-            # pre_prompt:
-            #   type: string
-            #   description: "Predefined prompt for initiating the chat."
-            #   example: "A chat between a curious user and an artificial intelligence"
-            # user_prompt:
-            #   type: string
-            #   description: "Format of user's prompt."
-            #   example: "USER: "
-            # ai_prompt:
-            #   type: string
-            #   description: "Format of AI's response."
-            #   example: "ASSISTANT: "
            temperature:
-              type: string
-              description: "Temperature setting for the model."
-              example: "0.7"
+              type: number
+              description: "Controls randomness in the model's responses. Higher values lead to more random responses."
+              minimum: 0.0
+              maximum: 2.0
+              default: 0.7
+              example: 0.7
            token_limit:
-              type: string
-              description: "Token limit for the model."
-              example: "2048"
+              type: integer
+              description: "Maximum number of tokens the model can generate in a single response."
+              minimum: 1
+              maximum: 4096
+              default: 2048
+              example: 2048
            top_k:
-              type: string
-              description: "Top-k setting for the model."
-              example: "0"
+              type: integer
+              description: "Limits the model to consider only the top k most likely next tokens at each step."
+              minimum: 0
+              maximum: 100
+              default: 0
+              example: 0
            top_p:
-              type: string
-              description: "Top-p setting for the model."
-              example: "1"
+              type: number
+              description: "Nucleus sampling parameter. The model considers the smallest set of tokens whose cumulative probability exceeds the top_p value."
+              minimum: 0.0
+              maximum: 1.0
+              default: 1.0
+              example: 1.0
+
        metadata:
          type: object
          properties: