diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml
index bfff0ad73..9981f7308 100644
--- a/docs/openapi/jan.yaml
+++ b/docs/openapi/jan.yaml
@@ -67,21 +67,32 @@ paths:
       x-codeSamples:
         - lang: cURL
           source: |
-            curl http://localhost:1337/v1/chat/completions \
-              -H "Content-Type: application/json" \
+            curl -X 'POST' \
+              'http://127.0.0.1:1337/v1/chat/completions' \
+              -H 'accept: application/json' \
+              -H 'Content-Type: application/json' \
               -d '{
-                "model": "tinyllama-1.1b",
-                "messages": [
-                  {
-                    "role": "system",
-                    "content": "You are a helpful assistant."
-                  },
-                  {
-                    "role": "user",
-                    "content": "Hello!"
-                  }
-                ]
-              }'
+              "messages": [
+                {
+                  "content": "You are a helpful assistant.",
+                  "role": "system"
+                },
+                {
+                  "content": "Hello!",
+                  "role": "user"
+                }
+              ],
+              "model": "tinyllama-1.1b",
+              "stream": true,
+              "max_tokens": 2048,
+              "stop": [
+                "hello"
+              ],
+              "frequency_penalty": 0,
+              "presence_penalty": 0,
+              "temperature": 0.7,
+              "top_p": 0.95
+            }'
   /models:
     get:
       operationId: listModels
@@ -103,7 +114,9 @@ paths:
       x-codeSamples:
         - lang: cURL
           source: |
-            curl http://localhost:1337/v1/models
+            curl -X 'GET' \
+              'http://127.0.0.1:1337/v1/models' \
+              -H 'accept: application/json'
   "/models/download/{model_id}":
     get:
       operationId: downloadModel
@@ -131,7 +144,9 @@ paths:
       x-codeSamples:
         - lang: cURL
           source: |
-            curl -X POST http://localhost:1337/v1/models/download/{model_id}
+            curl -X 'GET' \
+              'http://127.0.0.1:1337/v1/models/download/{model_id}' \
+              -H 'accept: application/json'
   "/models/{model_id}":
     get:
       operationId: retrieveModel
@@ -162,7 +177,9 @@ paths:
       x-codeSamples:
         - lang: cURL
           source: |
-            curl http://localhost:1337/v1/models/{model_id}
+            curl -X 'GET' \
+              'http://127.0.0.1:1337/v1/models/{model_id}' \
+              -H 'accept: application/json'
     delete:
       operationId: deleteModel
       tags:
@@ -191,7 +208,9 @@ paths:
       x-codeSamples:
         - lang: cURL
           source: |
-            curl -X DELETE http://localhost:1337/v1/models/{model_id}
+            curl -X 'DELETE' \
+              'http://127.0.0.1:1337/v1/models/{model_id}' \
+              -H 'accept: application/json'
   /threads:
     post:
       operationId: createThread
diff --git a/docs/openapi/specs/models.yaml b/docs/openapi/specs/models.yaml
index 418be9563..791d14880 100644
--- a/docs/openapi/specs/models.yaml
+++ b/docs/openapi/specs/models.yaml
@@ -18,106 +18,77 @@ components:
     Model:
       type: object
       properties:
-        type:
-          type: string
-          default: model
-          description: The type of the object.
-        version:
-          type: string
-          default: "1"
-          description: The version number of the model.
-        id:
-          type: string
-          description: Unique identifier used in chat-completions model_name, matches
-            folder name.
-          example: zephyr-7b
-        name:
-          type: string
-          description: Name of the model.
-          example: Zephyr 7B
-        owned_by:
-          type: string
-          description: Compatibility field for OpenAI.
-          default: ""
-        created:
-          type: integer
-          format: int64
-          description: Unix timestamp representing the creation time.
-        description:
-          type: string
-          description: Description of the model.
-        state:
-          type: string
-          enum:
-            - null
-            - downloading
-            - ready
-            - starting
-            - stopping
-          description: Current state of the model.
-        format:
-          type: string
-          description: State format of the model, distinct from the engine.
-          example: ggufv3
         source_url:
           type: string
           format: uri
           description: URL to the source of the model.
-          example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
+          example: https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf
+        id:
+          type: string
+          description:
+            Unique identifier used in chat-completions model_name, matches
+            folder name.
+          example: trinity-v1.2-7b
+        object:
+          type: string
+          example: model
+        name:
+          type: string
+          description: Name of the model.
+          example: Trinity-v1.2 7B Q4
+        version:
+          type: string
+          default: "1.0"
+          description: The version number of the model.
+        description:
+          type: string
+          description: Description of the model.
+          example: Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.
+        format:
+          type: string
+          description: State format of the model, distinct from the engine.
+          example: gguf
         settings:
           type: object
           properties:
             ctx_len:
-              type: string
+              type: integer
               description: Context length.
-              example: "2048"
-            ngl:
+              example: 4096
+            prompt_template:
               type: string
-              description: Number of layers.
-              example: "100"
-            embedding:
-              type: string
-              description: Indicates if embedding is enabled.
-              example: "true"
-            n_parallel:
-              type: string
-              description: Number of parallel processes.
-              example: "4"
+              example: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
           additionalProperties: false
         parameters:
           type: object
           properties:
             temperature:
-              type: string
-              description: Temperature setting for the model.
-              example: "0.7"
-            token_limit:
-              type: string
-              description: Token limit for the model.
-              example: "2048"
-            top_k:
-              type: string
-              description: Top-k setting for the model.
-              example: "0"
+              example: 0.7
             top_p:
-              type: string
-              description: Top-p setting for the model.
-              example: "1"
+              example: 0.95
             stream:
-              type: string
-              description: Indicates if streaming is enabled.
-              example: "true"
+              example: true
+            max_tokens:
+              example: 4096
+            stop:
+              example: []
+            frequency_penalty:
+              example: 0
+            presence_penalty:
+              example: 0
           additionalProperties: false
         metadata:
-          type: object
-          description: Additional metadata.
-        assets:
-          type: array
-          items:
+          author:
             type: string
-          description: List of assets related to the model.
-      required:
-        - source_url
+            example: Jan
+          tags:
+            example: ["7B", "Merged", "Featured"]
+          size:
+            example: 4370000000
+          cover:
+            example: "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
+        engine:
+          example: nitro
     ModelObject:
       type: object
       properties:
@@ -125,7 +96,7 @@ components:
           type: string
           description: |
             The identifier of the model.
-          example: zephyr-7b
+          example: trinity-v1.2-7b
         object:
           type: string
           description: |
@@ -145,197 +116,82 @@ components:
     GetModelResponse:
       type: object
       properties:
-        id:
-          type: string
-          description: The identifier of the model.
-          example: zephyr-7b
-        object:
-          type: string
-          description: Type of the object, indicating it's a model.
-          default: model
-        created:
-          type: integer
-          format: int64
-          description: Unix timestamp representing the creation time of the model.
-        owned_by:
-          type: string
-          description: The entity that owns the model.
-          example: _
-        state:
-          type: string
-          enum:
-            - not_downloaded
-            - downloaded
-            - running
-            - stopped
-          description: The current state of the model.
         source_url:
           type: string
           format: uri
           description: URL to the source of the model.
-          example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
-        engine_parameters:
-          type: object
-          properties:
-            pre_prompt:
-              type: string
-              description: Predefined prompt used for setting up internal configurations.
-              default: ""
-              example: Initial setup complete.
-            system_prompt:
-              type: string
-              description: Prefix used for system-level prompts.
-              default: "SYSTEM: "
-            user_prompt:
-              type: string
-              description: Prefix used for user prompts.
-              default: "USER: "
-            ai_prompt:
-              type: string
-              description: Prefix used for assistant prompts.
-              default: "ASSISTANT: "
-            ngl:
-              type: integer
-              description: Number of neural network layers loaded onto the GPU for
-                acceleration.
-              minimum: 0
-              maximum: 100
-              default: 100
-              example: 100
-            ctx_len:
-              type: integer
-              description: Context length for model operations, varies based on the specific
-                model.
-              minimum: 128
-              maximum: 4096
-              default: 2048
-              example: 2048
-            n_parallel:
-              type: integer
-              description: Number of parallel operations, relevant when continuous batching is
-                enabled.
-              minimum: 1
-              maximum: 10
-              default: 1
-              example: 4
-            cont_batching:
-              type: boolean
-              description: Indicates if continuous batching is used for processing.
-              default: false
-              example: false
-            cpu_threads:
-              type: integer
-              description: Number of threads allocated for CPU-based inference.
-              minimum: 1
-              example: 8
-            embedding:
-              type: boolean
-              description: Indicates if embedding layers are enabled in the model.
-              default: true
-              example: true
-        model_parameters:
+          example: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf
+        id:
+          type: string
+          description:
+            Unique identifier used in chat-completions model_name, matches
+            folder name.
+          example: mistral-ins-7b-q4
+        object:
+          type: string
+          example: model
+        name:
+          type: string
+          description: Name of the model.
+          example: Mistral Instruct 7B Q4
+        version:
+          type: string
+          default: "1.0"
+          description: The version number of the model.
+        description:
+          type: string
+          description: Description of the model.
+          example: Mistral Instruct 7B Q4 is an instruction-following 7B foundation model from MistralAI.
+        format:
+          type: string
+          description: State format of the model, distinct from the engine.
+          example: gguf
+        settings:
           type: object
           properties:
             ctx_len:
               type: integer
-              description: Maximum context length the model can handle.
-              minimum: 0
-              maximum: 4096
-              default: 2048
-              example: 2048
-            ngl:
-              type: integer
-              description: Number of layers in the neural network.
-              minimum: 1
-              maximum: 100
-              default: 100
-              example: 100
-            embedding:
-              type: boolean
-              description: Indicates if embedding layers are used.
-              default: true
-              example: true
-            n_parallel:
-              type: integer
-              description: Number of parallel processes the model can run.
-              minimum: 1
-              maximum: 10
-              default: 1
-              example: 4
+              description: Context length.
+              example: 4096
+            prompt_template:
+              type: string
+              example: "[INST] {prompt} [/INST]"
+          additionalProperties: false
+        parameters:
+          type: object
+          properties:
             temperature:
-              type: number
-              description: Controls randomness in model's responses. Higher values lead to
-                more random responses.
-              minimum: 0
-              maximum: 2
-              default: 0.7
               example: 0.7
-            token_limit:
-              type: integer
-              description: Maximum number of tokens the model can generate in a single
-                response.
-              minimum: 1
-              maximum: 4096
-              default: 2048
-              example: 2048
-            top_k:
-              type: integer
-              description: Limits the model to consider only the top k most likely next tokens
-                at each step.
-              minimum: 0
-              maximum: 100
-              default: 0
-              example: 0
             top_p:
-              type: number
-              description: Nucleus sampling parameter. The model considers the smallest set of
-                tokens whose cumulative probability exceeds the top_p value.
-              minimum: 0
-              maximum: 1
-              default: 1
-              example: 1
+              example: 0.95
+            stream:
+              example: true
+            max_tokens:
+              example: 4096
+            stop:
+              example: []
+            frequency_penalty:
+              example: 0
+            presence_penalty:
+              example: 0
+          additionalProperties: false
         metadata:
-          type: object
-          properties:
-            engine:
-              type: string
-              description: The engine used by the model.
-              enum:
-                - nitro
-                - openai
-                - hf_inference
-            quantization:
-              type: string
-              description: Quantization parameter of the model.
-              example: Q3_K_L
-            size:
-              type: string
-              description: Size of the model.
-              example: 7B
-      required:
-        - id
-        - object
-        - created
-        - owned_by
-        - state
-        - source_url
-        - parameters
-        - metadata
+          author:
+            type: string
+            example: MistralAI
+          tags:
+            example: ["7B", "Featured", "Foundation Model"]
+          size:
+            example: 4370000000
+          cover:
+            example: "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png"
+        engine:
+          example: nitro
     DeleteModelResponse:
       type: object
       properties:
-        id:
-          type: string
-          description: The identifier of the model that was deleted.
-          example: model-zephyr-7B
-        object:
-          type: string
-          description: Type of the object, indicating it's a model.
-          default: model
-        deleted:
-          type: boolean
-          description: Indicates whether the model was successfully deleted.
-          example: true
+        message:
+          example: Not found
     StartModelResponse:
       type: object
       properties: