diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml
index 2a04467de..2eb9b815b 100644
--- a/docs/openapi/jan.yaml
+++ b/docs/openapi/jan.yaml
@@ -14,8 +14,10 @@ servers:
 tags:
   - name: Models
     description: List and describe the various models available in the API.
-  - name: Chat Completion
-    description: Given a list of messages comprising a conversation, the model will return a response.
+  - name: Chat
+    description: |
+      Given a list of messages comprising a conversation, the model will return a response.
+
   - name: Messages
     description: |
       Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
@@ -38,13 +40,51 @@ x-tagGroups:
   - name: Endpoints
     tags:
       - Models
-      - Chat Completion
+      - Chat
   - name: Chat
     tags:
       - Assistants
       - Messages
       - Threads
 paths:
+  /chat/completions:
+    post:
+      operationId: createChatCompletion
+      tags:
+        - Chat
+      summary: Create chat completion
+      description: |
+
+        OpenAI compatible and Jan specified
+
+        Creates a model response for the given chat conversation.
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "specs/chat.yaml#/components/schemas/ChatCompletionRequest"
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: "specs/chat.yaml#/components/schemas/ChatCompletionResponse"
+      x-codeSamples:
+        - lang: "curl"
+          source: |
+            curl -X POST 'http://localhost:3982/inferences/llamacpp/chat_completion' \
+              -H "Content-Type: application/json" \
+              -d '{
+                "llama_model_path": "/path/to/your/model.gguf",
+                "messages": [
+                  {
+                    "role": "user",
+                    "content": "hello"
+                  }
+                ]
+              }'
+
   ### MODELS
   /models:
     get:
@@ -54,10 +94,9 @@ paths:
       summary: List models
       description: |
-
+
         Lists the currently available models, and provides basic information about each one such as the owner and availability.
-
       responses:
         "200":
           description: OK
@@ -96,7 +135,7 @@
         - Models
       summary: Retrieve model
       description: |
-
+
         Get a model instance, providing basic information about the model
         such as the owner and permissioning.
       parameters:
@@ -128,7 +167,7 @@
         - Models
       summary: Delete model
       description: |
-
+
         Delete a model.
       parameters:
@@ -192,7 +231,7 @@
       summary: Stop model
       description: |
         Jan
-
+
         Stop an imported model.
       parameters:
         - in: path
@@ -589,12 +628,12 @@
                 'write:pets': modify pets in your account
                 'read:pets': read your pets
       description: |
-
+
         Returns a list of message files.
-
+
       parameters:
@@ -675,7 +714,7 @@ x-webhooks:
   post:
     summary: The model object
     description: |
-
+
       Describe a model offering that can be used with the API.
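For reviewers who want to sanity-check the new route: a minimal sketch of an OpenAI-style request against the `/chat/completions` path added above, using only fields defined in the referenced `ChatCompletionRequest` schema. Note the `x-codeSamples` entry in the diff still targets the legacy `/inferences/llamacpp/chat_completion` route; the base URL below is an assumption carried over from that sample, and the real host and port come from the spec's `servers:` block, which this diff does not touch.

```sh
# Hedged example, not part of the diff: exercises the new OpenAI-compatible
# route with fields from ChatCompletionRequest. The base URL
# (localhost:3982) is assumed from the existing code sample.
curl -X POST 'http://localhost:3982/chat/completions' \
  -H "Content-Type: application/json" \
  -d '{
    "model": "model-zephyr-7B",
    "messages": [
      { "role": "user", "content": "hello" }
    ],
    "stream": false,
    "max_tokens": 2048,
    "temperature": 0.7
  }'
```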
diff --git a/docs/openapi/specs/chat.yaml b/docs/openapi/specs/chat.yaml
new file mode 100644
index 000000000..7aef0cfe0
--- /dev/null
+++ b/docs/openapi/specs/chat.yaml
@@ -0,0 +1,197 @@
+components:
+  schemas:
+    ChatObject:
+      type: object
+      properties:
+        messages:
+          type: array
+          description: |
+            Contains input data or prompts for the model to process
+          example:
+            [
+              { "content": "Hello there :wave:", "role": "assistant" },
+              { "content": "Can you write a long story", "role": "user" }
+            ]
+        stream:
+          type: boolean
+          default: true
+          description: Enables continuous output generation, allowing for streaming of model responses
+        model:
+          type: string
+          example: "gpt-3.5-turbo"
+          description: Specifies the model being used for inference or processing tasks
+        max_tokens:
+          type: number
+          default: 2048
+          description: The maximum number of tokens the model will generate in a single response
+        stop:
+          type: array
+          example: ["hello"]
+          description: Defines specific tokens or phrases at which the model will stop generating further output
+        frequency_penalty:
+          type: number
+          default: 0
+          description: Adjusts the likelihood of the model repeating words or phrases in its output
+        presence_penalty:
+          type: number
+          default: 0
+          description: Influences the generation of new and varied concepts in the model's output
+        temperature:
+          type: number
+          default: 0.7
+          minimum: 0
+          maximum: 1
+          description: Controls the randomness of the model's output
+        top_p:
+          type: number
+          default: 0.95
+          minimum: 0
+          maximum: 1
+          description: Sets the probability threshold for more relevant outputs
+        cache_prompt:
+          type: boolean
+          default: true
+          description: Optimizes performance for repeated or similar requests.
+    ChatCompletionRequest:
+      type: object
+      properties:
+        messages:
+          type: array
+          description: |
+            Contains input data or prompts for the model to process
+
+            OpenAI compatible
+          example:
+            [
+              { "content": "Hello there :wave:", "role": "assistant" },
+              { "content": "Can you write a long story", "role": "user" }
+            ]
+        model:
+          type: string
+          example: model-zephyr-7B
+          description: |
+            Specifies the model being used for inference or processing tasks
+
+            OpenAI compatible
+        stream:
+          type: boolean
+          default: true
+          description: |
+            Enables continuous output generation, allowing for streaming of model responses
+
+            OpenAI compatible
+        max_tokens:
+          type: number
+          default: 2048
+          description: |
+            The maximum number of tokens the model will generate in a single response
+
+            OpenAI compatible
+        stop:
+          type: array
+          example: ["hello"]
+          description: |
+            Defines specific tokens or phrases at which the model will stop generating further output
+
+            OpenAI compatible
+        frequency_penalty:
+          type: number
+          default: 0
+          description: |
+            Adjusts the likelihood of the model repeating words or phrases in its output
+
+            OpenAI compatible
+        presence_penalty:
+          type: number
+          default: 0
+          description: |
+            Influences the generation of new and varied concepts in the model's output
+
+            OpenAI compatible
+        temperature:
+          type: number
+          default: 0.7
+          minimum: 0
+          maximum: 1
+          description: |
+            Controls the randomness of the model's output
+
+            OpenAI compatible
+        top_p:
+          type: number
+          default: 0.95
+          minimum: 0
+          maximum: 1
+          description: |
+            Sets the probability threshold for more relevant outputs
+
+            OpenAI compatible
+
+    ChatCompletionResponse:
+      type: object
+      description: The response structure returned for a chat completion request
+      properties:
+        choices:
+          type: array
+          description: Array of choice objects
+          items:
+            type: object
+            properties:
+              finish_reason:
+                type: string
+                nullable: true
+                example: null
+                description: Reason for finishing the response, if applicable
+              index:
+                type: integer
+                example: 0
+                description: Index of the choice
+              message:
+                type: object
+                properties:
+                  content:
+                    type: string
+                    example: "Hello user. What can I help you with?"
+                    description: Content of the message
+                  role:
+                    type: string
+                    example: assistant
+                    description: Role of the sender
+        created:
+          type: integer
+          example: 1700193928
+          description: Timestamp of when the response was created
+        id:
+          type: string
+          example: ebwd2niJvJB1Q2Whyvkz
+          description: Unique identifier of the response
+        model:
+          type: string
+          nullable: true
+          example: _
+          description: Model used for generating the response
+        object:
+          type: string
+          example: chat.completion
+          description: Type of the response object
+        system_fingerprint:
+          type: string
+          nullable: true
+          example: _
+          description: System fingerprint
+        usage:
+          type: object
+          description: Information about the usage of tokens
+          properties:
+            completion_tokens:
+              type: integer
+              example: 500
+              description: Number of tokens used for completion
+            prompt_tokens:
+              type: integer
+              example: 33
+              description: Number of tokens used in the prompt
+            total_tokens:
+              type: integer
+              example: 533
+              description: Total number of tokens used
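Read together, the per-field examples above assemble into a response body roughly like the following. This is a sketch for reviewers, not spec content: `model` and `system_fingerprint` are rendered as `null` here because the schema marks them nullable and their `_` placeholder examples are not meaningful values.

```json
{
  "choices": [
    {
      "finish_reason": null,
      "index": 0,
      "message": {
        "content": "Hello user. What can I help you with?",
        "role": "assistant"
      }
    }
  ],
  "created": 1700193928,
  "id": "ebwd2niJvJB1Q2Whyvkz",
  "model": null,
  "object": "chat.completion",
  "system_fingerprint": null,
  "usage": {
    "completion_tokens": 500,
    "prompt_tokens": 33,
    "total_tokens": 533
  }
}
```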