diff --git a/Dockerfile b/Dockerfile
index 48b2d254f..e205a3f4f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -44,7 +44,7 @@ COPY --from=builder /app/web ./web/
COPY --from=builder /app/models ./models/
RUN yarn workspace @janhq/uikit install && yarn workspace @janhq/uikit build
-RUN yarn workspace jan-web install
+RUN yarn workspace @janhq/web install
RUN npm install -g serve@latest
@@ -55,7 +55,7 @@ ENV JAN_API_PORT 1337
ENV API_BASE_URL http://localhost:1337
-CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out & cd server && node build/main.js"]
+CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace @janhq/web build && cd web && npx serve out & cd server && node build/main.js"]
# docker build -t jan .
# docker run -p 1337:1337 -p 3000:3000 -p 3928:3928 jan
diff --git a/Dockerfile.gpu b/Dockerfile.gpu
index 832e2c18c..d703b8b43 100644
--- a/Dockerfile.gpu
+++ b/Dockerfile.gpu
@@ -68,7 +68,7 @@ COPY --from=builder /app/web ./web/
COPY --from=builder /app/models ./models/
RUN yarn workspace @janhq/uikit install && yarn workspace @janhq/uikit build
-RUN yarn workspace jan-web install
+RUN yarn workspace @janhq/web install
RUN npm install -g serve@latest
@@ -81,7 +81,7 @@ ENV JAN_API_PORT 1337
ENV API_BASE_URL http://localhost:1337
-CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out & cd server && node build/main.js"]
+CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace @janhq/web build && cd web && npx serve out & cd server && node build/main.js"]
# pre-requisites: nvidia-docker
# docker build -t jan-gpu . -f Dockerfile.gpu
diff --git a/charts/server/values.yaml b/charts/server/values.yaml
index 70f463174..73d4e8916 100644
--- a/charts/server/values.yaml
+++ b/charts/server/values.yaml
@@ -150,7 +150,7 @@ common:
command: ['/bin/sh', '-c']
args:
[
- 'export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out',
+ 'export NODE_ENV=production && yarn workspace @janhq/web build && cd web && npx serve out',
]
replicaCount: 1
diff --git a/docs/openapi/.gitkeep b/docs/openapi/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/docs/openapi/jan.json b/docs/openapi/jan.json
new file mode 100644
index 000000000..844a8f7ce
--- /dev/null
+++ b/docs/openapi/jan.json
@@ -0,0 +1,2397 @@
+{
+ "openapi": "3.0.0",
+ "info": {
+ "title": "API Reference",
+ "description": "# Introduction\nJan API is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference).\n",
+ "version": "0.1.8",
+ "contact": {
+ "name": "Jan Discord",
+ "url": "https://discord.gg/7EcEz7MrvA"
+ },
+ "license": {
+ "name": "AGPLv3",
+ "url": "https://github.com/janhq/nitro/blob/main/LICENSE"
+ }
+ },
+ "servers": [
+ {
+ "url": "http://localhost:1337/v1"
+ }
+ ],
+ "tags": [
+ {
+ "name": "Models",
+ "description": "List and describe the various models available in the API."
+ },
+ {
+ "name": "Chat",
+ "description": "Given a list of messages comprising a conversation, the model will return a response.\n"
+ },
+ {
+ "name": "Messages",
+ "description": "Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).\n"
+ },
+ {
+ "name": "Threads"
+ },
+ {
+ "name": "Assistants",
+ "description": "Configures and utilizes different AI assistants for varied tasks"
+ }
+ ],
+ "x-tagGroups": [
+ {
+ "name": "Endpoints",
+ "tags": ["Models", "Chat"]
+ },
+ {
+ "name": "Chat",
+ "tags": ["Assistants", "Messages", "Threads"]
+ }
+ ],
+ "paths": {
+ "/chat/completions": {
+ "post": {
+ "operationId": "createChatCompletion",
+ "tags": ["Chat"],
+ "summary": "Create chat completion\n",
+ "description": "Creates a model response for the given chat conversation. Equivalent to OpenAI's create chat completion. \n",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ChatCompletionRequest"
+ }
+ }
+ }
+ },
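+        "x-codeSamples": [
+          {
+            "lang": "cURL",
+            "source": "curl -X 'POST' 'http://localhost:1337/v1/chat/completions' \\\n  -H 'accept: application/json' \\\n  -H 'Content-Type: application/json' \\\n  -d '{\"messages\": [{\"content\": \"You are a helpful assistant.\", \"role\": \"system\"}, {\"content\": \"Hello!\", \"role\": \"user\"}], \"model\": \"tinyllama-1.1b\", \"stream\": true, \"max_tokens\": 2048, \"temperature\": 0.7, \"top_p\": 0.95}'\n"
+          }
+        ],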
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ChatCompletionResponse"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/models": {
+ "get": {
+ "operationId": "listModels",
+ "tags": ["Models"],
+ "summary": "List models",
+ "description": "Lists the currently available models, and provides basic information about each one such as the owner and availability. Equivalent to OpenAI's list model. \n",
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ListModelsResponse"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/models/download/{model_id}": {
+ "get": {
+ "operationId": "downloadModel",
+ "tags": ["Models"],
+ "summary": "Download a specific model.",
+ "description": "Download a model.\n",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "model_id",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "example": "mistral-ins-7b-q4"
+ },
+ "description": "The ID of the model to use for this request.\n"
+ }
+ ],
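+        "x-codeSamples": [
+          {
+            "lang": "cURL",
+            "source": "curl -X 'GET' 'http://localhost:1337/v1/models/download/{model_id}' \\\n  -H 'accept: application/json'\n"
+          }
+        ],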
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/DownloadModelResponse"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/models/{model_id}": {
+ "get": {
+ "operationId": "retrieveModel",
+ "tags": ["Models"],
+ "summary": "Retrieve model",
+ "description": "Get a model instance, providing basic information about the model such as the owner and permissioning. Equivalent to OpenAI's retrieve model. \n",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "model_id",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "example": "mistral-ins-7b-q4"
+ },
+ "description": "The ID of the model to use for this request.\n"
+ }
+ ],
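+        "x-codeSamples": [
+          {
+            "lang": "cURL",
+            "source": "curl -X 'GET' 'http://localhost:1337/v1/models/{model_id}' \\\n  -H 'accept: application/json'\n"
+          }
+        ],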
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/GetModelResponse"
+ }
+ }
+ }
+ }
+ }
+ },
+ "delete": {
+ "operationId": "deleteModel",
+ "tags": ["Models"],
+ "summary": "Delete model",
+ "description": "Delete a model. Equivalent to OpenAI's delete model. \n",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "model_id",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "example": "mistral-ins-7b-q4"
+ },
+ "description": "The model id to delete\n"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/DeleteModelResponse"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/threads": {
+ "post": {
+ "operationId": "createThread",
+ "tags": ["Threads"],
+ "summary": "Create thread",
+ "description": "Create a thread. Equivalent to OpenAI's create thread. \n",
+ "requestBody": {
+ "required": false,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateThreadObject"
+ }
+ }
+ }
+ },
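+        "x-codeSamples": [
+          {
+            "lang": "cURL",
+            "source": "curl -X 'POST' 'http://localhost:1337/v1/threads' \\\n  -H 'accept: application/json' \\\n  -H 'Content-Type: application/json'\n"
+          }
+        ],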
+ "responses": {
+ "200": {
+ "description": "Thread created successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateThreadResponse"
+ }
+ }
+ }
+ }
+ }
+ },
+ "get": {
+ "operationId": "listThreads",
+ "tags": ["Threads"],
+ "summary": "List threads",
+ "description": "Retrieves a list of all threads available in the system.\n",
+ "responses": {
+ "200": {
+ "description": "List of threads retrieved successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ThreadObject"
+ },
+ "example": [
+ {
+ "id": "thread_abc123",
+ "object": "thread",
+ "created_at": 1699014083,
+ "assistants": ["assistant-001"],
+ "metadata": {},
+ "messages": []
+ },
+ {
+ "id": "thread_abc456",
+ "object": "thread",
+ "created_at": 1699014083,
+ "assistants": ["assistant-002", "assistant-003"],
+ "metadata": {}
+ }
+ ]
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/threads/{thread_id}": {
+ "get": {
+ "operationId": "getThread",
+ "tags": ["Threads"],
+ "summary": "Retrieve thread",
+ "description": "Retrieves detailed information about a specific thread using its thread_id. Equivalent to OpenAI's retrieve thread. \n",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "thread_id",
+ "required": true,
+ "schema": {
+ "type": "string"
+ },
+ "description": "The ID of the thread to retrieve.\n"
+ }
+ ],
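+        "x-codeSamples": [
+          {
+            "lang": "cURL",
+            "source": "curl -X 'GET' 'http://localhost:1337/v1/threads/{thread_id}' \\\n  -H 'accept: application/json'\n"
+          }
+        ],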
+ "responses": {
+ "200": {
+ "description": "Thread details retrieved successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/GetThreadResponse"
+ }
+ }
+ }
+ }
+ }
+ },
+ "patch": {
+ "operationId": "modifyThread",
+ "tags": ["Threads"],
+ "summary": "Modify thread",
+ "description": "Modifies a thread. Equivalent to OpenAI's modify thread. \n",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "thread_id",
+ "required": true,
+ "schema": {
+ "type": "string"
+ },
+ "description": "The ID of the thread to be modified.\n"
+ }
+ ],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "title": {
+ "type": "string",
+ "description": "Set the title of the thread",
+ "items": {
+ "$ref": "#/components/schemas/ThreadMessageObject"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "responses": {
+ "200": {
+ "description": "Thread modified successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ModifyThreadResponse"
+ }
+ }
+ }
+ }
+ }
+ },
+ "delete": {
+ "operationId": "deleteThread",
+ "tags": ["Threads"],
+ "summary": "Delete thread",
+ "description": "Delete a thread. Equivalent to OpenAI's delete thread. \n",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "thread_id",
+ "required": true,
+ "schema": {
+ "type": "string"
+ },
+ "description": "The ID of the thread to be deleted.\n"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Thread deleted successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/DeleteThreadResponse"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/assistants": {
+ "get": {
+ "operationId": "listAssistants",
+ "tags": ["Assistants"],
+ "summary": "List assistants",
+ "description": "Return a list of assistants. Equivalent to OpenAI's list assistants. \n",
+ "responses": {
+ "200": {
+ "description": "List of assistants retrieved successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string"
+ },
+ "object": {
+ "type": "string"
+ },
+ "version": {
+ "type": "integer"
+ },
+ "created_at": {
+ "type": "integer"
+ },
+ "name": {
+ "type": "string"
+ },
+ "description": {
+ "type": "string"
+ },
+ "avatar": {
+ "type": "string",
+ "format": "uri"
+ },
+ "models": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "model_id": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "instructions": {
+ "type": "string"
+ },
+ "events": {
+ "type": "object",
+ "properties": {
+ "in": {
+ "type": "array",
+ "items": {}
+ },
+ "out": {
+ "type": "array",
+ "items": {}
+ }
+ }
+ },
+ "metadata": {
+ "type": "object"
+ },
+ "x-codeSamples": {
+ "type": "object",
+ "properties": {
+ "cURL": {
+ "type": "object",
+ "properties": {
+ "lang": {
+ "type": "string",
+ "example": "cURL"
+ },
+ "source": {
+ "type": "string",
+ "example": "curl http://localhost:1337/v1/assistants \\\n -H \"Content-Type: application/json\"\n"
+ }
+ }
+ },
+ "JavaScript": {
+ "type": "object",
+ "properties": {
+ "lang": {
+ "type": "string",
+ "example": "JavaScript"
+ },
+ "source": {
+ "type": "string",
+ "example": "fetch('http://localhost:1337/v1/assistants', {\n method: 'GET',\n headers: {\n 'Content-Type': 'application/json'\n }\n})\n"
+ }
+ }
+ },
+ "Node.js": {
+ "type": "object",
+ "properties": {
+ "lang": {
+ "type": "string",
+ "example": "Node.js"
+ },
+ "source": {
+ "type": "string",
+ "example": "const fetch = require('node-fetch');\n\nfetch('http://localhost:1337/v1/assistants', {\n method: 'GET',\n headers: {\n 'Content-Type': 'application/json'\n }\n})\n"
+ }
+ }
+ },
+ "Python": {
+ "type": "object",
+ "properties": {
+ "lang": {
+ "type": "string",
+ "example": "Python"
+ },
+ "source": {
+ "type": "string",
+ "example": "import requests\n\nurl = 'http://localhost:1337/v1/assistants'\nheaders = {'Content-Type': 'application/json'}\n\nresponse = requests.get(url, headers=headers)\n"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/assistants/{assistant_id}": {
+ "get": {
+ "operationId": "getAssistant",
+ "tags": ["Assistants"],
+ "summary": "Retrieve assistant",
+ "description": "Retrieves an assistant. Equivalent to OpenAI's retrieve assistants. \n",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "assistant_id",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "example": "jan"
+ },
+ "description": "The ID of the assistant to retrieve.\n"
+ }
+ ],
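+        "x-codeSamples": [
+          {
+            "lang": "cURL",
+            "source": "curl -X 'GET' 'http://localhost:1337/v1/assistants/{assistant_id}' \\\n  -H 'accept: application/json'\n"
+          }
+        ],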
+ "responses": {
+ "200": {
+ "description": "string",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RetrieveAssistantResponse"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/threads/{thread_id}/messages": {
+ "get": {
+ "operationId": "listMessages",
+ "tags": ["Messages"],
+ "summary": "List messages",
+ "description": "Retrieves all messages from the given thread. Equivalent to OpenAI's list messages. \n",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "thread_id",
+ "required": true,
+ "schema": {
+ "type": "string"
+ },
+ "description": "The ID of the thread from which to retrieve messages.\n"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "List of messages retrieved successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ListMessagesResponse"
+ }
+ }
+ }
+ }
+ }
+ },
+ "post": {
+ "operationId": "createMessage",
+ "tags": ["Messages"],
+ "summary": "Create message",
+ "description": "Create a message. Equivalent to OpenAI's list messages. \n",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "thread_id",
+ "required": true,
+ "schema": {
+ "type": "string"
+ },
+ "description": "The ID of the thread to which the message will be posted.\n"
+ }
+ ],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "role": {
+ "type": "string",
+ "description": "Role of the sender, either 'user' or 'assistant'.\n",
+ "example": "user",
+ "enum": ["user", "assistant"]
+ },
+ "content": {
+ "type": "string",
+ "description": "Text content of the message.\n",
+ "example": "How does AI work? Explain it in simple terms."
+ }
+ },
+ "required": ["role", "content"]
+ }
+ }
+ }
+ },
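+        "x-codeSamples": [
+          {
+            "lang": "cURL",
+            "source": "curl -X 'POST' 'http://localhost:1337/v1/threads/{thread_id}/messages' \\\n  -H 'accept: application/json' \\\n  -H 'Content-Type: application/json' \\\n  -d '{\"role\": \"user\", \"content\": \"How does AI work? Explain it in simple terms.\"}'\n"
+          }
+        ],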
+ "responses": {
+ "200": {
+ "description": "Message created successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateMessageResponse"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/threads/{thread_id}/messages/{message_id}": {
+ "get": {
+ "operationId": "retrieveMessage",
+ "tags": ["Messages"],
+ "summary": "Retrieve message",
+ "description": "Retrieve a specific message from a thread using its thread_id and message_id. Equivalent to OpenAI's retrieve messages. \n",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "thread_id",
+ "required": true,
+ "schema": {
+ "type": "string"
+ },
+ "description": "The ID of the thread containing the message.\n"
+ },
+ {
+ "in": "path",
+ "name": "message_id",
+ "required": true,
+ "schema": {
+ "type": "string"
+ },
+ "description": "The ID of the message to retrieve.\n"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/GetMessageResponse"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "x-webhooks": {
+ "ModelObject": {
+ "post": {
+ "summary": "The model object",
+ "description": "Describe a model offering that can be used with the API. Equivalent to OpenAI's model object. \n",
+ "operationId": "ModelObject",
+ "tags": ["Models"],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ModelObject"
+ }
+ }
+ }
+ }
+ }
+ },
+ "AssistantObject": {
+ "post": {
+ "summary": "The assistant object",
+ "description": "Build assistants that can call models and use tools to perform tasks. Equivalent to OpenAI's assistants object. \n",
+ "operationId": "AssistantObjects",
+ "tags": ["Assistants"],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/AssistantObject"
+ }
+ }
+ }
+ }
+ }
+ },
+ "MessageObject": {
+ "post": {
+ "summary": "The message object",
+ "description": "Information about a message in the thread. Equivalent to OpenAI's message object. \n",
+ "operationId": "MessageObject",
+ "tags": ["Messages"],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": null
+ }
+ }
+ }
+ }
+ }
+ },
+ "ThreadObject": {
+ "post": {
+ "summary": "The thread object",
+ "description": "Represents a thread that contains messages. Equivalent to OpenAI's thread object. ",
+ "operationId": "ThreadObject",
+ "tags": ["Threads"],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": null
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "components": {
+ "schemas": {
+ "ThreadObject": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the thread, defaults to foldername.\n",
+ "example": "thread_...."
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, defaults to thread.\n",
+ "example": "thread"
+ },
+ "title": {
+ "type": "string",
+ "description": "A brief summary or description of the thread, defaults to an empty string.\n",
+ "example": "funny physics joke"
+ },
+ "assistants": {
+ "type": "array",
+ "description": "",
+ "items": {
+ "properties": {
+ "assistant_id": {
+ "type": "string",
+ "description": "The identifier of assistant, defaults to \"jan\"\n",
+ "example": "jan"
+ },
+ "model": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "",
+ "example": "..."
+ },
+ "settings": {
+ "type": "object",
+ "description": "Defaults to and overrides assistant.json's \"settings\" (and if none, then model.json \"settings\")\n"
+ },
+ "parameters": {
+ "type": "object",
+ "description": "Defaults to and overrides assistant.json's \"parameters\" (and if none, then model.json \"parameters\")\n"
+ }
+ }
+ }
+ }
+ }
+ },
+ "created": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the thread, defaults to file creation time.\n",
+ "example": 1231231
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the thread, defaults to an empty object.\n",
+ "example": {}
+ }
+ }
+ },
+ "GetThreadResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the thread.",
+ "example": "thread_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object",
+ "example": "thread"
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the thread.",
+ "example": 1699014083
+ },
+ "assistants": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "List of assistants involved in the thread.",
+ "example": ["assistant-001"]
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the thread.",
+ "example": {}
+ },
+ "messages": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "List of messages within the thread.",
+ "example": []
+ }
+ }
+ },
+ "CreateThreadResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the newly created thread.",
+ "example": "thread_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a thread.",
+ "example": "thread"
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the thread.",
+ "example": 1699014083
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the newly created thread.",
+ "example": {}
+ }
+ }
+ },
+ "CreateThreadObject": {
+ "type": "object",
+ "properties": {
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a thread.",
+ "example": "thread"
+ },
+ "title": {
+ "type": "string",
+ "description": "A brief summary or description of the thread, defaults to an empty string.\n",
+ "example": "funny physics joke"
+ },
+ "assistants": {
+ "type": "array",
+ "description": "assistant involved in the thread",
+ "items": {
+ "properties": {
+ "assistant_id": {
+ "type": "string",
+ "description": "The identifier of assistant, defaults to \"jan\"\n",
+ "example": "jan"
+ },
+ "assistant_name": {
+ "type": "string",
+ "description": "The name of assistant, defaults to \"Jan\"\n",
+ "example": "Jan"
+ },
+ "instructions": {
+ "type": "string",
+ "description": "The instruction of assistant, defaults to \"Be my grammar corrector\"\n"
+ },
+ "model": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Model id",
+ "example": "mistral-ins-7b-q4"
+ },
+ "settings": {
+ "type": "object",
+ "description": "Defaults to and overrides assistant.json's \"settings\" (and if none, then model.json \"settings\")\n"
+ },
+ "parameters": {
+ "type": "object",
+ "description": "Defaults to and overrides assistant.json's \"parameters\" (and if none, then model.json \"parameters\")\n"
+ },
+ "engine": {
+ "type": "string",
+ "description": "Engine id",
+ "example": "nitro"
+ }
+ }
+ }
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the thread, defaults to an empty object.\n"
+ }
+ }
+ },
+ "ThreadMessageObject": {
+ "type": "object",
+ "properties": {
+ "role": {
+ "type": "string",
+            "description": "Role of the sender, either 'user' or 'assistant'.\n",
+ "enum": ["user", "assistant"]
+ },
+ "content": {
+ "type": "string",
+            "description": "Text content of the message.\n"
+ },
+ "file_ids": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+            "description": "Array of file IDs associated with the message, if any.\n"
+ }
+ }
+ },
+ "ModifyThreadResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+            "description": "The identifier of the modified thread.\n",
+ "example": "thread_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a thread.",
+ "example": "thread"
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the thread.",
+ "example": 1699014083
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the modified thread.",
+ "example": {}
+ }
+ }
+ },
+ "DeleteThreadResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the deleted thread.",
+ "example": "thread_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating the thread has been deleted.",
+ "example": "thread.deleted"
+ },
+ "deleted": {
+ "type": "boolean",
+ "description": "Indicates whether the thread was successfully deleted.",
+ "example": true
+ }
+ }
+ },
+ "ListModelsResponse": {
+ "type": "object",
+ "properties": {
+ "object": {
+ "type": "string",
+ "enum": ["list"]
+ },
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Model"
+ }
+ }
+ },
+ "required": ["object", "data"]
+ },
+ "Model": {
+ "type": "object",
+ "properties": {
+ "source_url": {
+ "type": "string",
+ "format": "uri",
+ "description": "URL to the source of the model.",
+ "example": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf"
+ },
+ "id": {
+ "type": "string",
+ "description": "Unique identifier used in chat-completions model_name, matches folder name.",
+ "example": "trinity-v1.2-7b"
+ },
+ "object": {
+ "type": "string",
+ "example": "model"
+ },
+ "name": {
+ "type": "string",
+ "description": "Name of the model.",
+ "example": "Trinity-v1.2 7B Q4"
+ },
+ "version": {
+ "type": "string",
+ "default": "1.0",
+ "description": "The version number of the model."
+ },
+ "description": {
+ "type": "string",
+ "description": "Description of the model.",
+ "example": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes."
+ },
+ "format": {
+ "type": "string",
+ "description": "State format of the model, distinct from the engine.",
+ "example": "gguf"
+ },
+ "settings": {
+ "type": "object",
+ "properties": {
+ "ctx_len": {
+ "type": "integer",
+ "description": "Context length.",
+ "example": 4096
+ },
+ "prompt_template": {
+ "type": "string",
+ "example": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
+ }
+ },
+ "additionalProperties": false
+ },
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "temperature": {
+ "example": 0.7
+ },
+ "top_p": {
+ "example": 0.95
+ },
+ "stream": {
+ "example": true
+ },
+ "max_tokens": {
+ "example": 4096
+ },
+ "stop": {
+ "example": []
+ },
+ "frequency_penalty": {
+ "example": 0
+ },
+ "presence_penalty": {
+ "example": 0
+ }
+ },
+ "additionalProperties": false
+ },
+ "metadata": {
+ "type": "object",
+ "properties": {
+ "author": {
+ "type": "string",
+ "example": "Jan"
+ },
+ "tags": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "example": ["7B", "Merged", "Featured"]
+ },
+ "size": {
+ "type": "integer",
+ "example": 4370000000
+ },
+ "cover": {
+ "type": "string",
+ "format": "uri",
+ "example": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
+ }
+ },
+ "additionalProperties": false
+ },
+ "engine": {
+ "example": "nitro"
+ }
+ }
+ },
+ "ModelObject": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the model.\n",
+ "example": "trinity-v1.2-7b"
+ },
+ "object": {
+ "type": "string",
+ "description": "The type of the object, indicating it's a model.\n",
+ "default": "model"
+ },
+ "created": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the model.\n",
+ "example": 1253935178
+ },
+ "owned_by": {
+ "type": "string",
+ "description": "The entity that owns the model.\n",
+ "example": "_"
+ }
+ }
+ },
+ "GetModelResponse": {
+ "type": "object",
+ "properties": {
+ "source_url": {
+ "type": "string",
+ "format": "uri",
+ "description": "URL to the source of the model.",
+ "example": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+ },
+ "id": {
+ "type": "string",
+ "description": "Unique identifier used in chat-completions model_name, matches folder name.",
+ "example": "mistral-ins-7b-q4"
+ },
+ "object": {
+ "type": "string",
+ "example": "model"
+ },
+ "name": {
+ "type": "string",
+ "description": "Name of the model.",
+ "example": "Mistral Instruct 7B Q4"
+ },
+ "version": {
+ "type": "string",
+ "default": "1.0",
+ "description": "The version number of the model."
+ },
+ "description": {
+ "type": "string",
+ "description": "Description of the model.",
+          "example": "Mistral 7B Instruct is a model fine-tuned to follow instructions. Recommended for daily assistance purposes."
+ },
+ "format": {
+ "type": "string",
+ "description": "State format of the model, distinct from the engine.",
+ "example": "gguf"
+ },
+ "settings": {
+ "type": "object",
+ "properties": {
+ "ctx_len": {
+ "type": "integer",
+ "description": "Context length.",
+ "example": 4096
+ },
+ "prompt_template": {
+ "type": "string",
+ "example": "[INST] {prompt} [/INST]"
+ }
+ },
+ "additionalProperties": false
+ },
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "temperature": {
+ "example": 0.7
+ },
+ "top_p": {
+ "example": 0.95
+ },
+ "stream": {
+ "example": true
+ },
+ "max_tokens": {
+ "example": 4096
+ },
+ "stop": {
+ "example": []
+ },
+ "frequency_penalty": {
+ "example": 0
+ },
+ "presence_penalty": {
+ "example": 0
+ }
+ },
+ "additionalProperties": false
+ },
+ "metadata": {
+ "type": "object",
+ "properties": {
+ "author": {
+ "type": "string",
+ "example": "MistralAI"
+ },
+ "tags": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "example": ["7B", "Featured", "Foundation Model"]
+ },
+ "size": {
+ "example": 4370000000,
+ "type": "integer"
+ },
+ "cover": {
+ "example": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png",
+ "type": "string"
+ }
+ },
+ "additionalProperties": false
+ },
+ "engine": {
+ "example": "nitro"
+ }
+ }
+ },
+ "DeleteModelResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the model that was deleted.",
+ "example": "mistral-ins-7b-q4"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a model.",
+ "default": "model"
+ },
+ "deleted": {
+ "type": "boolean",
+ "description": "Indicates whether the model was successfully deleted.",
+ "example": true
+ }
+ }
+ },
+ "StartModelResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the model that was started.",
+ "example": "model-zephyr-7B"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a model.",
+ "default": "model"
+ },
+ "state": {
+ "type": "string",
+ "description": "The current state of the model after the start operation.",
+ "example": "running"
+ }
+ },
+ "required": ["id", "object", "state"]
+ },
+ "StopModelResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+          "description": "The identifier of the model that was stopped.",
+ "example": "model-zephyr-7B"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a model.",
+ "default": "model"
+ },
+ "state": {
+ "type": "string",
+          "description": "The current state of the model after the stop operation.",
+ "example": "stopped"
+ }
+ },
+ "required": ["id", "object", "state"]
+ },
+ "DownloadModelResponse": {
+ "type": "object",
+ "properties": {
+ "message": {
+ "type": "string",
+          "description": "Message indicating that Jan has started downloading the corresponding model.",
+ "example": "Starting download mistral-ins-7b-q4"
+ }
+ }
+ },
+ "MessageObject": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Sequential or UUID identifier of the message.\n",
+          "example": "0"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, defaults to 'thread.message'.\n",
+ "example": "thread.message"
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the message.\n"
+ },
+ "thread_id": {
+ "type": "string",
+ "description": "Identifier of the thread to which this message belongs. Defaults to parent thread.\n",
+ "example": "thread_asdf"
+ },
+ "assistant_id": {
+ "type": "string",
+ "description": "Identifier of the assistant involved in the message. Defaults to parent thread.\n",
+ "example": "jan"
+ },
+ "role": {
+ "type": "string",
+ "enum": ["user", "assistant"],
+ "description": "Role of the sender, either 'user' or 'assistant'.\n"
+ },
+ "content": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "Type of content, e.g., 'text'.\n"
+ },
+ "text": {
+ "type": "object",
+ "properties": {
+ "value": {
+ "type": "string",
+ "description": "Text content of the message.\n",
+ "example": "Hi!?"
+ },
+ "annotations": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Annotations for the text content, if any.\n",
+ "example": []
+ }
+ }
+ }
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the message, defaults to an empty object.\n",
+ "example": {}
+ }
+ }
+ },
+ "GetMessageResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the message.",
+ "example": "msg_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a thread message.",
+ "default": "thread.message"
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the message.",
+ "example": 1699017614
+ },
+ "thread_id": {
+ "type": "string",
+ "description": "Identifier of the thread to which this message belongs.",
+ "example": "thread_abc123"
+ },
+ "role": {
+ "type": "string",
+ "description": "Role of the sender, either 'user' or 'assistant'.",
+ "example": "user"
+ },
+ "content": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "Type of content, e.g., 'text'.",
+ "example": "text"
+ },
+ "text": {
+ "type": "object",
+ "properties": {
+ "value": {
+ "type": "string",
+ "description": "Text content of the message.",
+ "example": "How does AI work? Explain it in simple terms."
+ },
+ "annotations": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Annotations for the text content, if any.",
+ "example": []
+ }
+ }
+ }
+ }
+ }
+ },
+ "file_ids": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Array of file IDs associated with the message, if any.",
+ "example": []
+ },
+ "assistant_id": {
+ "type": "string",
+ "description": "Identifier of the assistant involved in the message, if applicable.",
+ "example": null
+ },
+ "run_id": {
+ "type": "string",
+ "description": "Run ID associated with the message, if applicable.",
+ "example": null
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the message.",
+ "example": {}
+ }
+ }
+ },
+ "CreateMessageResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the created message.",
+ "example": "msg_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a thread message.",
+ "example": "thread.message"
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the message.",
+ "example": 1699017614
+ },
+ "thread_id": {
+ "type": "string",
+ "description": "Identifier of the thread to which this message belongs.",
+ "example": "thread_abc123"
+ },
+ "role": {
+ "type": "string",
+ "description": "Role of the sender, either 'user' or 'assistant'.",
+ "example": "user"
+ },
+ "content": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "Type of content, e.g., 'text'.",
+ "example": "text"
+ },
+ "text": {
+ "type": "object",
+ "properties": {
+ "value": {
+ "type": "string",
+ "description": "Text content of the message.",
+ "example": "How does AI work? Explain it in simple terms."
+ },
+ "annotations": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Annotations for the text content, if any.",
+ "example": []
+ }
+ }
+ }
+ }
+ }
+ },
+ "file_ids": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Array of file IDs associated with the message, if any.",
+ "example": []
+ },
+ "assistant_id": {
+ "type": "string",
+ "description": "Identifier of the assistant involved in the message, if applicable.",
+ "example": null
+ },
+ "run_id": {
+ "type": "string",
+ "description": "Run ID associated with the message, if applicable.",
+ "example": null
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the message.",
+ "example": {}
+ }
+ }
+ },
+ "ListMessagesResponse": {
+ "type": "object",
+ "properties": {
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a list.",
+ "default": "list"
+ },
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ListMessageObject"
+ }
+ },
+ "first_id": {
+ "type": "string",
+ "description": "Identifier of the first message in the list.",
+ "example": "msg_abc123"
+ },
+ "last_id": {
+ "type": "string",
+ "description": "Identifier of the last message in the list.",
+ "example": "msg_abc456"
+ },
+ "has_more": {
+ "type": "boolean",
+ "description": "Indicates whether there are more messages to retrieve.",
+ "example": false
+ }
+ }
+ },
+ "ListMessageObject": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the message.",
+ "example": "msg_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a thread message.",
+ "example": "thread.message"
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the message.",
+ "example": 1699017614
+ },
+ "thread_id": {
+ "type": "string",
+ "description": "Identifier of the thread to which this message belongs.",
+ "example": "thread_abc123"
+ },
+ "role": {
+ "type": "string",
+ "description": "Role of the sender, either 'user' or 'assistant'.",
+ "example": "user"
+ },
+ "content": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "Type of content, e.g., 'text'."
+ },
+ "text": {
+ "type": "object",
+ "properties": {
+ "value": {
+ "type": "string",
+ "description": "Text content of the message.",
+ "example": "How does AI work? Explain it in simple terms."
+ },
+ "annotations": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Annotations for the text content, if any."
+ }
+ }
+ }
+ }
+ }
+ },
+ "file_ids": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Array of file IDs associated with the message, if any.",
+ "example": []
+ },
+ "assistant_id": {
+ "type": "string",
+ "description": "Identifier of the assistant involved in the message, if applicable.",
+ "example": null
+ },
+ "run_id": {
+ "type": "string",
+ "description": "Run ID associated with the message, if applicable.",
+ "example": null
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the message.",
+ "example": {}
+ }
+ }
+ },
+ "MessageFileObject": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the file.",
+ "example": "file-abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a thread message file.",
+ "example": "thread.message.file"
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the file.",
+ "example": 1699061776
+ },
+ "message_id": {
+ "type": "string",
+ "description": "Identifier of the message to which this file is associated.",
+ "example": "msg_abc123"
+ }
+ }
+ },
+ "ListMessageFilesResponse": {
+ "type": "object",
+ "properties": {
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's a list.",
+ "default": "list"
+ },
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MessageFileObject"
+ }
+ }
+ }
+ },
+ "ChatObject": {
+ "type": "object",
+ "properties": {
+ "messages": {
+ "type": "array",
+ "description": "Contains input data or prompts for the model to process.\n",
+ "items": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "type": "string"
+ },
+ "role": {
+ "type": "string"
+ }
+ }
+ },
+ "example": [
+ {
+ "content": "Hello there :wave:",
+ "role": "assistant"
+ },
+ {
+ "content": "Can you write a long story",
+ "role": "user"
+ }
+ ]
+ },
+ "stream": {
+ "type": "boolean",
+ "default": true,
+ "description": "Enables continuous output generation, allowing for streaming of model responses."
+ },
+ "model": {
+ "type": "string",
+ "example": "gpt-3.5-turbo",
+ "description": "Specifies the model being used for inference or processing tasks."
+ },
+ "max_tokens": {
+ "type": "number",
+ "default": 2048,
+ "description": "The maximum number of tokens the model will generate in a single response."
+ },
+ "stop": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Defines specific tokens or phrases at which the model will stop generating further output.",
+ "example": ["hello"]
+ },
+ "frequency_penalty": {
+ "type": "number",
+ "default": 0,
+ "description": "Adjusts the likelihood of the model repeating words or phrases in its output."
+ },
+ "presence_penalty": {
+ "type": "number",
+ "default": 0,
+ "description": "Influences the generation of new and varied concepts in the model's output."
+ },
+ "temperature": {
+ "type": "number",
+ "default": 0.7,
+ "description": "Controls the randomness of the model's output."
+ },
+ "top_p": {
+ "type": "number",
+ "default": 0.95,
+ "description": "Set probability threshold for more relevant outputs."
+ },
+ "cache_prompt": {
+ "type": "boolean",
+ "default": true,
+ "description": "Optimize performance in repeated or similar requests."
+ }
+ }
+ },
+ "ChatCompletionRequest": {
+ "type": "object",
+ "properties": {
+ "messages": {
+ "type": "array",
+ "description": "Contains input data or prompts for the model to process.\n",
+ "items": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "type": "string"
+ },
+ "role": {
+ "type": "string"
+ }
+ }
+ },
+ "example": [
+ {
+ "content": "You are a helpful assistant.",
+ "role": "system"
+ },
+ {
+ "content": "Hello!",
+ "role": "user"
+ }
+ ]
+ },
+ "model": {
+ "type": "string",
+ "example": "tinyllama-1.1b",
+ "description": "Specifies the model being used for inference or processing tasks.\n"
+ },
+ "stream": {
+ "type": "boolean",
+ "default": true,
+ "description": "Enables continuous output generation, allowing for streaming of model responses.\n"
+ },
+ "max_tokens": {
+ "type": "number",
+ "default": 2048,
+ "description": "The maximum number of tokens the model will generate in a single response.\n"
+ },
+ "stop": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Defines specific tokens or phrases at which the model will stop generating further output.\n",
+ "example": ["hello"]
+ },
+ "frequency_penalty": {
+ "type": "number",
+ "default": 0,
+ "description": "Adjusts the likelihood of the model repeating words or phrases in its output.\n"
+ },
+ "presence_penalty": {
+ "type": "number",
+ "default": 0,
+ "description": "Influences the generation of new and varied concepts in the model's output.\n"
+ },
+ "temperature": {
+ "type": "number",
+ "default": 0.7,
+ "description": "Controls the randomness of the model's output.\n"
+ },
+ "top_p": {
+ "type": "number",
+ "default": 0.95,
+ "description": "Set probability threshold for more relevant outputs.\n"
+ }
+ }
+ },
+ "ChatCompletionResponse": {
+ "type": "object",
+ "description": "Description of the response structure",
+ "properties": {
+ "choices": {
+ "type": "array",
+ "description": "Array of choice objects",
+ "items": {
+ "type": "object",
+ "properties": {
+ "finish_reason": {
+ "type": "string",
+ "nullable": true,
+ "example": null,
+ "description": "Reason for finishing the response, if applicable"
+ },
+ "index": {
+ "type": "integer",
+ "example": 0,
+ "description": "Index of the choice"
+ },
+ "message": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "type": "string",
+ "example": "Hello user. What can I help you with?",
+ "description": "Content of the message"
+ },
+ "role": {
+ "type": "string",
+ "example": "assistant",
+ "description": "Role of the sender"
+ }
+ }
+ }
+ }
+ }
+ },
+ "created": {
+ "type": "integer",
+ "example": 1700193928,
+ "description": "Timestamp of when the response was created"
+ },
+ "id": {
+ "type": "string",
+ "example": "ebwd2niJvJB1Q2Whyvkz",
+ "description": "Unique identifier of the response"
+ },
+ "model": {
+ "type": "string",
+ "nullable": true,
+ "example": "_",
+ "description": "Model used for generating the response"
+ },
+ "object": {
+ "type": "string",
+ "example": "chat.completion",
+ "description": "Type of the response object"
+ },
+ "system_fingerprint": {
+ "type": "string",
+ "nullable": true,
+ "example": "_",
+ "description": "System fingerprint"
+ },
+ "usage": {
+ "type": "object",
+ "description": "Information about the usage of tokens",
+ "properties": {
+ "completion_tokens": {
+ "type": "integer",
+ "example": 500,
+ "description": "Number of tokens used for completion"
+ },
+ "prompt_tokens": {
+ "type": "integer",
+ "example": 33,
+ "description": "Number of tokens used in the prompt"
+ },
+ "total_tokens": {
+ "type": "integer",
+ "example": 533,
+ "description": "Total number of tokens used"
+ }
+ }
+ }
+ }
+ },
+ "AssistantObject": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the assistant.",
+ "example": "asst_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's an assistant.",
+ "default": "assistant"
+ },
+ "version": {
+ "type": "integer",
+ "description": "Version number of the assistant.",
+ "example": 1
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the assistant.",
+ "example": 1698984975
+ },
+ "name": {
+ "type": "string",
+ "description": "Name of the assistant.",
+ "example": "Math Tutor"
+ },
+ "description": {
+ "type": "string",
+ "description": "Description of the assistant. Can be null.",
+ "example": null
+ },
+ "avatar": {
+ "type": "string",
+ "description": "URL of the assistant's avatar. Jan-specific property.",
+ "example": "https://pic.png"
+ },
+ "models": {
+ "type": "array",
+ "description": "List of models associated with the assistant. Jan-specific property.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "model_id": {
+ "type": "string",
+ "example": "model_0"
+ }
+ }
+ }
+ },
+ "instructions": {
+ "type": "string",
+ "description": "A system prompt for the assistant.",
+ "example": "Be concise"
+ },
+ "events": {
+ "type": "object",
+ "description": "Event subscription settings for the assistant.",
+ "properties": {
+ "in": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "out": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the assistant."
+ }
+ }
+ },
+ "ListAssistantsResponse": {
+ "type": "object"
+ },
+ "CreateAssistantResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the assistant.",
+ "example": "asst_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's an assistant.",
+ "default": "assistant"
+ },
+ "version": {
+ "type": "integer",
+ "description": "Version number of the assistant.",
+ "example": 1
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the assistant.",
+ "example": 1698984975
+ },
+ "name": {
+ "type": "string",
+ "description": "Name of the assistant.",
+ "example": "Math Tutor"
+ },
+ "description": {
+ "type": "string",
+ "description": "Description of the assistant. Can be null.",
+ "example": null
+ },
+ "avatar": {
+ "type": "string",
+ "description": "URL of the assistant's avatar. Jan-specific property.",
+ "example": "https://pic.png"
+ },
+ "models": {
+ "type": "array",
+ "description": "List of models associated with the assistant. Jan-specific property.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "model_id": {
+ "type": "string",
+ "example": "model_0"
+ }
+ }
+ }
+ },
+ "instructions": {
+ "type": "string",
+ "description": "A system prompt for the assistant.",
+ "example": "Be concise"
+ },
+ "events": {
+ "type": "object",
+ "description": "Event subscription settings for the assistant.",
+ "properties": {
+ "in": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "out": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the assistant."
+ }
+ }
+ },
+ "RetrieveAssistantResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the assistant.",
+ "example": "asst_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's an assistant.",
+ "default": "assistant"
+ },
+ "version": {
+ "type": "integer",
+ "description": "Version number of the assistant.",
+ "example": 1
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the assistant.",
+ "example": 1698984975
+ },
+ "name": {
+ "type": "string",
+ "description": "Name of the assistant.",
+ "example": "Math Tutor"
+ },
+ "description": {
+ "type": "string",
+ "description": "Description of the assistant. Can be null.",
+ "example": null
+ },
+ "avatar": {
+ "type": "string",
+ "description": "URL of the assistant's avatar. Jan-specific property.",
+ "example": "https://pic.png"
+ },
+ "models": {
+ "type": "array",
+ "description": "List of models associated with the assistant. Jan-specific property.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "model_id": {
+ "type": "string",
+ "example": "model_0"
+ }
+ }
+ }
+ },
+ "instructions": {
+ "type": "string",
+ "description": "A system prompt for the assistant.",
+ "example": "Be concise"
+ },
+ "events": {
+ "type": "object",
+ "description": "Event subscription settings for the assistant.",
+ "properties": {
+ "in": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "out": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the assistant."
+ }
+ }
+ },
+ "ModifyAssistantObject": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the assistant.",
+ "example": "asst_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's an assistant.",
+ "default": "assistant"
+ },
+ "version": {
+ "type": "integer",
+ "description": "Version number of the assistant.",
+ "example": 1
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the assistant.",
+ "example": 1698984975
+ },
+ "name": {
+ "type": "string",
+ "description": "Name of the assistant.",
+ "example": "Math Tutor"
+ },
+ "description": {
+ "type": "string",
+ "description": "Description of the assistant. Can be null.",
+ "example": null
+ },
+ "avatar": {
+ "type": "string",
+ "description": "URL of the assistant's avatar. Jan-specific property.",
+ "example": "https://pic.png"
+ },
+ "models": {
+ "type": "array",
+ "description": "List of models associated with the assistant. Jan-specific property.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "model_id": {
+ "type": "string",
+ "example": "model_0"
+ }
+ }
+ }
+ },
+ "instructions": {
+ "type": "string",
+ "description": "A system prompt for the assistant.",
+ "example": "Be concise"
+ },
+ "events": {
+ "type": "object",
+ "description": "Event subscription settings for the assistant.",
+ "properties": {
+ "in": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "out": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the assistant."
+ }
+ }
+ },
+ "ModifyAssistantResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the assistant.",
+ "example": "asst_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating it's an assistant.",
+ "default": "assistant"
+ },
+ "version": {
+ "type": "integer",
+ "description": "Version number of the assistant.",
+ "example": 1
+ },
+ "created_at": {
+ "type": "integer",
+ "format": "int64",
+ "description": "Unix timestamp representing the creation time of the assistant.",
+ "example": 1698984975
+ },
+ "name": {
+ "type": "string",
+ "description": "Name of the assistant.",
+ "example": "Physics Tutor"
+ },
+ "description": {
+ "type": "string",
+ "description": "Description of the assistant. Can be null.",
+ "example": null
+ },
+ "avatar": {
+ "type": "string",
+ "description": "URL of the assistant's avatar. Jan-specific property.",
+ "example": "https://pic.png"
+ },
+ "models": {
+ "type": "array",
+ "description": "List of models associated with the assistant. Jan-specific property.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "model_id": {
+ "type": "string",
+ "example": "model_0"
+ }
+ }
+ }
+ },
+ "instructions": {
+ "type": "string",
+ "description": "A system prompt for the assistant.",
+ "example": "Be concise!"
+ },
+ "events": {
+ "type": "object",
+ "description": "Event subscription settings for the assistant.",
+ "properties": {
+ "in": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "out": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Metadata associated with the assistant."
+ }
+ }
+ },
+ "DeleteAssistantResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The identifier of the deleted assistant.",
+ "example": "asst_abc123"
+ },
+ "object": {
+ "type": "string",
+ "description": "Type of the object, indicating the assistant has been deleted.",
+ "example": "assistant.deleted"
+ },
+ "deleted": {
+ "type": "boolean",
+ "description": "Indicates whether the assistant was successfully deleted.",
+ "example": true
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml
new file mode 100644
index 000000000..35fd43175
--- /dev/null
+++ b/docs/openapi/jan.yaml
@@ -0,0 +1,1043 @@
+---
+openapi: 3.0.0
+info:
+ title: API Reference
+ description: >
+ # Introduction
+
+ Jan API is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference).
+  version: 0.1.8
+  contact:
+    name: Jan Discord
+    url: https://discord.gg/7EcEz7MrvA
+  license:
+    name: AGPLv3
+    url: https://github.com/janhq/nitro/blob/main/LICENSE
+servers:
+ - url: /v1
+tags:
+ - name: Models
+ description: List and describe the various models available in the API.
+ - name: Chat
+ description: >
+ Given a list of messages comprising a conversation, the model will
+ return a response.
+ - name: Messages
+ description: >
+ Messages capture a conversation's content. This can include the
+ content from LLM responses and other metadata from [chat
+ completions](/specs/chats).
+ - name: Threads
+ - name: Assistants
+ description: Configures and utilizes different AI assistants for varied tasks
+x-tagGroups:
+ - name: Endpoints
+ tags:
+ - Models
+ - Chat
+ - name: Chat
+ tags:
+ - Assistants
+ - Messages
+ - Threads
+paths:
+ /chat/completions:
+ post:
+ operationId: createChatCompletion
+ tags:
+ - Chat
+ summary: |
+ Create chat completion
+ description: >
+ Creates a model response for the given chat conversation.
+ Equivalent to OpenAI's create chat completion.
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: specs/chat.yaml#/components/schemas/ChatCompletionRequest
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: specs/chat.yaml#/components/schemas/ChatCompletionResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |
+ curl -X 'POST' \
+ 'http://localhost:1337/v1/chat/completions' \
+ -H 'accept: application/json' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "messages": [
+ {
+ "content": "You are a helpful assistant.",
+ "role": "system"
+ },
+ {
+ "content": "Hello!",
+ "role": "user"
+ }
+ ],
+ "model": "tinyllama-1.1b",
+ "stream": true,
+ "max_tokens": 2048,
+ "stop": [
+ "hello"
+ ],
+ "frequency_penalty": 0,
+ "presence_penalty": 0,
+ "temperature": 0.7,
+ "top_p": 0.95
+ }'
+ - lang: JavaScript
+ source: |-
+ const data = {
+ messages: [
+ {
+ content: 'You are a helpful assistant.',
+ role: 'system'
+ },
+ {
+ content: 'Hello!',
+ role: 'user'
+ }
+ ],
+ model: 'tinyllama-1.1b',
+ stream: true,
+ max_tokens: 2048,
+ stop: ['hello'],
+ frequency_penalty: 0,
+ presence_penalty: 0,
+ temperature: 0.7,
+ top_p: 0.95
+ };
+
+ fetch('http://localhost:1337/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json'
+ },
+ body: JSON.stringify(data)
+ })
+ .then(response => response.json())
+ .then(data => console.log(data));
+ - lang: Node.js
+ source: |-
+ const fetch = require('node-fetch');
+
+ const data = {
+ messages: [
+ {
+ content: 'You are a helpful assistant.',
+ role: 'system'
+ },
+ {
+ content: 'Hello!',
+ role: 'user'
+ }
+ ],
+ model: 'tinyllama-1.1b',
+ stream: true,
+ max_tokens: 2048,
+ stop: ['hello'],
+ frequency_penalty: 0,
+ presence_penalty: 0,
+ temperature: 0.7,
+ top_p: 0.95
+ };
+
+ fetch('http://localhost:1337/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json'
+ },
+ body: JSON.stringify(data)
+ })
+ .then(response => response.json())
+ .then(data => console.log(data));
+ - lang: Python
+ source: >-
+ import requests
+
+ import json
+
+
+ data = {
+ "messages": [
+ {
+ "content": "You are a helpful assistant.",
+ "role": "system"
+ },
+ {
+ "content": "Hello!",
+ "role": "user"
+ }
+ ],
+ "model": "tinyllama-1.1b",
+ "stream": true,
+ "max_tokens": 2048,
+ "stop": [
+ "hello"
+ ],
+ "frequency_penalty": 0,
+ "presence_penalty": 0,
+ "temperature": 0.7,
+ "top_p": 0.95
+ }
+
+
+ response = requests.post('http://localhost:1337/v1/chat/completions', json=data)
+
+ print(response.json())
+ /models:
+ get:
+ operationId: listModels
+ tags:
+ - Models
+ summary: List models
+ description: >
+ Lists the currently available models, and provides basic
+ information about each one such as the owner and availability.
+        Equivalent to OpenAI's list models.
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: specs/models.yaml#/components/schemas/ListModelsResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |-
+ curl -X 'GET' \
+ 'http://localhost:1337/v1/models' \
+ -H 'accept: application/json'
+ - lang: JavaScript
+ source: |-
+ const response = await fetch('http://localhost:1337/v1/models', {
+ method: 'GET',
+ headers: {Accept: 'application/json'}
+ });
+ const data = await response.json();
+ - lang: Node.js
+ source: |-
+ const fetch = require('node-fetch');
+
+ const url = 'http://localhost:1337/v1/models';
+ const options = {
+ method: 'GET',
+ headers: { Accept: 'application/json' }
+ };
+
+ fetch(url, options)
+ .then(res => res.json())
+ .then(json => console.log(json));
+ - lang: Python
+ source: |-
+ import requests
+
+ url = 'http://localhost:1337/v1/models'
+ headers = {'Accept': 'application/json'}
+ response = requests.get(url, headers=headers)
+ data = response.json()
+ '/models/download/{model_id}':
+ get:
+ operationId: downloadModel
+ tags:
+ - Models
+ summary: Download a specific model.
+ description: |
+ Download a model.
+ parameters:
+ - in: path
+ name: model_id
+ required: true
+ schema:
+ type: string
+ example: mistral-ins-7b-q4
+ description: |
+ The ID of the model to use for this request.
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: specs/models.yaml#/components/schemas/DownloadModelResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |-
+ curl -X 'GET' \
+ 'http://localhost:1337/v1/models/download/{model_id}' \
+ -H 'accept: application/json'
+        - lang: JavaScript
+          source: |-
+            const modelId = 'mistral-ins-7b-q4';
+
+            const response = await fetch(`http://localhost:1337/v1/models/download/${modelId}`, {
+              method: 'GET',
+              headers: {accept: 'application/json'}
+            });
+            const data = await response.json();
+        - lang: Node.js
+          source: |-
+            const fetch = require('node-fetch');
+
+            const modelId = 'mistral-ins-7b-q4';
+
+            fetch(`http://localhost:1337/v1/models/download/${modelId}`, {
+              method: 'GET',
+              headers: {accept: 'application/json'}
+            })
+              .then(res => res.json())
+              .then(data => console.log(data));
+        - lang: Python
+          source: |-
+            import requests
+
+            model_id = 'mistral-ins-7b-q4'
+
+            response = requests.get(f'http://localhost:1337/v1/models/download/{model_id}', headers={'accept': 'application/json'})
+            data = response.json()
+ '/models/{model_id}':
+ get:
+ operationId: retrieveModel
+ tags:
+ - Models
+ summary: Retrieve model
+ description: >
+ Get a model instance, providing basic information about the model
+ such as the owner and permissioning.
+ Equivalent to OpenAI's retrieve model.
+ parameters:
+ - in: path
+ name: model_id
+ required: true
+ schema:
+ type: string
+ example: mistral-ins-7b-q4
+ description: |
+ The ID of the model to use for this request.
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: specs/models.yaml#/components/schemas/GetModelResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |-
+ curl -X 'GET' \
+ 'http://localhost:1337/v1/models/{model_id}' \
+ -H 'accept: application/json'
+ - lang: JavaScript
+ source: |-
+ const fetch = require('node-fetch');
+
+ const modelId = 'mistral-ins-7b-q4';
+
+ fetch(`http://localhost:1337/v1/models/${modelId}`, {
+ method: 'GET',
+ headers: {'accept': 'application/json'}
+ })
+ .then(res => res.json())
+ .then(json => console.log(json));
+ - lang: Node.js
+ source: |-
+ const fetch = require('node-fetch');
+
+ const modelId = 'mistral-ins-7b-q4';
+
+ fetch(`http://localhost:1337/v1/models/${modelId}`, {
+ method: 'GET',
+ headers: {'accept': 'application/json'}
+ })
+ .then(res => res.json())
+ .then(json => console.log(json));
+ - lang: Python
+ source: >-
+ import requests
+
+
+ model_id = 'mistral-ins-7b-q4'
+
+
+ response = requests.get(f'http://localhost:1337/v1/models/{model_id}', headers={'accept': 'application/json'})
+
+ print(response.json())
+ delete:
+ operationId: deleteModel
+ tags:
+ - Models
+ summary: Delete model
+ description: >
+ Delete a model.
+ Equivalent to OpenAI's delete model.
+ parameters:
+ - in: path
+ name: model_id
+ required: true
+ schema:
+ type: string
+ example: mistral-ins-7b-q4
+ description: |
+ The model id to delete
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: specs/models.yaml#/components/schemas/DeleteModelResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |-
+ curl -X 'DELETE' \
+ 'http://localhost:1337/v1/models/{model_id}' \
+ -H 'accept: application/json'
+ - lang: JavaScript
+ source: |-
+ const fetch = require('node-fetch');
+
+ const modelId = 'mistral-ins-7b-q4';
+
+ fetch(`http://localhost:1337/v1/models/${modelId}`, {
+ method: 'DELETE',
+ headers: { 'accept': 'application/json' }
+ })
+ .then(res => res.json())
+ .then(json => console.log(json));
+ - lang: Node.js
+ source: |-
+ const fetch = require('node-fetch');
+
+ const modelId = 'mistral-ins-7b-q4';
+
+ fetch(`http://localhost:1337/v1/models/${modelId}`, {
+ method: 'DELETE',
+ headers: { 'accept': 'application/json' }
+ })
+ .then(res => res.json())
+ .then(json => console.log(json));
+ - lang: Python
+ source: >-
+ import requests
+
+
+ model_id = 'mistral-ins-7b-q4'
+
+
+ response = requests.delete(f'http://localhost:1337/v1/models/{model_id}', headers={'accept': 'application/json'})
+ /threads:
+ post:
+ operationId: createThread
+ tags:
+ - Threads
+ summary: Create thread
+ description: >
+ Create a thread.
+ Equivalent to OpenAI's create thread.
+ requestBody:
+ required: false
+ content:
+ application/json:
+ schema:
+ $ref: specs/threads.yaml#/components/schemas/CreateThreadObject
+ responses:
+ '200':
+ description: Thread created successfully
+ content:
+ application/json:
+ schema:
+ $ref: specs/threads.yaml#/components/schemas/CreateThreadResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |
+ curl -X POST http://localhost:1337/v1/threads \
+ -H "Content-Type: application/json" \
+ -d '{
+ "messages": [{
+ "role": "user",
+ "content": "Hello, what is AI?",
+ "file_ids": ["file-abc123"]
+ }, {
+ "role": "user",
+ "content": "How does AI work? Explain it in simple terms."
+ }]
+ }'
+ - lang: JavaScript
+ source: |-
+ const fetch = require('node-fetch');
+
+ fetch('http://localhost:1337/v1/threads', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json'
+ },
+ body: JSON.stringify({
+ messages: [
+ {
+ role: 'user',
+ content: 'Hello, what is AI?',
+ file_ids: ['file-abc123']
+ },
+ {
+ role: 'user',
+ content: 'How does AI work? Explain it in simple terms.'
+ }
+ ]
+ })
+ });
+ - lang: Node.js
+ source: |-
+ const fetch = require('node-fetch');
+
+ fetch('http://localhost:1337/v1/threads', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json'
+ },
+ body: JSON.stringify({
+ messages: [
+ {
+ role: 'user',
+ content: 'Hello, what is AI?',
+ file_ids: ['file-abc123']
+ },
+ {
+ role: 'user',
+ content: 'How does AI work? Explain it in simple terms.'
+ }
+ ]
+ })
+ });
+ - lang: Python
+ source: |-
+ import requests
+
+ url = 'http://localhost:1337/v1/threads'
+ payload = {
+ 'messages': [
+ {
+ 'role': 'user',
+ 'content': 'Hello, what is AI?',
+ 'file_ids': ['file-abc123']
+ },
+ {
+ 'role': 'user',
+ 'content': 'How does AI work? Explain it in simple terms.'
+ }
+ ]
+ }
+
+ response = requests.post(url, json=payload)
+ print(response.text)
+ get:
+ operationId: listThreads
+ tags:
+ - Threads
+ summary: List threads
+ description: |
+ Retrieves a list of all threads available in the system.
+ responses:
+ '200':
+ description: List of threads retrieved successfully
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ $ref: specs/threads.yaml#/components/schemas/ThreadObject
+ example:
+ - id: thread_abc123
+ object: thread
+ created_at: 1699014083
+ assistants:
+ - assistant-001
+ metadata: {}
+ messages: []
+ - id: thread_abc456
+ object: thread
+ created_at: 1699014083
+ assistants:
+ - assistant-002
+ - assistant-003
+ metadata: {}
+ x-codeSamples:
+ - lang: cURL
+ source: |-
+ curl http://localhost:1337/v1/threads \
+ -H "Content-Type: application/json"
+ - lang: JavaScript
+ source: |-
+ const fetch = require('node-fetch');
+
+ fetch('http://localhost:1337/v1/threads', {
+ method: 'GET',
+ headers: {'Content-Type': 'application/json'}
+ }).then(res => res.json())
+ .then(json => console.log(json));
+ - lang: Node.js
+ source: |-
+ const fetch = require('node-fetch');
+
+ fetch('http://localhost:1337/v1/threads', {
+ method: 'GET',
+ headers: {'Content-Type': 'application/json'}
+ }).then(res => res.json())
+ .then(json => console.log(json));
+ - lang: Python
+ source: |-
+ import requests
+
+ url = 'http://localhost:1337/v1/threads'
+ headers = {'Content-Type': 'application/json'}
+
+ response = requests.get(url, headers=headers)
+ print(response.json())
+ '/threads/{thread_id}':
+ get:
+ operationId: getThread
+ tags:
+ - Threads
+ summary: Retrieve thread
+ description: >
+ Retrieves detailed information about a specific thread using its
+ thread_id.
+ Equivalent to OpenAI's retrieve thread.
+ parameters:
+ - in: path
+ name: thread_id
+ required: true
+ schema:
+ type: string
+ description: |
+ The ID of the thread to retrieve.
+ responses:
+ '200':
+ description: Thread details retrieved successfully
+ content:
+ application/json:
+ schema:
+ $ref: specs/threads.yaml#/components/schemas/GetThreadResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |
+ curl http://localhost:1337/v1/threads/{thread_id}
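+        # A minimal Python sketch mirroring the cURL sample above; 'thread_abc123' is an illustrative id.
+        - lang: Python
+          source: |-
+            import requests
+
+            # Illustrative thread id taken from the examples in this spec.
+            thread_id = 'thread_abc123'
+
+            response = requests.get(f'http://localhost:1337/v1/threads/{thread_id}')
+            print(response.json())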
+ patch:
+ operationId: modifyThread
+ tags:
+ - Threads
+ summary: Modify thread
+ description: >
+ Modifies a thread.
+ Equivalent to OpenAI's modify thread.
+ parameters:
+ - in: path
+ name: thread_id
+ required: true
+ schema:
+ type: string
+ description: |
+ The ID of the thread to be modified.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+              type: object
+              properties:
+                title:
+                  type: string
+                  description: Set the title of the thread
+                messages:
+                  type: array
+                  description: Messages to set on the thread.
+                  items:
+                    $ref: specs/threads.yaml#/components/schemas/ThreadMessageObject
+ responses:
+ '200':
+ description: Thread modified successfully
+ content:
+ application/json:
+ schema:
+ $ref: specs/threads.yaml#/components/schemas/ModifyThreadResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |
+            curl -X PATCH http://localhost:1337/v1/threads/{thread_id} \
+ -H "Content-Type: application/json" \
+ -d '{
+ "messages": [{
+ "role": "user",
+ "content": "Hello, what is AI?",
+ "file_ids": ["file-abc123"]
+ }, {
+ "role": "user",
+ "content": "How does AI work? Explain it in simple terms."
+ }]
+ }'
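+        # A minimal Python sketch mirroring the cURL sample above; it assumes the PATCH method declared for this operation and uses an illustrative thread id.
+        - lang: Python
+          source: |-
+            import requests
+
+            thread_id = 'thread_abc123'  # illustrative id
+            payload = {
+              'messages': [
+                {
+                  'role': 'user',
+                  'content': 'Hello, what is AI?',
+                  'file_ids': ['file-abc123']
+                },
+                {
+                  'role': 'user',
+                  'content': 'How does AI work? Explain it in simple terms.'
+                }
+              ]
+            }
+
+            response = requests.patch(f'http://localhost:1337/v1/threads/{thread_id}', json=payload)
+            print(response.json())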
+ delete:
+ operationId: deleteThread
+ tags:
+ - Threads
+ summary: Delete thread
+ description: >
+ Delete a thread.
+ Equivalent to OpenAI's delete thread.
+ parameters:
+ - in: path
+ name: thread_id
+ required: true
+ schema:
+ type: string
+ description: |
+ The ID of the thread to be deleted.
+ responses:
+ '200':
+ description: Thread deleted successfully
+ content:
+ application/json:
+ schema:
+ $ref: specs/threads.yaml#/components/schemas/DeleteThreadResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |
+ curl -X DELETE http://localhost:1337/v1/threads/{thread_id}
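+        # A minimal Python sketch mirroring the cURL sample above; 'thread_abc123' is an illustrative id.
+        - lang: Python
+          source: |-
+            import requests
+
+            thread_id = 'thread_abc123'  # illustrative id
+
+            response = requests.delete(f'http://localhost:1337/v1/threads/{thread_id}')
+            print(response.json())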
+ /assistants:
+ get:
+ operationId: listAssistants
+ tags:
+ - Assistants
+ summary: List assistants
+ description: >
+ Return a list of assistants.
+ Equivalent to OpenAI's list assistants.
+ responses:
+ '200':
+ description: List of assistants retrieved successfully
+ content:
+ application/json:
+ schema:
+                type: array
+                items:
+                  $ref: specs/assistants.yaml#/components/schemas/AssistantObject
+ example:
+ - id: asst_abc123
+ object: assistant
+ version: 1
+ created_at: 1698984975
+ name: Math Tutor
+ description: null
+ avatar: https://pic.png
+ models:
+ - model_id: model_0
+ instructions: Be concise
+ events:
+ in: []
+ out: []
+ metadata: {}
+ - id: asst_abc456
+ object: assistant
+ version: 1
+ created_at: 1698984975
+ name: Physics Tutor
+ description: null
+ avatar: https://pic.png
+ models:
+ - model_id: model_1
+ instructions: Be concise!
+ events:
+ in: []
+ out: []
+ metadata: {}
+ x-codeSamples:
+ - lang: cURL
+ source: |-
+ curl http://localhost:1337/v1/assistants \
+ -H "Content-Type: application/json"
+ - lang: JavaScript
+ source: |-
+ fetch('http://localhost:1337/v1/assistants', {
+ method: 'GET',
+ headers: {
+ 'Content-Type': 'application/json'
+ }
+ })
+ - lang: Node.js
+ source: |-
+ const fetch = require('node-fetch');
+
+ fetch('http://localhost:1337/v1/assistants', {
+ method: 'GET',
+ headers: {
+ 'Content-Type': 'application/json'
+ }
+ })
+ - lang: Python
+ source: |-
+ import requests
+
+ url = 'http://localhost:1337/v1/assistants'
+ headers = {'Content-Type': 'application/json'}
+
+ response = requests.get(url, headers=headers)
+ '/assistants/{assistant_id}':
+ get:
+ operationId: getAssistant
+ tags:
+ - Assistants
+ summary: Retrieve assistant
+ description: >
+ Retrieves an assistant.
+        Equivalent to OpenAI's retrieve assistant.
+ parameters:
+ - in: path
+ name: assistant_id
+ required: true
+ schema:
+ type: string
+ example: jan
+ description: |
+ The ID of the assistant to retrieve.
+ responses:
+ '200':
+          description: OK
+ content:
+ application/json:
+ schema:
+ $ref: specs/assistants.yaml#/components/schemas/RetrieveAssistantResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |-
+ curl http://localhost:1337/v1/assistants/{assistant_id} \
+ -H "Content-Type: application/json"
+ - lang: JavaScript
+ source: |-
+ const fetch = require('node-fetch');
+
+ let assistantId = 'abc123';
+
+ fetch(`http://localhost:1337/v1/assistants/${assistantId}`, {
+ method: 'GET',
+ headers: {
+ 'Content-Type': 'application/json'
+ }
+ })
+ - lang: Node.js
+ source: |-
+ const fetch = require('node-fetch');
+
+ let assistantId = 'abc123';
+
+ fetch(`http://localhost:1337/v1/assistants/${assistantId}`, {
+ method: 'GET',
+ headers: {
+ 'Content-Type': 'application/json'
+ }
+ })
+ - lang: Python
+ source: >-
+ import requests
+
+
+ assistant_id = 'abc123'
+
+
+ response = requests.get(f'http://localhost:1337/v1/assistants/{assistant_id}', headers={'Content-Type': 'application/json'})
+ '/threads/{thread_id}/messages':
+ get:
+ operationId: listMessages
+ tags:
+ - Messages
+ summary: List messages
+ description: >
+ Retrieves all messages from the given thread.
+ Equivalent to OpenAI's list messages.
+ parameters:
+ - in: path
+ name: thread_id
+ required: true
+ schema:
+ type: string
+ description: |
+ The ID of the thread from which to retrieve messages.
+ responses:
+ '200':
+ description: List of messages retrieved successfully
+ content:
+ application/json:
+ schema:
+ $ref: specs/messages.yaml#/components/schemas/ListMessagesResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |
+ curl http://localhost:1337/v1/threads/{thread_id}/messages \
+ -H "Content-Type: application/json"
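+        # A minimal Python sketch mirroring the cURL sample above; 'thread_abc123' is an illustrative id.
+        - lang: Python
+          source: |-
+            import requests
+
+            thread_id = 'thread_abc123'  # illustrative id
+            url = f'http://localhost:1337/v1/threads/{thread_id}/messages'
+
+            response = requests.get(url, headers={'Content-Type': 'application/json'})
+            print(response.json())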
+ post:
+ operationId: createMessage
+ tags:
+ - Messages
+ summary: Create message
+ description: >
+ Create a message.
+        Equivalent to OpenAI's create message.
+ parameters:
+ - in: path
+ name: thread_id
+ required: true
+ schema:
+ type: string
+ description: |
+ The ID of the thread to which the message will be posted.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ role:
+ type: string
+ description: |
+ Role of the sender, either 'user' or 'assistant'.
+ example: user
+ enum:
+ - user
+ - assistant
+ content:
+ type: string
+ description: |
+ Text content of the message.
+ example: How does AI work? Explain it in simple terms.
+ required:
+ - role
+ - content
+ responses:
+ '200':
+ description: Message created successfully
+ content:
+ application/json:
+ schema:
+ $ref: specs/messages.yaml#/components/schemas/CreateMessageResponse
+ x-codeSamples:
+ - lang: cURL
+ source: |
+ curl -X POST http://localhost:1337/v1/threads/{thread_id}/messages \
+ -H "Content-Type: application/json" \
+ -d '{
+ "role": "user",
+ "content": "How does AI work? Explain it in simple terms."
+ }'
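+        # A minimal Python sketch mirroring the cURL sample above; 'thread_abc123' is an illustrative id.
+        - lang: Python
+          source: |-
+            import requests
+
+            thread_id = 'thread_abc123'  # illustrative id
+            payload = {
+              'role': 'user',
+              'content': 'How does AI work? Explain it in simple terms.'
+            }
+
+            response = requests.post(f'http://localhost:1337/v1/threads/{thread_id}/messages', json=payload)
+            print(response.json())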
+ '/threads/{thread_id}/messages/{message_id}':
+ get:
+ operationId: retrieveMessage
+ tags:
+ - Messages
+ summary: Retrieve message
+ description: >
+ Retrieve a specific message from a thread using its thread_id and
+ message_id.
+        Equivalent to OpenAI's retrieve message.
+ parameters:
+ - in: path
+ name: thread_id
+ required: true
+ schema:
+ type: string
+ description: |
+ The ID of the thread containing the message.
+ - in: path
+ name: message_id
+ required: true
+ schema:
+ type: string
+ description: |
+ The ID of the message to retrieve.
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: specs/messages.yaml#/components/schemas/GetMessageResponse
+ x-codeSamples:
+ - lang: cURL
+          source: |
+            curl http://localhost:1337/v1/threads/{thread_id}/messages/{message_id} \
+              -H "Content-Type: application/json"
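+        # A minimal Python sketch mirroring the cURL sample above; the thread and message ids are illustrative.
+        - lang: Python
+          source: |-
+            import requests
+
+            thread_id = 'thread_abc123'  # illustrative thread id
+            message_id = 'msg_abc123'    # illustrative message id
+            url = f'http://localhost:1337/v1/threads/{thread_id}/messages/{message_id}'
+
+            response = requests.get(url, headers={'Content-Type': 'application/json'})
+            print(response.json())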
+x-webhooks:
+ ModelObject:
+ post:
+ summary: The model object
+ description: >
+ Describe a model offering that can be used with the API.
+ Equivalent to OpenAI's model object.
+ operationId: ModelObject
+ tags:
+ - Models
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: specs/models.yaml#/components/schemas/ModelObject
+ AssistantObject:
+ post:
+ summary: The assistant object
+ description: >
+ Build assistants that can call models and use tools to perform
+ tasks. Equivalent
+ to OpenAI's assistants object.
+      operationId: AssistantObject
+ tags:
+ - Assistants
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: specs/assistants.yaml#/components/schemas/AssistantObject
+ MessageObject:
+ post:
+ summary: The message object
+ description: >
+ Information about a message in the thread.
+ Equivalent to OpenAI's message object.
+ operationId: MessageObject
+ tags:
+ - Messages
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: specs/messages.yaml#/components/schemas/MessageObject
+ ThreadObject:
+ post:
+ summary: The thread object
+      description: >
+        Represents a thread that contains messages.
+        Equivalent to OpenAI's thread object.
+ operationId: ThreadObject
+ tags:
+ - Threads
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: specs/threads.yaml#/components/schemas/ThreadObject
diff --git a/docs/openapi/specs/assistants.yaml b/docs/openapi/specs/assistants.yaml
new file mode 100644
index 000000000..5db1f6a97
--- /dev/null
+++ b/docs/openapi/specs/assistants.yaml
@@ -0,0 +1,319 @@
+---
+components:
+ schemas:
+ AssistantObject:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the assistant.
+ example: asst_abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's an assistant.
+ default: assistant
+ version:
+ type: integer
+ description: Version number of the assistant.
+ example: 1
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the assistant.
+ example: 1698984975
+ name:
+ type: string
+ description: Name of the assistant.
+ example: Math Tutor
+ description:
+ type: string
+ description: Description of the assistant. Can be null.
+ example: null
+ avatar:
+ type: string
+ description: URL of the assistant's avatar. Jan-specific property.
+ example: https://pic.png
+ models:
+ type: array
+ description: List of models associated with the assistant. Jan-specific property.
+ items:
+ type: object
+ properties:
+ model_id:
+ type: string
+ example: model_0
+ instructions:
+ type: string
+ description: A system prompt for the assistant.
+ example: Be concise
+ events:
+ type: object
+ description: Event subscription settings for the assistant.
+ properties:
+ in:
+ type: array
+ items:
+ type: string
+ out:
+ type: array
+ items:
+ type: string
+ metadata:
+ type: object
+ description: Metadata associated with the assistant.
+ ListAssistantsResponse: null
+ CreateAssistantResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the assistant.
+ example: asst_abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's an assistant.
+ default: assistant
+ version:
+ type: integer
+ description: Version number of the assistant.
+ example: 1
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the assistant.
+ example: 1698984975
+ name:
+ type: string
+ description: Name of the assistant.
+ example: Math Tutor
+ description:
+ type: string
+ description: Description of the assistant. Can be null.
+ example: null
+ avatar:
+ type: string
+ description: URL of the assistant's avatar. Jan-specific property.
+ example: https://pic.png
+ models:
+ type: array
+ description: List of models associated with the assistant. Jan-specific property.
+ items:
+ type: object
+ properties:
+ model_id:
+ type: string
+ example: model_0
+ instructions:
+ type: string
+ description: A system prompt for the assistant.
+ example: Be concise
+ events:
+ type: object
+ description: Event subscription settings for the assistant.
+ properties:
+ in:
+ type: array
+ items:
+ type: string
+ out:
+ type: array
+ items:
+ type: string
+ metadata:
+ type: object
+ description: Metadata associated with the assistant.
+ RetrieveAssistantResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the assistant.
+ example: asst_abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's an assistant.
+ default: assistant
+ version:
+ type: integer
+ description: Version number of the assistant.
+ example: 1
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the assistant.
+ example: 1698984975
+ name:
+ type: string
+ description: Name of the assistant.
+ example: Math Tutor
+ description:
+ type: string
+ description: Description of the assistant. Can be null.
+ example: null
+ avatar:
+ type: string
+ description: URL of the assistant's avatar. Jan-specific property.
+ example: https://pic.png
+ models:
+ type: array
+ description: List of models associated with the assistant. Jan-specific property.
+ items:
+ type: object
+ properties:
+ model_id:
+ type: string
+ example: model_0
+ instructions:
+ type: string
+ description: A system prompt for the assistant.
+ example: Be concise
+ events:
+ type: object
+ description: Event subscription settings for the assistant.
+ properties:
+ in:
+ type: array
+ items:
+ type: string
+ out:
+ type: array
+ items:
+ type: string
+ metadata:
+ type: object
+ description: Metadata associated with the assistant.
+ ModifyAssistantObject:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the assistant.
+ example: asst_abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's an assistant.
+ default: assistant
+ version:
+ type: integer
+ description: Version number of the assistant.
+ example: 1
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the assistant.
+ example: 1698984975
+ name:
+ type: string
+ description: Name of the assistant.
+ example: Math Tutor
+ description:
+ type: string
+ description: Description of the assistant. Can be null.
+ example: null
+ avatar:
+ type: string
+ description: URL of the assistant's avatar. Jan-specific property.
+ example: https://pic.png
+ models:
+ type: array
+ description: List of models associated with the assistant. Jan-specific property.
+ items:
+ type: object
+ properties:
+ model_id:
+ type: string
+ example: model_0
+ instructions:
+ type: string
+ description: A system prompt for the assistant.
+ example: Be concise
+ events:
+ type: object
+ description: Event subscription settings for the assistant.
+ properties:
+ in:
+ type: array
+ items:
+ type: string
+ out:
+ type: array
+ items:
+ type: string
+ metadata:
+ type: object
+ description: Metadata associated with the assistant.
+ ModifyAssistantResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the assistant.
+ example: asst_abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's an assistant.
+ default: assistant
+ version:
+ type: integer
+ description: Version number of the assistant.
+ example: 1
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the assistant.
+ example: 1698984975
+ name:
+ type: string
+ description: Name of the assistant.
+ example: Physics Tutor
+ description:
+ type: string
+ description: Description of the assistant. Can be null.
+ example: null
+ avatar:
+ type: string
+ description: URL of the assistant's avatar. Jan-specific property.
+ example: https://pic.png
+ models:
+ type: array
+ description: List of models associated with the assistant. Jan-specific property.
+ items:
+ type: object
+ properties:
+ model_id:
+ type: string
+ example: model_0
+ instructions:
+ type: string
+ description: A system prompt for the assistant.
+ example: Be concise!
+ events:
+ type: object
+ description: Event subscription settings for the assistant.
+ properties:
+ in:
+ type: array
+ items:
+ type: string
+ out:
+ type: array
+ items:
+ type: string
+ metadata:
+ type: object
+ description: Metadata associated with the assistant.
+ DeleteAssistantResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the deleted assistant.
+ example: asst_abc123
+ object:
+ type: string
+ description: Type of the object, indicating the assistant has been deleted.
+ example: assistant.deleted
+ deleted:
+ type: boolean
+ description: Indicates whether the assistant was successfully deleted.
+ example: true
diff --git a/docs/openapi/specs/chat.yaml b/docs/openapi/specs/chat.yaml
new file mode 100644
index 000000000..c9358d796
--- /dev/null
+++ b/docs/openapi/specs/chat.yaml
@@ -0,0 +1,196 @@
+---
+components:
+ schemas:
+ ChatObject:
+ type: object
+ properties:
+ messages:
+          type: array
+ description: |
+ Contains input data or prompts for the model to process.
+ example:
+ - content: 'Hello there :wave:'
+ role: assistant
+ - content: Can you write a long story
+ role: user
+ stream:
+ type: boolean
+ default: true
+ description:
+ Enables continuous output generation, allowing for streaming of
+ model responses.
+ model:
+ type: string
+ example: gpt-3.5-turbo
+ description: Specifies the model being used for inference or processing tasks.
+ max_tokens:
+ type: number
+ default: 2048
+ description:
+ The maximum number of tokens the model will generate in a single
+ response.
+ stop:
+          type: array
+ example:
+ - hello
+ description:
+ Defines specific tokens or phrases at which the model will stop
+            generating further output.
+ frequency_penalty:
+ type: number
+ default: 0
+ description:
+ Adjusts the likelihood of the model repeating words or phrases in
+ its output.
+ presence_penalty:
+ type: number
+ default: 0
+ description:
+ Influences the generation of new and varied concepts in the model's
+ output.
+ temperature:
+ type: number
+ default: 0.7
+          minimum: 0
+          maximum: 1
+ description: Controls the randomness of the model's output.
+ top_p:
+ type: number
+ default: 0.95
+          minimum: 0
+          maximum: 1
+ description: Set probability threshold for more relevant outputs.
+ cache_prompt:
+ type: boolean
+ default: true
+ description: Optimize performance in repeated or similar requests.
+ ChatCompletionRequest:
+ type: object
+ properties:
+ messages:
+          type: array
+ description: |
+ Contains input data or prompts for the model to process.
+ example:
+ - content: You are a helpful assistant.
+ role: system
+ - content: Hello!
+ role: user
+ model:
+ type: string
+ example: tinyllama-1.1b
+ description: |
+ Specifies the model being used for inference or processing tasks.
+ stream:
+ type: boolean
+ default: true
+ description: >
+ Enables continuous output generation, allowing for streaming of
+ model responses.
+ max_tokens:
+ type: number
+ default: 2048
+ description: >
+ The maximum number of tokens the model will generate in a single
+ response.
+ stop:
+          type: array
+ example:
+ - hello
+ description: >
+ Defines specific tokens or phrases at which the model will stop
+ generating further output.
+ frequency_penalty:
+ type: number
+ default: 0
+ description: >
+ Adjusts the likelihood of the model repeating words or phrases in
+ its output.
+ presence_penalty:
+ type: number
+ default: 0
+ description: >
+ Influences the generation of new and varied concepts in the model's
+ output.
+ temperature:
+ type: number
+ default: 0.7
+          minimum: 0
+          maximum: 1
+ description: |
+ Controls the randomness of the model's output.
+ top_p:
+ type: number
+ default: 0.95
+          minimum: 0
+          maximum: 1
+ description: |
+ Set probability threshold for more relevant outputs.
+ ChatCompletionResponse:
+ type: object
+ description: Description of the response structure
+ properties:
+ choices:
+ type: array
+ description: Array of choice objects
+ items:
+ type: object
+ properties:
+ finish_reason:
+ type: string
+ nullable: true
+ example: null
+ description: Reason for finishing the response, if applicable
+ index:
+ type: integer
+ example: 0
+ description: Index of the choice
+ message:
+ type: object
+ properties:
+ content:
+ type: string
+ example: Hello user. What can I help you with?
+ description: Content of the message
+ role:
+ type: string
+ example: assistant
+ description: Role of the sender
+ created:
+ type: integer
+ example: 1700193928
+ description: Timestamp of when the response was created
+ id:
+ type: string
+ example: ebwd2niJvJB1Q2Whyvkz
+ description: Unique identifier of the response
+ model:
+ type: string
+ nullable: true
+ example: _
+ description: Model used for generating the response
+ object:
+ type: string
+ example: chat.completion
+ description: Type of the response object
+ system_fingerprint:
+ type: string
+ nullable: true
+ example: _
+ description: System fingerprint
+ usage:
+ type: object
+ description: Information about the usage of tokens
+ properties:
+ completion_tokens:
+ type: integer
+ example: 500
+ description: Number of tokens used for completion
+ prompt_tokens:
+ type: integer
+ example: 33
+ description: Number of tokens used in the prompt
+ total_tokens:
+ type: integer
+ example: 533
+ description: Total number of tokens used
diff --git a/docs/openapi/specs/messages.yaml b/docs/openapi/specs/messages.yaml
new file mode 100644
index 000000000..22d82b787
--- /dev/null
+++ b/docs/openapi/specs/messages.yaml
@@ -0,0 +1,313 @@
+---
+components:
+ schemas:
+ MessageObject:
+ type: object
+ properties:
+ id:
+ type: string
+ description: |
+ Sequential or UUID identifier of the message.
+ example: 0
+ object:
+ type: string
+ description: |
+ Type of the object, defaults to 'thread.message'.
+ example: thread.message
+ created_at:
+ type: integer
+ format: int64
+ description: |
+ Unix timestamp representing the creation time of the message.
+ thread_id:
+ type: string
+ description: >
+ Identifier of the thread to which this message belongs. Defaults to
+ parent thread.
+ example: thread_asdf
+ assistant_id:
+ type: string
+ description: >
+ Identifier of the assistant involved in the message. Defaults to
+ parent thread.
+ example: jan
+ role:
+ type: string
+ enum:
+ - user
+ - assistant
+ description: |
+ Role of the sender, either 'user' or 'assistant'.
+ content:
+ type: array
+ items:
+ type: object
+ properties:
+ type:
+ type: string
+ description: |
+ Type of content, e.g., 'text'.
+ text:
+ type: object
+ properties:
+ value:
+ type: string
+ description: |
+ Text content of the message.
+ example: Hi!?
+ annotations:
+ type: array
+ items:
+ type: string
+ description: |
+ Annotations for the text content, if any.
+ example: []
+ metadata:
+ type: object
+ description: |
+ Metadata associated with the message, defaults to an empty object.
+ example: {}
+ GetMessageResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the message.
+ example: msg_abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's a thread message.
+ default: thread.message
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the message.
+ example: 1699017614
+ thread_id:
+ type: string
+ description: Identifier of the thread to which this message belongs.
+ example: thread_abc123
+ role:
+ type: string
+ description: Role of the sender, either 'user' or 'assistant'.
+ example: user
+ content:
+ type: array
+ items:
+ type: object
+ properties:
+ type:
+ type: string
+ description: Type of content, e.g., 'text'.
+ example: text
+ text:
+ type: object
+ properties:
+ value:
+ type: string
+ description: Text content of the message.
+ example: How does AI work? Explain it in simple terms.
+ annotations:
+ type: array
+ items:
+ type: string
+ description: Annotations for the text content, if any.
+ example: []
+ file_ids:
+ type: array
+ items:
+ type: string
+ description: Array of file IDs associated with the message, if any.
+ example: []
+ assistant_id:
+ type: string
+ description: Identifier of the assistant involved in the message, if applicable.
+ example: null
+ run_id:
+ type: string
+ description: Run ID associated with the message, if applicable.
+ example: null
+ metadata:
+ type: object
+ description: Metadata associated with the message.
+ example: {}
+ CreateMessageResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the created message.
+ example: msg_abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's a thread message.
+ example: thread.message
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the message.
+ example: 1699017614
+ thread_id:
+ type: string
+ description: Identifier of the thread to which this message belongs.
+ example: thread_abc123
+ role:
+ type: string
+ description: Role of the sender, either 'user' or 'assistant'.
+ example: user
+ content:
+ type: array
+ items:
+ type: object
+ properties:
+ type:
+ type: string
+ description: Type of content, e.g., 'text'.
+ example: text
+ text:
+ type: object
+ properties:
+ value:
+ type: string
+ description: Text content of the message.
+ example: How does AI work? Explain it in simple terms.
+ annotations:
+ type: array
+ items:
+ type: string
+ description: Annotations for the text content, if any.
+ example: []
+ file_ids:
+ type: array
+ items:
+ type: string
+ description: Array of file IDs associated with the message, if any.
+ example: []
+ assistant_id:
+ type: string
+ description: Identifier of the assistant involved in the message, if applicable.
+ example: null
+ run_id:
+ type: string
+ description: Run ID associated with the message, if applicable.
+ example: null
+ metadata:
+ type: object
+ description: Metadata associated with the message.
+ example: {}
+ ListMessagesResponse:
+ type: object
+ properties:
+ object:
+ type: string
+ description: Type of the object, indicating it's a list.
+ default: list
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/ListMessageObject'
+ first_id:
+ type: string
+ description: Identifier of the first message in the list.
+ example: msg_abc123
+ last_id:
+ type: string
+ description: Identifier of the last message in the list.
+ example: msg_abc456
+ has_more:
+ type: boolean
+ description: Indicates whether there are more messages to retrieve.
+ example: false
+ ListMessageObject:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the message.
+ example: msg_abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's a thread message.
+ example: thread.message
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the message.
+ example: 1699017614
+ thread_id:
+ type: string
+ description: Identifier of the thread to which this message belongs.
+ example: thread_abc123
+ role:
+ type: string
+ description: Role of the sender, either 'user' or 'assistant'.
+ example: user
+ content:
+ type: array
+ items:
+ type: object
+ properties:
+ type:
+ type: string
+ description: Type of content, e.g., 'text'.
+ text:
+ type: object
+ properties:
+ value:
+ type: string
+ description: Text content of the message.
+ example: How does AI work? Explain it in simple terms.
+ annotations:
+ type: array
+ items:
+ type: string
+ description: Annotations for the text content, if any.
+ file_ids:
+ type: array
+ items:
+ type: string
+ description: Array of file IDs associated with the message, if any.
+ example: []
+ assistant_id:
+ type: string
+ description: Identifier of the assistant involved in the message, if applicable.
+ example: null
+ run_id:
+ type: string
+ description: Run ID associated with the message, if applicable.
+ example: null
+ metadata:
+ type: object
+ description: Metadata associated with the message.
+ example: {}
+ MessageFileObject:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the file.
+ example: file-abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's a thread message file.
+ example: thread.message.file
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the file.
+ example: 1699061776
+ message_id:
+ type: string
+ description: Identifier of the message to which this file is associated.
+ example: msg_abc123
+ ListMessageFilesResponse:
+ type: object
+ properties:
+ object:
+ type: string
+ description: Type of the object, indicating it's a list.
+ default: list
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/MessageFileObject'
diff --git a/docs/openapi/specs/models.yaml b/docs/openapi/specs/models.yaml
new file mode 100644
index 000000000..ff2040bb5
--- /dev/null
+++ b/docs/openapi/specs/models.yaml
@@ -0,0 +1,259 @@
+---
+components:
+ schemas:
+ ListModelsResponse:
+ type: object
+ properties:
+ object:
+ type: string
+ enum:
+ - list
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/Model'
+ required:
+ - object
+ - data
+ Model:
+ type: object
+ properties:
+ source_url:
+ type: string
+ format: uri
+ description: URL to the source of the model.
+ example: https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf
+ id:
+ type: string
+ description:
+ Unique identifier used in chat-completions model_name, matches
+ folder name.
+ example: trinity-v1.2-7b
+ object:
+ type: string
+ example: model
+ name:
+ type: string
+ description: Name of the model.
+ example: Trinity-v1.2 7B Q4
+ version:
+ type: string
+ default: '1.0'
+ description: The version number of the model.
+ description:
+ type: string
+ description: Description of the model.
+ example:
+ Trinity is an experimental model merge using the Slerp method.
+ Recommended for daily assistance purposes.
+ format:
+ type: string
+ description: State format of the model, distinct from the engine.
+ example: gguf
+ settings:
+ type: object
+ properties:
+ ctx_len:
+ type: integer
+ description: Context length.
+ example: 4096
+ prompt_template:
+ type: string
+ example: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
+ additionalProperties: false
+ parameters:
+ type: object
+ properties:
+ temperature:
+ example: 0.7
+ top_p:
+ example: 0.95
+ stream:
+ example: true
+ max_tokens:
+ example: 4096
+ stop:
+ example: []
+ frequency_penalty:
+ example: 0
+ presence_penalty:
+ example: 0
+ additionalProperties: false
+        metadata:
+          type: object
+          properties:
+            author:
+              type: string
+              example: Jan
+            tags:
+              example:
+                - 7B
+                - Merged
+                - Featured
+            size:
+              example: 4370000000
+            cover:
+              example: https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png
+ engine:
+ example: nitro
+ ModelObject:
+ type: object
+ properties:
+ id:
+ type: string
+ description: |
+ The identifier of the model.
+ example: trinity-v1.2-7b
+ object:
+ type: string
+ description: |
+ The type of the object, indicating it's a model.
+ default: model
+ created:
+ type: integer
+ format: int64
+ description: |
+ Unix timestamp representing the creation time of the model.
+ example: 1253935178
+ owned_by:
+ type: string
+ description: |
+ The entity that owns the model.
+ example: _
+ GetModelResponse:
+ type: object
+ properties:
+ source_url:
+ type: string
+ format: uri
+ description: URL to the source of the model.
+ example: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf
+ id:
+ type: string
+ description:
+ Unique identifier used in chat-completions model_name, matches
+ folder name.
+ example: mistral-ins-7b-q4
+ object:
+ type: string
+ example: model
+ name:
+ type: string
+ description: Name of the model.
+ example: Mistral Instruct 7B Q4
+ version:
+ type: string
+ default: '1.0'
+ description: The version number of the model.
+ description:
+ type: string
+ description: Description of the model.
+ example:
+ Trinity is an experimental model merge using the Slerp method.
+ Recommended for daily assistance purposes.
+ format:
+ type: string
+ description: State format of the model, distinct from the engine.
+ example: gguf
+ settings:
+ type: object
+ properties:
+ ctx_len:
+ type: integer
+ description: Context length.
+ example: 4096
+ prompt_template:
+ type: string
+ example: '[INST] {prompt} [/INST]'
+ additionalProperties: false
+ parameters:
+ type: object
+ properties:
+ temperature:
+ example: 0.7
+ top_p:
+ example: 0.95
+ stream:
+ example: true
+ max_tokens:
+ example: 4096
+ stop:
+ example: []
+ frequency_penalty:
+ example: 0
+ presence_penalty:
+ example: 0
+ additionalProperties: false
+        metadata:
+          type: object
+          properties:
+            author:
+              type: string
+              example: MistralAI
+            tags:
+              example:
+                - 7B
+                - Featured
+                - Foundation Model
+            size:
+              example: 4370000000
+            cover:
+              example: https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png
+ engine:
+ example: nitro
+ DeleteModelResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the model that was deleted.
+ example: mistral-ins-7b-q4
+ object:
+ type: string
+ description: Type of the object, indicating it's a model.
+ default: model
+ deleted:
+ type: boolean
+ description: Indicates whether the model was successfully deleted.
+ example: true
+ StartModelResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the model that was started.
+ example: model-zephyr-7B
+ object:
+ type: string
+ description: Type of the object, indicating it's a model.
+ default: model
+ state:
+ type: string
+ description: The current state of the model after the start operation.
+ example: running
+ required:
+ - id
+ - object
+ - state
+ StopModelResponse:
+ type: object
+ properties:
+ id:
+ type: string
+          description: The identifier of the model that was stopped.
+ example: model-zephyr-7B
+ object:
+ type: string
+ description: Type of the object, indicating it's a model.
+ default: model
+ state:
+ type: string
+ description: The current state of the model after the start operation.
+ example: stopped
+ required:
+ - id
+ - object
+ - state
+ DownloadModelResponse:
+ type: object
+ properties:
+ message:
+ type: string
+          description: Message indicating that Jan has started downloading the corresponding model.
+ example: Starting download mistral-ins-7b-q4
diff --git a/docs/openapi/specs/threads.yaml b/docs/openapi/specs/threads.yaml
new file mode 100644
index 000000000..285fcc82d
--- /dev/null
+++ b/docs/openapi/specs/threads.yaml
@@ -0,0 +1,227 @@
+---
+components:
+ schemas:
+ ThreadObject:
+ type: object
+ properties:
+ id:
+ type: string
+ description: |
+ The identifier of the thread, defaults to foldername.
+ example: thread_....
+ object:
+ type: string
+ description: |
+ Type of the object, defaults to thread.
+ example: thread
+ title:
+ type: string
+ description: >
+ A brief summary or description of the thread, defaults to an empty
+ string.
+ example: funny physics joke
+ assistants:
+ type: array
+ description: ''
+ items:
+ properties:
+ assistant_id:
+ type: string
+ description: |
+                  The identifier of the assistant, defaults to "jan"
+ example: jan
+ model:
+ type: object
+ properties:
+ id:
+ type: string
+ description: ''
+ example: ...
+ settings:
+ type: object
+ description: >
+ Defaults to and overrides assistant.json's "settings" (and if none,
+ then model.json "settings")
+ parameters:
+ type: object
+ description: >
+ Defaults to and overrides assistant.json's "parameters" (and if
+ none, then model.json "parameters")
+ created:
+ type: integer
+ format: int64
+ description: >
+ Unix timestamp representing the creation time of the thread,
+ defaults to file creation time.
+ example: 1231231
+ metadata:
+ type: object
+ description: |
+ Metadata associated with the thread, defaults to an empty object.
+ example: {}
+ GetThreadResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the thread.
+ example: thread_abc123
+ object:
+ type: string
+ description: Type of the object
+ example: thread
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the thread.
+ example: 1699014083
+ assistants:
+ type: array
+ items:
+ type: string
+ description: List of assistants involved in the thread.
+ example:
+ - assistant-001
+ metadata:
+ type: object
+ description: Metadata associated with the thread.
+ example: {}
+ messages:
+ type: array
+ items:
+ type: string
+ description: List of messages within the thread.
+ example: []
+ CreateThreadResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the newly created thread.
+ example: thread_abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's a thread.
+ example: thread
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the thread.
+ example: 1699014083
+ metadata:
+ type: object
+ description: Metadata associated with the newly created thread.
+ example: {}
+ CreateThreadObject:
+ type: object
+ properties:
+ object:
+ type: string
+ description: Type of the object, indicating it's a thread.
+ example: thread
+ title:
+ type: string
+ description: >
+ A brief summary or description of the thread, defaults to an empty
+ string.
+ example: funny physics joke
+ assistants:
+ type: array
+ description: assistant involved in the thread
+ items:
+ properties:
+ assistant_id:
+ type: string
+ description: |
+                  The identifier of the assistant, defaults to "jan"
+ example: jan
+ assistant_name:
+ type: string
+ description: |
+                  The name of the assistant, defaults to "Jan"
+ example: Jan
+ instructions:
+ type: string
+ description: >
+                  The instructions for the assistant, defaults to "Be my grammar corrector"
+ model:
+ type: object
+ properties:
+ id:
+ type: string
+ description: Model id
+ example: mistral-ins-7b-q4
+ settings:
+ type: object
+ description: >
+ Defaults to and overrides assistant.json's "settings" (and if none,
+ then model.json "settings")
+ parameters:
+ type: object
+ description: >
+ Defaults to and overrides assistant.json's "parameters" (and if
+ none, then model.json "parameters")
+ engine:
+ type: string
+ description: Engine id
+ example: nitro
+ metadata:
+ type: object
+ description: |
+ Metadata associated with the thread, defaults to an empty object.
+ ThreadMessageObject:
+ type: object
+ properties:
+ role:
+ type: string
+ description: |
+ "Role of the sender, either 'user' or 'assistant'."
+ enum:
+ - user
+ - assistant
+ content:
+ type: string
+ description: |
+ "Text content of the message."
+ file_ids:
+ type: array
+ items:
+ type: string
+ description: |
+ "Array of file IDs associated with the message, if any."
+ ModifyThreadResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: |
+ "The identifier of the modified thread."
+ example: thread_abc123
+ object:
+ type: string
+ description: Type of the object, indicating it's a thread.
+ example: thread
+ created_at:
+ type: integer
+ format: int64
+ description: Unix timestamp representing the creation time of the thread.
+ example: 1699014083
+ metadata:
+ type: object
+ description: Metadata associated with the modified thread.
+ example: {}
+ DeleteThreadResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The identifier of the deleted thread.
+ example: thread_abc123
+ object:
+ type: string
+ description: Type of the object, indicating the thread has been deleted.
+ example: thread.deleted
+ deleted:
+ type: boolean
+ description: Indicates whether the thread was successfully deleted.
+ example: true