diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml index 3dd485e38..ce74ab1d2 100644 --- a/docs/openapi/jan.yaml +++ b/docs/openapi/jan.yaml @@ -2309,6 +2309,7 @@ paths: ] } + # Models /models: get: operationId: listModels @@ -2374,6 +2375,61 @@ paths: ], "object": "list" } + post: + operationId: importModel + tags: + - Models + summary: Import Model + description: Imports a model instance. The model can be from a local folder, remote source, or an API endpoint. The model importer will examine the source_url for formatting. + parameters: + - in: query + name: source_url + required: true + schema: + type: string + # ideally this will be an actual ID, so this will always work from browser + example: https://huggingface.co/thebloke/example.gguf + description: The URL or local file path of the model source to import + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/Model" + x-oaiMeta: + name: Import model + returns: The imported [model](/docs/api-reference/models/object) object. + examples: + request: + curl: | + curl https://api.openai.com/v1/models/VAR_model_id \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: | + from openai import OpenAI + client = OpenAI() + + client.models.retrieve("VAR_model_id") + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const model = await openai.models.retrieve("gpt-3.5-turbo"); + + console.log(model); + } + + main(); + response: | + { + "id": "VAR_model_id", + "object": "model", + "created": 1686935002, + "owned_by": "openai", + "state": "ready" + } /models/{model}: get: operationId: retrieveModel @@ -2481,6 +2537,138 @@ paths: "object": "model", "deleted": true } + post: + operationId: startModel + tags: + - Models + summary: Start Model + description: Starts an imported model. Loads the model into V/RAM. 
+ parameters: + - in: path + name: model + required: true + schema: + type: string + # ideally this will be an actual ID, so this will always work from browser + example: gpt-3.5-turbo + description: The ID of the model to start + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/Model" + x-oaiMeta: + name: Start model + returns: The started [model](/docs/api-reference/models/object) object matching the specified ID. + examples: + request: + curl: | + curl https://api.openai.com/v1/models/VAR_model_id \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: | + from openai import OpenAI + client = OpenAI() + + client.models.retrieve("VAR_model_id") + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const model = await openai.models.retrieve("gpt-3.5-turbo"); + + console.log(model); + } + + main(); + response: | + { + "id": "VAR_model_id", + "object": "model", + "created": 1686935002, + "owned_by": "openai" + } + /models/{model}/cancel: + post: + operationId: cancelModel + tags: + - Models + summary: Cancel Model + description: Stops a running model. Unloads the model from V/RAM. + parameters: + - in: path + name: model + required: true + schema: + type: string + description: The ID of the model that is running. + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/RunObject" + x-oaiMeta: + name: Cancel a running model + beta: true + returns: The modified [run](/docs/api-reference/runs/object) object matching the specified ID. 
+ examples: + request: + curl: | + curl https://api.openai.com/v1/threads/thread_1cjnJPXj8MFiqTx58jU9TivC/runs/run_BeRGmpGt2wb1VI22ZRniOkrR/cancel \ + -H 'Authorization: Bearer $OPENAI_API_KEY' \ + -H 'OpenAI-Beta: assistants=v1' \ + -X POST + python: | + from openai import OpenAI + client = OpenAI() + + run = client.beta.threads.runs.cancel( + thread_id="thread_1cjnJPXj8MFiqTx58jU9TivC", + run_id="run_BeRGmpGt2wb1VI22ZRniOkrR" + ) + print(run) + node.js: | + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const run = await openai.beta.threads.runs.cancel( + "thread_1cjnJPXj8MFiqTx58jU9TivC", + "run_BeRGmpGt2wb1VI22ZRniOkrR" + ); + + console.log(run); + } + + main(); + response: | + { + "id": "run_BeRGmpGt2wb1VI22ZRniOkrR", + "object": "model", + "created_at": 1699076126, + "status": "cancelling", + "started_at": 1699076126, + "expires_at": 1699076726, + "cancelled_at": null, + "failed_at": null, + "completed_at": null, + "last_error": null, + "model": "gpt-4", + "instructions": "You summarize books.", + "tools": [ + { + "type": "retrieval" + } + ], + "file_ids": [], + "metadata": {} + } /moderations: post: @@ -3062,6 +3250,7 @@ paths: "deleted": true } + # Threads /threads: post: operationId: createThread @@ -7324,7 +7513,7 @@ components: object: type: string default: model - version: + version: type: integer description: The version of the Model Object file default: 1 @@ -7333,14 +7522,14 @@ components: format: uri example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf description: The model download source. It can be an external url or a local filepath. - id: # OpenAI-equivalent + id: # OpenAI-equivalent type: string description: The model identifier, which can be referenced in the API endpoints. 
example: zephyr-7b name: type: string description: Human-readable name that is used for UI - owned_by: # OpenAI-equivalent + owned_by: # OpenAI-equivalent type: string description: The organization that owns the model (you!) default: you # TODO @@ -7356,16 +7545,16 @@ components: default: to_download parameters: type: object - description: + description: properties: init: type: object properties: - ctx_len: + ctx_len: type: string description: TODO default: 2048 - ngl: + ngl: type: string description: TODO default: 100 @@ -7373,92 +7562,90 @@ components: type: bool description: TODO default: true - n_parallel: + n_parallel: type: string description: TODO default: 4 - pre_prompt: + pre_prompt: type: string description: TODO default: A chat between a curious user and an artificial intelligence - user_prompt: + user_prompt: type: string description: TODO default: "USER:" - ai_prompt: + ai_prompt: type: string description: TODO default: "ASSISTANT:" - default: { - ctx_len: 2048, - ngl: 100, - embedding: true, - n_parallel: 4, - pre_prompt: "A chat between a curious user and an artificial intelligence", - user_prompt: "USER:", - ai_prompt: "ASSISTANT:" - } + default: + { + ctx_len: 2048, + ngl: 100, + embedding: true, + n_parallel: 4, + pre_prompt: "A chat between a curious user and an artificial intelligence", + user_prompt: "USER:", + ai_prompt: "ASSISTANT:", + } runtime: type: object properties: - temperature: - type: string + temperature: + type: string description: TODO default: 0.7 - token_limit: + token_limit: type: string description: TODO default: 2048 - top_k: + top_k: type: string description: TODO default: 0 - top_p: + top_p: type: string description: TODO default: 1 - stream: + stream: type: string description: TODO default: true - default: { - temperature: 0.7, - token_limit: 2048, - top_k: 0, - top_p: 1, - stream: true - } + default: + { + temperature: 0.7, + token_limit: 2048, + top_k: 0, + top_p: 1, + stream: true, + } metadata: type: object properties: 
engine: type: string - enum: [llamacpp, api,tensorrt] + enum: [llamacpp, api, tensorrt] default: llamacpp quantization: type: string description: TODO default: Q4_K_M - size: + size: type: string default: 7b binaries: type: array description: TODO default: TODO - default: { - engine: llamacpp, - quantization: Q4_K_M, - size: 7b, - binaries: TODO - } + default: + { engine: llamacpp, quantization: Q4_K_M, size: 7b, binaries: TODO } required: - - id # From OpenAI + - id # From OpenAI - version - source_url - - created # From OpenAI, autogenerated in Jan - - object # From OpenAI, autogenerated in Jan - - owned_by # From OpenAI, autogenerated in Jan + - created # From OpenAI, autogenerated in Jan + - object # From OpenAI, autogenerated in Jan + - owned_by # From OpenAI, autogenerated in Jan x-oaiMeta: name: The model object example: *retrieve_model_response @@ -9616,4 +9803,4 @@ x-oaiMeta: path: object - type: endpoint key: createEdit - path: create \ No newline at end of file + path: create