diff --git a/README.md b/README.md
index 7a925eceb..2726baacf 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
Experimental (Nightly Build)
-
+
GitHub Actions artifacts
@@ -146,15 +146,15 @@ Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) fi
### Instructions
-1. **Clone the Repository:**
+1. **Clone the repository and check out a working branch:**
```bash
git clone https://github.com/janhq/jan
- git checkout DESIRED_BRANCH
cd jan
+ git checkout -b DESIRED_BRANCH
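+   # -b creates a new local branch to work on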
```
-2. **Run development and Using Jan Desktop**
+2. **Run in development mode and use Jan Desktop:**
```bash
make dev
@@ -165,10 +165,7 @@ Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) fi
### For production build
```bash
-# Do step 1 and 2 in previous section
-git clone https://github.com/janhq/jan
-cd jan
-
+# Do steps 1 and 2 in the previous section
# Build the app
make build
```
@@ -203,7 +200,7 @@ Jan builds on top of other open-source projects:
## Contact
-- Bugs & requests: file a Github ticket
+- Bugs & requests: file a GitHub ticket
- For discussion: join our Discord [here](https://discord.gg/FTk2MvZwJH)
- For business inquiries: email hello@jan.ai
- For jobs: please email hr@jan.ai
diff --git a/docs/docs/guides/05-using-server/01-server.md b/docs/docs/guides/05-using-server/01-server.md
index ad5345533..aa5bf8d3a 100644
--- a/docs/docs/guides/05-using-server/01-server.md
+++ b/docs/docs/guides/05-using-server/01-server.md
@@ -27,6 +27,6 @@ Jan runs on port `1337` by default, but this can be changed in Settings.
Check out the [API Reference](/api-reference) for more information on the API endpoints.
```
-curl https://localhost:1337/v1/chat/completions
+curl http://localhost:1337/v1/chat/completions
```
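+
+For example, a minimal chat completion request looks like this (the model id is illustrative; use one you have downloaded):
+
+```bash
+curl http://localhost:1337/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "tinyllama-1.1b", "messages": [{"role": "user", "content": "Hello"}]}'
+```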
diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml
index 6a59647e5..b7b6bd3c1 100644
--- a/docs/openapi/jan.yaml
+++ b/docs/openapi/jan.yaml
@@ -14,7 +14,7 @@ license:
name: AGPLv3
url: "https://github.com/janhq/nitro/blob/main/LICENSE"
servers:
- - url: "https://localhost:1337/v1/"
+ - url: "http://localhost:1337/v1/"
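+  # The local API server speaks plain HTTP; there is no TLS on port 1337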
tags:
- name: Models
description: List and describe the various models available in the API.
@@ -100,7 +100,7 @@ paths:
x-codeSamples:
- lang: cURL
source: |
- curl https://localhost:1337/v1/models
+ curl http://localhost:1337/v1/models
post:
operationId: downloadModel
tags:
@@ -118,7 +118,7 @@ paths:
x-codeSamples:
- lang: cURL
source: |
- curl -X POST https://localhost:1337/v1/models
+ curl -X POST http://localhost:1337/v1/models
"/models/{model_id}":
get:
operationId: retrieveModel
@@ -149,7 +149,7 @@ paths:
x-codeSamples:
- lang: cURL
source: |
- curl https://localhost:1337/v1/models/{model_id}
+ curl http://localhost:1337/v1/models/{model_id}
delete:
operationId: deleteModel
tags:
@@ -178,7 +178,7 @@ paths:
x-codeSamples:
- lang: cURL
source: |
- curl -X DELETE https://localhost:1337/v1/models/{model_id}
+ curl -X DELETE http://localhost:1337/v1/models/{model_id}
"/models/{model_id}/start":
put:
operationId: startModel
@@ -206,7 +206,7 @@ paths:
x-codeSamples:
- lang: cURL
source: |
- curl -X PUT https://localhost:1337/v1/models/{model_id}/start
+ curl -X PUT http://localhost:1337/v1/models/{model_id}/start
"/models/{model_id}/stop":
put:
operationId: stopModel
@@ -233,7 +233,7 @@ paths:
x-codeSamples:
- lang: cURL
source: |
- curl -X PUT https://localhost:1337/v1/models/{model_id}/stop
+ curl -X PUT http://localhost:1337/v1/models/{model_id}/stop
/threads:
post:
operationId: createThread
diff --git a/extensions/inference-nitro-extension/bin/linux-start.sh b/extensions/inference-nitro-extension/bin/linux-start.sh
deleted file mode 100644
index 199589ad2..000000000
--- a/extensions/inference-nitro-extension/bin/linux-start.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-# Check if nvidia-smi exists and is executable
-if ! command -v nvidia-smi &> /dev/null; then
- echo "nvidia-smi not found, proceeding with CPU version..."
- cd linux-cpu
- ./nitro "$@"
- exit $?
-fi
-
-# Find the GPU with the highest VRAM
-readarray -t gpus < <(nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits)
-maxMemory=0
-selectedGpuId=0
-
-for gpu in "${gpus[@]}"; do
- IFS=, read -ra gpuInfo <<< "$gpu"
- gpuId=${gpuInfo[0]}
- gpuMemory=${gpuInfo[1]}
- if (( gpuMemory > maxMemory )); then
- maxMemory=$gpuMemory
- selectedGpuId=$gpuId
- fi
-done
-
-echo "Selected GPU: $selectedGpuId"
-export CUDA_VISIBLE_DEVICES=$selectedGpuId
-
-# Attempt to run nitro_linux_amd64_cuda
-cd linux-cuda
-./nitro "$@" > output.log 2>&1 || (
- echo "Check output log" &&
- if grep -q "CUDA error" output.log; then
- echo "CUDA error detected, attempting to run nitro_linux_amd64..."
- cd ../linux-cpu && ./nitro "$@"
- exit $?
- fi
- exit $?
-)
diff --git a/extensions/inference-nitro-extension/bin/win-start.bat b/extensions/inference-nitro-extension/bin/win-start.bat
deleted file mode 100644
index 250e0d218..000000000
--- a/extensions/inference-nitro-extension/bin/win-start.bat
+++ /dev/null
@@ -1,45 +0,0 @@
-@echo off
-setlocal enabledelayedexpansion
-
-set "maxMemory=0"
-set "gpuId="
-
-rem check if nvidia-smi command exist or not
-where nvidia-smi >nul 2>&1
-if %errorlevel% neq 0 (
- echo nvidia-smi not found, proceeding with CPU version...
- cd win-cuda
- goto RunCpuVersion
-)
-
-set "tempFile=%temp%\nvidia_smi_output.txt"
-nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits > "%tempFile%"
-
-for /f "usebackq tokens=1-2 delims=, " %%a in ("%tempFile%") do (
- set /a memorySize=%%b
- if !memorySize! gtr !maxMemory! (
- set "maxMemory=!memorySize!"
- set "gpuId=%%a"
- )
-)
-
-rem Echo the selected GPU
-echo Selected GPU: !gpuId!
-
-rem Set the GPU with the highest VRAM as the visible CUDA device
-set CUDA_VISIBLE_DEVICES=!gpuId!
-
-rem Attempt to run nitro_windows_amd64_cuda.exe
-cd win-cuda
-
-nitro.exe %* > output.log
-type output.log | findstr /C:"CUDA error" >nul
-if %errorlevel% equ 0 ( goto :RunCpuVersion ) else ( goto :End )
-
-:RunCpuVersion
-rem Run nitro_windows_amd64.exe...
-cd ..\win-cpu
-nitro.exe %*
-
-:End
-endlocal
diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index 01de6a2b9..b2a143121 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -8,7 +8,7 @@
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
- "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && chmod +x ./bin/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro && chmod +x ./bin/linux-start.sh",
+ "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",
diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index 4537f801c..f0fdc7d7d 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -394,9 +394,9 @@ function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
} else {
let nvida_info = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf8"));
if (nvida_info["run_mode"] === "cpu") {
- binaryFolder = path.join(binaryFolder, "win-cpu");
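+      // This branch runs on Linux; the folders previously pointed at the Windows builds.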
+ binaryFolder = path.join(binaryFolder, "linux-cpu");
} else {
- binaryFolder = path.join(binaryFolder, "win-cuda");
+ binaryFolder = path.join(binaryFolder, "linux-cuda");
cudaVisibleDevices = nvida_info["gpu_highest_vram"];
}
binaryName = "nitro";
diff --git a/server/index.ts b/server/index.ts
index a5fbfe0e6..bc031305e 100644
--- a/server/index.ts
+++ b/server/index.ts
@@ -10,6 +10,23 @@ const JAN_API_PORT = Number.parseInt(process.env.JAN_API_PORT || "1337");
const server = fastify();
server.register(require("@fastify/cors"), {});
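+// Serve the bundled OpenAPI spec (docs/openapi/jan.yaml) as a static Swagger document.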
+server.register(require("@fastify/swagger"), {
+ mode: "static",
+ specification: {
+ path: "./../docs/openapi/jan.yaml",
+ baseDir: "./../docs/openapi",
+ },
+});
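+// Expose interactive API docs at /docs, rendered from the spec registered above.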
+server.register(require("@fastify/swagger-ui"), {
+ routePrefix: "/docs",
+ baseDir: path.join(__dirname, "../..", "./docs/openapi"),
+ uiConfig: {
+ docExpansion: "full",
+ deepLinking: false,
+ },
+ staticCSP: true,
+ transformSpecificationClone: true,
+});
server.register(
(childContext, _, done) => {
childContext.register(require("@fastify/static"), {
diff --git a/server/package.json b/server/package.json
index 2d350a317..36fdc124f 100644
--- a/server/package.json
+++ b/server/package.json
@@ -19,6 +19,8 @@
"dependencies": {
"@fastify/cors": "^8.4.2",
"@fastify/static": "^6.12.0",
+ "@fastify/swagger": "^8.13.0",
+ "@fastify/swagger-ui": "^2.0.1",
"@janhq/core": "link:./core",
"dotenv": "^16.3.1",
"fastify": "^4.24.3",
diff --git a/web/screens/Chat/ThreadList/index.tsx b/web/screens/Chat/ThreadList/index.tsx
index a55cc2830..516bfb6c4 100644
--- a/web/screens/Chat/ThreadList/index.tsx
+++ b/web/screens/Chat/ThreadList/index.tsx
@@ -136,6 +136,7 @@ export default function ThreadList() {
            onClick={() => cleanThread(thread.id)}
+ autoFocus
>
Yes
diff --git a/web/styles/components/message.scss b/web/styles/components/message.scss
index ad9aa7fc9..0bd1e1a71 100644
--- a/web/styles/components/message.scss
+++ b/web/styles/components/message.scss
@@ -1,5 +1,7 @@
.message {
@apply text-black dark:text-gray-300;
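+  // Preserve newlines in chat messages (runs of spaces still collapse)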
+ white-space: pre-line;
+
ul,
ol {
list-style: auto;
@@ -10,3 +12,7 @@
button[class*='react-scroll-to-bottom--'] {
display: none;
}
+
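+// Code blocks handle their own formatting; undo pre-line inside them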
+.code-block {
+ white-space: normal;
+}