diff --git a/.github/workflows/template-tauri-build-linux-x64.yml b/.github/workflows/template-tauri-build-linux-x64.yml
index 6c47c79f2..9356c3f28 100644
--- a/.github/workflows/template-tauri-build-linux-x64.yml
+++ b/.github/workflows/template-tauri-build-linux-x64.yml
@@ -122,6 +122,10 @@ jobs:
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
           mv /tmp/package.json web-app/package.json
 
+          # Temporarily enable the Tauri "devtools" feature for production builds
+          ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
+          cat ./src-tauri/Cargo.toml
+
           ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
           cat ./src-tauri/Cargo.toml
 
diff --git a/.github/workflows/template-tauri-build-macos.yml b/.github/workflows/template-tauri-build-macos.yml
index 6999ff77e..086e14ad2 100644
--- a/.github/workflows/template-tauri-build-macos.yml
+++ b/.github/workflows/template-tauri-build-macos.yml
@@ -126,6 +126,10 @@ jobs:
           ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
           cat ./src-tauri/Cargo.toml
 
+          # Temporarily enable the Tauri "devtools" feature for production builds
+          ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
+          cat ./src-tauri/Cargo.toml
+
           # Change app name for beta and nightly builds
           if [ "${{ inputs.channel }}" != "stable" ]; then
             jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
diff --git a/.github/workflows/template-tauri-build-windows-x64.yml b/.github/workflows/template-tauri-build-windows-x64.yml
index 47b5663cb..2ab6d7ad9 100644
--- a/.github/workflows/template-tauri-build-windows-x64.yml
+++ b/.github/workflows/template-tauri-build-windows-x64.yml
@@ -137,6 +137,10 @@ jobs:
           sed -i "s/jan_version/$new_base_version/g" ./src-tauri/tauri.bundle.windows.nsis.template
           sed -i "s/jan_build/$new_build_version/g" ./src-tauri/tauri.bundle.windows.nsis.template          
 
+          # Temporarily enable the Tauri "devtools" feature for production builds
+          ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
+          cat ./src-tauri/Cargo.toml
+
           # Change app name for beta and nightly builds
           if [ "${{ inputs.channel }}" != "stable" ]; then
             jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
@@ -160,6 +164,9 @@ jobs:
             
             sed -i "s/jan_productname/Jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template
             sed -i "s/jan_mainbinaryname/jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template
+          else
+            sed -i "s/jan_productname/Jan/g" ./src-tauri/tauri.bundle.windows.nsis.template
+            sed -i "s/jan_mainbinaryname/jan/g" ./src-tauri/tauri.bundle.windows.nsis.template
           fi
           echo "---------nsis.template---------"
           cat ./src-tauri/tauri.bundle.windows.nsis.template
diff --git a/core/src/browser/models/utils.ts b/core/src/browser/models/utils.ts
index 0e52441b2..2ac243b6a 100644
--- a/core/src/browser/models/utils.ts
+++ b/core/src/browser/models/utils.ts
@@ -17,7 +17,7 @@ export const validationRules: { [key: string]: (value: any) => boolean } = {
   presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
 
   ctx_len: (value: any) => Number.isInteger(value) && value >= 0,
-  ngl: (value: any) => Number.isInteger(value) && value >= 0,
+  ngl: (value: any) => Number.isInteger(value),
   embedding: (value: any) => typeof value === 'boolean',
   n_parallel: (value: any) => Number.isInteger(value) && value >= 0,
   cpu_threads: (value: any) => Number.isInteger(value) && value >= 0,
diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json
index d27624639..54d578293 100644
--- a/extensions/inference-cortex-extension/resources/default_settings.json
+++ b/extensions/inference-cortex-extension/resources/default_settings.json
@@ -14,7 +14,7 @@
     "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.",
     "controllerType": "checkbox",
     "controllerProps": {
-      "value": true
+      "value": false
     }
   },
   {
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 3e8b60ebe..dd78e2d62 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -64,7 +64,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   cpu_threads?: number
   auto_unload_models: boolean = true
   reasoning_budget = -1 // Default reasoning budget in seconds
-  context_shift = true
+  context_shift = false
   /**
    * The URL for making inference requests.
    */
@@ -132,7 +132,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
     this.context_shift = await this.getSetting<boolean>(
       Settings.context_shift,
-      true
+      false
     )
     this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
     if (this.caching_enabled)
@@ -253,11 +253,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
         }
       }
     }
+    const modelSettings = extractModelLoadParams(model.settings)
     return await this.apiInstance().then((api) =>
       api
         .post('v1/models/start', {
           json: {
-            ...extractModelLoadParams(model.settings),
+            ...modelSettings,
             model: model.id,
             engine:
               model.engine === 'nitro' // Legacy model cache
@@ -282,6 +283,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
             ...(this.context_shift === false
               ? { 'no-context-shift': true }
               : {}),
+            ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined
+              ? { ngl: 100 }
+              : {}),
           },
           timeout: false,
           signal,
diff --git a/extensions/model-extension/resources/default.json b/extensions/model-extension/resources/default.json
index 32bc278e4..bd7c7e63b 100644
--- a/extensions/model-extension/resources/default.json
+++ b/extensions/model-extension/resources/default.json
@@ -125,59 +125,59 @@
     },
     "models": [
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-iQ4_XS.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-iQ4_XS.gguf",
         "size": 2270750400
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_L.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_L.gguf",
         "size": 2239784384
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_M.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_M.gguf",
         "size": 2075616704
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q3_K_S.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_S.gguf",
         "size": 1886995904
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q4_0.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_0.gguf",
         "size": 2369545664
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q4_1.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_1.gguf",
         "size": 2596627904
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q4_K_M.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_K_M.gguf",
         "size": 2497279424
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q4_K_S.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_K_S.gguf",
         "size": 2383308224
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q5_0.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_0.gguf",
         "size": 2823710144
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q5_1.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_1.gguf",
         "size": 3050792384
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q5_K_M.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_K_M.gguf",
         "size": 2889512384
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q5_K_S.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_K_S.gguf",
         "size": 2823710144
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q6_K.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q6_K.gguf",
         "size": 3306259904
       },
       {
-        "id": "Menlo:Jan-nano:jan-nano-4b-Q8_0.gguf",
+        "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q8_0.gguf",
         "size": 4280403904
       }
     ]
diff --git a/src-tauri/src/core/cmd.rs b/src-tauri/src/core/cmd.rs
index 4a48e63d3..4b4463d12 100644
--- a/src-tauri/src/core/cmd.rs
+++ b/src-tauri/src/core/cmd.rs
@@ -348,23 +348,41 @@ pub async fn start_server(
     api_key: String,
     trusted_hosts: Vec<String>,
 ) -> Result<bool, String> {
-    let auth_token = app
-        .state::<AppState>()
-        .app_token
-        .clone()
-        .unwrap_or_default();
-    server::start_server(host, port, prefix, auth_token, api_key, trusted_hosts)
-        .await
-        .map_err(|e| e.to_string())?;
+    let state = app.state::<AppState>();
+    let auth_token = state.app_token.clone().unwrap_or_default();
+    let server_handle = state.server_handle.clone();
+
+    server::start_server(
+        server_handle,
+        host,
+        port,
+        prefix,
+        auth_token,
+        api_key,
+        trusted_hosts,
+    )
+    .await
+    .map_err(|e| e.to_string())?;
     Ok(true)
 }
 
 #[tauri::command]
-pub async fn stop_server() -> Result<(), String> {
-    server::stop_server().await.map_err(|e| e.to_string())?;
+pub async fn stop_server(state: State<'_, AppState>) -> Result<(), String> {
+    // Pass the server module its own clone of the shared handle slot; an error
+    // coming back is converted to its display string before propagating.
+    server::stop_server(state.server_handle.clone())
+        .await
+        .map_err(|e| e.to_string())?;
     Ok(())
 }
 
+/// Tauri command reporting whether a proxy server handle is currently stored in the app state.
+#[tauri::command]
+pub async fn get_server_status(state: State<'_, AppState>) -> Result<bool, String> {
+    // The helper takes the handle slot by value, so give it a clone of the Arc.
+    Ok(server::is_server_running(state.server_handle.clone()).await)
+}
+
 #[tauri::command]
 pub async fn read_logs(app: AppHandle) -> Result<String, String> {
     let log_path = get_jan_data_folder_path(app).join("logs").join("app.log");
diff --git a/src-tauri/src/core/server.rs b/src-tauri/src/core/server.rs
index e5a784670..6da4ebf9b 100644
--- a/src-tauri/src/core/server.rs
+++ b/src-tauri/src/core/server.rs
@@ -1,17 +1,16 @@
+use flate2::read::GzDecoder;
+use futures_util::StreamExt;
 use hyper::service::{make_service_fn, service_fn};
 use hyper::{Body, Request, Response, Server, StatusCode};
 use reqwest::Client;
+use serde_json::Value;
 use std::convert::Infallible;
+use std::io::Read;
 use std::net::SocketAddr;
-use std::sync::LazyLock;
+use std::sync::Arc;
 use tokio::sync::Mutex;
-use tokio::task::JoinHandle;
 
-/// Server handle type for managing the proxy server lifecycle
-type ServerHandle = JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>>;
-
-/// Global singleton for the current server instance
-static SERVER_HANDLE: LazyLock<Mutex<Option<ServerHandle>>> = LazyLock::new(|| Mutex::new(None));
+use crate::core::state::ServerHandle;
 
 /// Configuration for the proxy server
 #[derive(Clone)]
@@ -263,11 +262,12 @@ async fn proxy_request(
 
     let original_path = req.uri().path();
     let path = get_destination_path(original_path, &config.prefix);
+    let method = req.method().clone();
 
     // Verify Host header (check target), but bypass for whitelisted paths
     let whitelisted_paths = ["/", "/openapi.json", "/favicon.ico"];
     let is_whitelisted_path = whitelisted_paths.contains(&path.as_str());
-    
+
     if !is_whitelisted_path {
         if !host_header.is_empty() {
             if !is_valid_host(&host_header, &config.trusted_hosts) {
@@ -328,7 +328,10 @@ async fn proxy_request(
                 .unwrap());
         }
     } else if is_whitelisted_path {
-        log::debug!("Bypassing authorization check for whitelisted path: {}", path);
+        log::debug!(
+            "Bypassing authorization check for whitelisted path: {}",
+            path
+        );
     }
 
     // Block access to /configs endpoint
@@ -368,10 +371,11 @@ async fn proxy_request(
 
             let mut builder = Response::builder().status(status);
 
-            // Copy response headers, excluding CORS headers to avoid conflicts
+            // Copy response headers, excluding CORS headers and Content-Length to avoid conflicts
             for (name, value) in response.headers() {
                 // Skip CORS headers from upstream to avoid duplicates
-                if !is_cors_header(name.as_str()) {
+                // Skip Content-Length header when filtering models response to avoid mismatch
+                if !is_cors_header(name.as_str()) && name != hyper::header::CONTENT_LENGTH {
                     builder = builder.header(name, value);
                 }
             }
@@ -384,23 +388,59 @@ async fn proxy_request(
                 &config.trusted_hosts,
             );
 
-            // Read response body
-            match response.bytes().await {
-                Ok(bytes) => Ok(builder.body(Body::from(bytes)).unwrap()),
-                Err(e) => {
-                    log::error!("Failed to read response body: {}", e);
-                    let mut error_response =
-                        Response::builder().status(StatusCode::INTERNAL_SERVER_ERROR);
-                    error_response = add_cors_headers_with_host_and_origin(
-                        error_response,
-                        &host_header,
-                        &origin_header,
-                        &config.trusted_hosts,
-                    );
-                    Ok(error_response
-                        .body(Body::from("Error reading upstream response"))
-                        .unwrap())
+            // Handle streaming vs non-streaming responses
+            if path.contains("/models") && method == hyper::Method::GET {
+                // For /models endpoint, we need to buffer and filter the response
+                match response.bytes().await {
+                    Ok(bytes) => match filter_models_response(&bytes) {
+                        Ok(filtered_bytes) => Ok(builder.body(Body::from(filtered_bytes)).unwrap()),
+                        Err(e) => {
+                            log::warn!(
+                                "Failed to filter models response: {}, returning original",
+                                e
+                            );
+                            Ok(builder.body(Body::from(bytes)).unwrap())
+                        }
+                    },
+                    Err(e) => {
+                        log::error!("Failed to read response body: {}", e);
+                        let mut error_response =
+                            Response::builder().status(StatusCode::INTERNAL_SERVER_ERROR);
+                        error_response = add_cors_headers_with_host_and_origin(
+                            error_response,
+                            &host_header,
+                            &origin_header,
+                            &config.trusted_hosts,
+                        );
+                        Ok(error_response
+                            .body(Body::from("Error reading upstream response"))
+                            .unwrap())
+                    }
                 }
+            } else {
+                // For streaming endpoints (like chat completions), we need to collect and forward the stream
+                let mut stream = response.bytes_stream();
+                let (mut sender, body) = hyper::Body::channel();
+
+                // Spawn a task to forward the stream
+                tokio::spawn(async move {
+                    while let Some(chunk_result) = stream.next().await {
+                        match chunk_result {
+                            Ok(chunk) => {
+                                if sender.send_data(chunk).await.is_err() {
+                                    log::debug!("Client disconnected during streaming");
+                                    break;
+                                }
+                            }
+                            Err(e) => {
+                                log::error!("Stream error: {}", e);
+                                break;
+                            }
+                        }
+                    }
+                });
+
+                Ok(builder.body(body).unwrap())
             }
         }
         Err(e) => {
@@ -419,6 +459,98 @@ async fn proxy_request(
     }
 }
 
+/// Returns `true` when `bytes` begins with the two-byte gzip magic number (0x1f 0x8b).
+fn is_gzip_encoded(bytes: &[u8]) -> bool {
+    bytes.starts_with(&[0x1f, 0x8b])
+}
+
+/// Inflates a gzip stream held in memory and returns the decoded bytes.
+fn decompress_gzip(bytes: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error + Send + Sync>> {
+    let mut inflated = Vec::new();
+    // `?` converts any `std::io::Error` from the decoder into the boxed error.
+    GzDecoder::new(bytes).read_to_end(&mut inflated)?;
+    Ok(inflated)
+}
+
+/// Gzip-compresses `bytes` at the default compression level.
+fn compress_gzip(bytes: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error + Send + Sync>> {
+    use flate2::{write::GzEncoder, Compression};
+    use std::io::Write;
+
+    // The encoder writes into an in-memory Vec that `finish` hands back.
+    let mut gz_writer = GzEncoder::new(Vec::new(), Compression::default());
+    gz_writer.write_all(bytes)?;
+    // Any io::Error from `finish` is boxed by `?` before the Vec is returned.
+    Ok(gz_writer.finish()?)
+}
+
+/// Filters a models response so that only entries whose `status` is the string
+/// "downloaded" remain.
+///
+/// Two JSON shapes are filtered: an object with a `data` array (the
+/// ListModelsResponseDto format) and a bare top-level array. Any other JSON
+/// value is re-serialized without filtering.
+///
+/// A body that starts with the gzip magic number is decompressed before parsing
+/// and the filtered JSON is gzip-compressed again, so the output encoding
+/// matches the input.
+///
+/// # Errors
+/// Fails when decompression, UTF-8 decoding, JSON parsing, serialization, or
+/// re-compression fails.
+fn filter_models_response(
+    bytes: &[u8],
+) -> Result<Vec<u8>, Box<dyn std::error::Error + Send + Sync>> {
+    /// Keeps only entries whose `status` is the string "downloaded"; a missing
+    /// or non-string status removes the entry.
+    fn retain_downloaded(models: &mut Vec<Value>) {
+        // Comparing against `Some("downloaded")` rejects absent and non-string statuses too.
+        models.retain(|model| model.get("status").and_then(Value::as_str) == Some("downloaded"));
+        log::debug!(
+            "Filtered models response: {} downloaded models remaining",
+            models.len()
+        );
+    }
+
+    // Sniff the encoding once up front; the same answer later decides whether
+    // the filtered JSON has to be re-compressed.
+    let was_gzip = is_gzip_encoded(bytes);
+    let decompressed_bytes = if was_gzip {
+        log::debug!("Response is gzip-encoded, decompressing...");
+        std::borrow::Cow::Owned(decompress_gzip(bytes)?)
+    } else {
+        // Plain bodies are borrowed rather than copied.
+        std::borrow::Cow::Borrowed(bytes)
+    };
+
+    let response_text = std::str::from_utf8(&decompressed_bytes)?;
+    let mut response_json: Value = serde_json::from_str(response_text)?;
+
+    // Filter in place according to the top-level JSON shape.
+    match &mut response_json {
+        // Direct array format.
+        Value::Array(models) => retain_downloaded(models),
+        // ListModelsResponseDto format: filter `data` only when it is an array.
+        Value::Object(fields) => {
+            if let Some(Value::Array(models)) = fields.get_mut("data") {
+                retain_downloaded(models);
+            }
+        }
+        // Any other JSON value has no model list to filter.
+        _ => {}
+    }
+
+    let filtered_json = serde_json::to_vec(&response_json)?;
+
+    // Mirror the input encoding: gzip in, gzip out.
+    if was_gzip {
+        log::debug!("Re-compressing filtered response with gzip");
+        compress_gzip(&filtered_json)
+    } else {
+        Ok(filtered_json)
+    }
+}
+
 /// Checks if a header is a CORS-related header that should be filtered out from upstream responses
 fn is_cors_header(header_name: &str) -> bool {
     let header_lower = header_name.to_lowercase();
@@ -509,8 +641,19 @@ fn is_valid_host(host: &str, trusted_hosts: &[String]) -> bool {
     })
 }
 
+/// Reports whether a proxy server task handle is currently stored.
+///
+/// Only the presence of a handle is checked; the function does not inspect
+/// whether the spawned task has already finished.
+pub async fn is_server_running(server_handle: Arc<Mutex<Option<ServerHandle>>>) -> bool {
+    // `is_some()` already yields the bool, so no `if`/`else` wrapper is needed.
+    let handle_guard = server_handle.lock().await;
+    handle_guard.is_some()
+}
+
 /// Starts the proxy server
 pub async fn start_server(
+    server_handle: Arc<Mutex<Option<ServerHandle>>>,
     host: String,
     port: u16,
     prefix: String,
@@ -519,7 +662,7 @@ pub async fn start_server(
     trusted_hosts: Vec<String>,
 ) -> Result<bool, Box<dyn std::error::Error + Send + Sync>> {
     // Check if server is already running
-    let mut handle_guard = SERVER_HANDLE.lock().await;
+    let mut handle_guard = server_handle.lock().await;
     if handle_guard.is_some() {
         return Err("Server is already running".into());
     }
@@ -538,9 +681,11 @@ pub async fn start_server(
         trusted_hosts,
     };
 
-    // Create HTTP client
+    // Create HTTP client with longer timeout for streaming
     let client = Client::builder()
-        .timeout(std::time::Duration::from_secs(30))
+        .timeout(std::time::Duration::from_secs(300)) // 5 minutes for streaming
+        .pool_max_idle_per_host(10)
+        .pool_idle_timeout(std::time::Duration::from_secs(30))
         .build()?;
 
     // Create service handler
@@ -560,7 +705,7 @@ pub async fn start_server(
     log::info!("Proxy server started on http://{}", addr);
 
     // Spawn server task
-    let server_handle = tokio::spawn(async move {
+    let server_task = tokio::spawn(async move {
         if let Err(e) = server.await {
             log::error!("Server error: {}", e);
             return Err(Box::new(e) as Box<dyn std::error::Error + Send + Sync>);
@@ -568,16 +713,20 @@ pub async fn start_server(
         Ok(())
     });
 
-    *handle_guard = Some(server_handle);
+    *handle_guard = Some(server_task);
     Ok(true)
 }
 
 /// Stops the currently running proxy server
-pub async fn stop_server() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-    let mut handle_guard = SERVER_HANDLE.lock().await;
+pub async fn stop_server(
+    server_handle: Arc<Mutex<Option<ServerHandle>>>,
+) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
+    let mut handle_guard = server_handle.lock().await;
 
     if let Some(handle) = handle_guard.take() {
         handle.abort();
+        // remove the handle to prevent future use
+        *handle_guard = None;
         log::info!("Proxy server stopped");
     } else {
         log::debug!("No server was running");
@@ -585,3 +734,139 @@ pub async fn stop_server() -> Result<(), Box<dyn std::error::Error + Send + Sync
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    /// Serializes `input`, runs it through `filter_models_response`, and parses the output.
+    fn run_filter(input: &serde_json::Value) -> serde_json::Value {
+        let response_bytes = serde_json::to_vec(input).unwrap();
+        let filtered_bytes = filter_models_response(&response_bytes).unwrap();
+        serde_json::from_slice(&filtered_bytes).unwrap()
+    }
+
+    /// Returns the `id` of every entry in a JSON array of models, in order.
+    fn model_ids(models: &serde_json::Value) -> Vec<&str> {
+        models
+            .as_array()
+            .unwrap()
+            .iter()
+            .map(|model| model["id"].as_str().unwrap())
+            .collect()
+    }
+
+    #[test]
+    fn test_filter_models_response_with_downloaded_status() {
+        let filtered = run_filter(&json!({
+            "object": "list",
+            "data": [
+                { "id": "model1", "name": "Model 1", "status": "downloaded" },
+                { "id": "model2", "name": "Model 2", "status": "available" },
+                { "id": "model3", "name": "Model 3" }
+            ]
+        }));
+
+        // model2 has another status and model3 has none, so only model1 survives.
+        assert_eq!(model_ids(&filtered["data"]), vec!["model1"]);
+        assert_eq!(filtered["object"], "list");
+    }
+
+    #[test]
+    fn test_filter_models_response_direct_array() {
+        let filtered = run_filter(&json!([
+            { "id": "model1", "name": "Model 1", "status": "downloaded" },
+            { "id": "model2", "name": "Model 2", "status": "available" }
+        ]));
+
+        assert_eq!(model_ids(&filtered), vec!["model1"]);
+    }
+
+    #[test]
+    fn test_filter_models_response_no_status_field() {
+        let filtered = run_filter(&json!({
+            "object": "list",
+            "data": [
+                { "id": "model1", "name": "Model 1" },
+                { "id": "model2", "name": "Model 2" }
+            ]
+        }));
+
+        // No entry has a status, so nothing counts as downloaded.
+        assert!(filtered["data"].as_array().unwrap().is_empty());
+    }
+
+    #[test]
+    fn test_filter_models_response_multiple_downloaded() {
+        let filtered = run_filter(&json!({
+            "object": "list",
+            "data": [
+                { "id": "model1", "name": "Model 1", "status": "downloaded" },
+                { "id": "model2", "name": "Model 2", "status": "available" },
+                { "id": "model3", "name": "Model 3", "status": "downloaded" },
+                { "id": "model4", "name": "Model 4", "status": "installing" }
+            ]
+        }));
+
+        // Downloaded entries keep their original relative order.
+        assert_eq!(model_ids(&filtered["data"]), vec!["model1", "model3"]);
+    }
+
+    #[test]
+    fn test_filter_models_response_non_string_status() {
+        let filtered = run_filter(&json!({
+            "data": [
+                { "id": "model1", "status": true },
+                { "id": "model2", "status": null },
+                { "id": "model3", "status": "downloaded" }
+            ]
+        }));
+
+        // A status that is not a string never matches "downloaded".
+        assert_eq!(model_ids(&filtered["data"]), vec!["model3"]);
+    }
+
+    #[test]
+    fn test_filter_models_response_passes_through_other_shapes() {
+        // A single-model object has no `data` array and must come back unchanged.
+        let single = json!({ "id": "model1", "object": "model" });
+        assert_eq!(run_filter(&single), single);
+
+        // `data` that is not an array is left alone as well.
+        let odd = json!({ "data": "not-a-list" });
+        assert_eq!(run_filter(&odd), odd);
+    }
+
+    #[test]
+    fn test_filter_models_response_invalid_json_is_error() {
+        assert!(filter_models_response(b"not json").is_err());
+    }
+
+    #[test]
+    fn test_filter_models_response_gzip_round_trip() {
+        let plain = serde_json::to_vec(&json!({
+            "data": [
+                { "id": "model1", "status": "downloaded" },
+                { "id": "model2", "status": "available" }
+            ]
+        }))
+        .unwrap();
+        let compressed = compress_gzip(&plain).unwrap();
+
+        let filtered_bytes = filter_models_response(&compressed).unwrap();
+
+        // Gzip input must produce gzip output.
+        assert!(is_gzip_encoded(&filtered_bytes));
+        let decoded = decompress_gzip(&filtered_bytes).unwrap();
+        let filtered: serde_json::Value = serde_json::from_slice(&decoded).unwrap();
+        assert_eq!(model_ids(&filtered["data"]), vec!["model1"]);
+    }
+
+    #[test]
+    fn test_is_gzip_encoded_checks_magic_number() {
+        assert!(is_gzip_encoded(&[0x1f, 0x8b, 0x08]));
+        assert!(!is_gzip_encoded(&[0x1f]));
+        assert!(!is_gzip_encoded(b"{}"));
+    }
+}
diff --git a/src-tauri/src/core/setup.rs b/src-tauri/src/core/setup.rs
index 6883b2a3b..c2d3499f3 100644
--- a/src-tauri/src/core/setup.rs
+++ b/src-tauri/src/core/setup.rs
@@ -247,7 +247,10 @@ pub fn setup_sidecar(app: &App) -> Result<(), String> {
                 ]);
             #[cfg(target_os = "windows")]
             {
-                cmd = cmd.current_dir(app_handle_for_spawn.path().resource_dir().unwrap());
+                let resource_dir = app_handle_for_spawn.path().resource_dir().unwrap();
+                let normalized_path = resource_dir.to_string_lossy().replace(r"\\?\", "");
+                let normalized_pathbuf = PathBuf::from(normalized_path);
+                cmd = cmd.current_dir(normalized_pathbuf);
             }
 
             #[cfg(not(target_os = "windows"))]
@@ -291,6 +294,7 @@ pub fn setup_sidecar(app: &App) -> Result<(), String> {
                 } else {
                     log::warn!("Kill event received, but no active sidecar process found to kill.");
                 }
+                clean_up()
             });
         });
 
diff --git a/src-tauri/src/core/state.rs b/src-tauri/src/core/state.rs
index cb6a5d3fa..9957ba92e 100644
--- a/src-tauri/src/core/state.rs
+++ b/src-tauri/src/core/state.rs
@@ -4,6 +4,10 @@ use crate::core::utils::download::DownloadManagerState;
 use rand::{distributions::Alphanumeric, Rng};
 use rmcp::{service::RunningService, RoleClient};
 use tokio::sync::Mutex;
+use tokio::task::JoinHandle;
+
+/// Server handle type for managing the proxy server lifecycle
+pub type ServerHandle = JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>>;
 
 #[derive(Default)]
 pub struct AppState {
@@ -12,6 +16,7 @@ pub struct AppState {
     pub download_manager: Arc<Mutex<DownloadManagerState>>,
     pub cortex_restart_count: Arc<Mutex<u32>>,
     pub cortex_killed_intentionally: Arc<Mutex<bool>>,
+    pub server_handle: Arc<Mutex<Option<ServerHandle>>>,
 }
 pub fn generate_app_token() -> String {
     rand::thread_rng()
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 076984106..4ed6ecee7 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -55,6 +55,7 @@ pub fn run() {
             core::cmd::app_token,
             core::cmd::start_server,
             core::cmd::stop_server,
+            core::cmd::get_server_status,
             core::cmd::read_logs,
             core::cmd::change_app_data_folder,
             core::cmd::reset_cortex_restart_count,
@@ -92,6 +93,7 @@ pub fn run() {
             download_manager: Arc::new(Mutex::new(DownloadManagerState::default())),
             cortex_restart_count: Arc::new(Mutex::new(0)),
             cortex_killed_intentionally: Arc::new(Mutex::new(false)),
+            server_handle: Arc::new(Mutex::new(None)),
         })
         .setup(|app| {
             app.handle().plugin(
diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json
index 1810c0661..04191e842 100644
--- a/src-tauri/tauri.conf.json
+++ b/src-tauri/tauri.conf.json
@@ -45,7 +45,7 @@
         ],
         "img-src": "'self' asset: http://asset.localhost blob: data: https://cdn.jsdelivr.net",
         "style-src": "'unsafe-inline' 'self' https://fonts.googleapis.com",
-        "script-src": "'self' asset: $APPDATA/**.* http://asset.localhost"
+        "script-src": "'self' asset: $APPDATA/**.* http://asset.localhost https://eu-assets.i.posthog.com https://posthog.com"
       },
       "assetProtocol": {
         "enable": true,
diff --git a/web-app/package.json b/web-app/package.json
index ad82e5688..8b3193817 100644
--- a/web-app/package.json
+++ b/web-app/package.json
@@ -65,7 +65,7 @@
     "remark-math": "^6.0.0",
     "sonner": "^2.0.3",
     "tailwindcss": "^4.1.4",
-    "token.js": "npm:token.js-fork@0.7.5",
+    "token.js": "npm:token.js-fork@0.7.9",
     "tw-animate-css": "^1.2.7",
     "ulidx": "^2.4.1",
     "unified": "^11.0.5",
diff --git a/web-app/src/containers/DownloadManegement.tsx b/web-app/src/containers/DownloadManegement.tsx
index 47b448485..db78181c6 100644
--- a/web-app/src/containers/DownloadManegement.tsx
+++ b/web-app/src/containers/DownloadManegement.tsx
@@ -19,7 +19,13 @@ export function DownloadManagement() {
   const { setProviders } = useModelProvider()
   const { open: isLeftPanelOpen } = useLeftPanel()
   const [isPopoverOpen, setIsPopoverOpen] = useState(false)
-  const { downloads, updateProgress, removeDownload } = useDownloadStore()
+  const {
+    downloads,
+    updateProgress,
+    localDownloadingModels,
+    removeDownload,
+    removeLocalDownloadingModel,
+  } = useDownloadStore()
   const { updateState } = useAppUpdater()
 
   const [appUpdateState, setAppUpdateState] = useState({
@@ -76,23 +82,36 @@ export function DownloadManagement() {
     })
   }, [])
 
+  const downloadProcesses = useMemo(() => {
+    // Rows for downloads that already have an entry in `downloads`;
+    // the download's name doubles as the row id
+    const tracked = Object.values(downloads).map(
+      ({ name, progress, current, total }) => ({
+        id: name,
+        name,
+        progress,
+        current,
+        total,
+      })
+    )
+
+    // Zeroed placeholder rows for models flagged locally as downloading
+    // that have no entry in `downloads` yet
+    const emptyCounters = { progress: 0, current: 0, total: 0 }
+    const pending = Array.from(localDownloadingModels)
+      .filter((modelId) => !downloads[modelId])
+      .map((modelId) => ({ id: modelId, name: modelId, ...emptyCounters }))
+
+    // Tracked rows first, placeholders after
+    return [...tracked, ...pending]
+  }, [downloads, localDownloadingModels])
+
   const downloadCount = useMemo(() => {
-    const modelDownloads = Object.keys(downloads).length
+    const modelDownloads = downloadProcesses.length
     const appUpdateDownload = appUpdateState.isDownloading ? 1 : 0
     const total = modelDownloads + appUpdateDownload
     return total
-  }, [downloads, appUpdateState.isDownloading])
-  const downloadProcesses = useMemo(
-    () =>
-      Object.values(downloads).map((download) => ({
-        id: download.name,
-        name: download.name,
-        progress: download.progress,
-        current: download.current,
-        total: download.total,
-      })),
-    [downloads]
-  )
+  }, [downloadProcesses, appUpdateState.isDownloading])
 
   const overallProgress = useMemo(() => {
     const modelTotal = downloadProcesses.reduce((acc, download) => {
@@ -139,29 +158,32 @@ export function DownloadManagement() {
     (state: DownloadState) => {
       console.debug('onFileDownloadError', state)
       removeDownload(state.modelId)
+      removeLocalDownloadingModel(state.modelId)
     },
-    [removeDownload]
+    [removeDownload, removeLocalDownloadingModel]
   )
 
   const onFileDownloadStopped = useCallback(
     (state: DownloadState) => {
       console.debug('onFileDownloadError', state)
       removeDownload(state.modelId)
+      removeLocalDownloadingModel(state.modelId)
     },
-    [removeDownload]
+    [removeDownload, removeLocalDownloadingModel]
   )
 
   const onFileDownloadSuccess = useCallback(
     async (state: DownloadState) => {
       console.debug('onFileDownloadSuccess', state)
       removeDownload(state.modelId)
+      removeLocalDownloadingModel(state.modelId)
       getProviders().then(setProviders)
       toast.success('Download Complete', {
         id: 'download-complete',
         description: `The model ${state.modelId} has been downloaded`,
       })
     },
-    [removeDownload, setProviders]
+    [removeDownload, removeLocalDownloadingModel, setProviders]
   )
 
   useEffect(() => {
@@ -264,12 +286,16 @@ export function DownloadManagement() {
                     />
                     <p className="text-main-view-fg/60 text-xs">
                       {`${renderGB(appUpdateState.downloadedBytes)} / ${renderGB(appUpdateState.totalBytes)}`}{' '}
-                      GB ({Math.round(appUpdateState.downloadProgress * 100)}%)
+                      GB ({Math.round(appUpdateState.downloadProgress * 100)}
+                      %)
                     </p>
                   </div>
                 )}
                 {downloadProcesses.map((download) => (
-                  <div className="bg-main-view-fg/4 rounded-md p-2">
+                  <div
+                    key={download.id}
+                    className="bg-main-view-fg/4 rounded-md p-2"
+                  >
                     <div className="flex items-center justify-between">
                       <p className="truncate text-main-view-fg/80">
                         {download.name}
@@ -299,8 +325,9 @@ export function DownloadManagement() {
                       className="my-2"
                     />
                     <p className="text-main-view-fg/60 text-xs">
-                      {`${renderGB(download.current)} / ${renderGB(download.total)}`}{' '}
-                      GB ({Math.round(download.progress * 100)}%)
+                      {download.total > 0
+                        ? `${renderGB(download.current)} / ${renderGB(download.total)} GB (${Math.round(download.progress * 100)}%)`
+                        : 'Initializing download...'}
                     </p>
                   </div>
                 ))}
diff --git a/web-app/src/containers/DropdownModelProvider.tsx b/web-app/src/containers/DropdownModelProvider.tsx
index 6d5d24155..0747a1ad1 100644
--- a/web-app/src/containers/DropdownModelProvider.tsx
+++ b/web-app/src/containers/DropdownModelProvider.tsx
@@ -16,6 +16,7 @@ import { ModelSetting } from '@/containers/ModelSetting'
 import ProvidersAvatar from '@/containers/ProvidersAvatar'
 import { Fzf } from 'fzf'
 import { localStorageKey } from '@/constants/localStorage'
+import { isProd } from '@/lib/version'
 
 type DropdownModelProviderProps = {
   model?: ThreadModel
@@ -390,17 +391,12 @@ const DropdownModelProvider = ({
                               )}
                             >
                               <div className="flex items-center gap-2 flex-1 min-w-0">
-                                <span
-                                  className="truncate text-main-view-fg/80 text-sm"
-                                  dangerouslySetInnerHTML={{
-                                    __html:
-                                      searchableModel.highlightedId ||
-                                      searchableModel.model.id,
-                                  }}
-                                />
+                                <span className="truncate text-main-view-fg/80 text-sm">
+                                  {searchableModel.model.id}
+                                </span>
 
                                 <div className="flex-1"></div>
-                                {capabilities.length > 0 && (
+                                {!isProd && capabilities.length > 0 && (
                                   <div className="flex-shrink-0 -mr-1.5">
                                     <Capabilities capabilities={capabilities} />
                                   </div>
diff --git a/web-app/src/containers/SetupScreen.tsx b/web-app/src/containers/SetupScreen.tsx
index 6f3f07873..cf8e32c84 100644
--- a/web-app/src/containers/SetupScreen.tsx
+++ b/web-app/src/containers/SetupScreen.tsx
@@ -3,6 +3,7 @@ import { useModelProvider } from '@/hooks/useModelProvider'
 import { Link } from '@tanstack/react-router'
 import { route } from '@/constants/routes'
 import HeaderPage from './HeaderPage'
+import { isProd } from '@/lib/version'
 
 function SetupScreen() {
   const { providers } = useModelProvider()
@@ -19,7 +20,7 @@ function SetupScreen() {
               Welcome to Jan
             </h1>
             <p className="text-main-view-fg/70 text-lg mt-2">
-              To get started, you’ll need to either download a local AI model or
+              To get started, you'll need to either download a local AI model or
               connect to a cloud model using an API key
             </p>
           </div>
@@ -29,7 +30,7 @@ function SetupScreen() {
                 <Link
                   to={route.hub}
                   search={{
-                    step: 'setup_local_provider',
+                    ...(!isProd ? { step: 'setup_local_provider' } : {}),
                   }}
                 >
                   <div>
diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx
index af359d32e..076327ea6 100644
--- a/web-app/src/containers/ThreadContent.tsx
+++ b/web-app/src/containers/ThreadContent.tsx
@@ -358,12 +358,15 @@ export const ThreadContent = memo(
             ) : null}
 
             {!isToolCalls && (
-              <div className="flex items-center gap-2 mt-2 text-main-view-fg/60 text-xs">
+              <div className="flex items-center gap-2 text-main-view-fg/60 text-xs">
                 <div className={cn('flex items-center gap-2')}>
                   <div
                     className={cn(
                       'flex items-center gap-2',
-                      item.isLastMessage && streamingContent && 'hidden'
+                      item.isLastMessage &&
+                        streamingContent &&
+                        streamingContent.thread_id === item.thread_id &&
+                        'hidden'
                     )}
                   >
                     <CopyButton text={item.content?.[0]?.text.value || ''} />
@@ -417,17 +420,6 @@ export const ThreadContent = memo(
                               />
                             </div>
                           </div>
-                          <DialogFooter className="mt-2 flex items-center">
-                            <DialogClose asChild>
-                              <Button
-                                variant="link"
-                                size="sm"
-                                className="hover:no-underline"
-                              >
-                                Close
-                              </Button>
-                            </DialogClose>
-                          </DialogFooter>
                         </DialogHeader>
                       </DialogContent>
                     </Dialog>
@@ -450,7 +442,11 @@ export const ThreadContent = memo(
                   </div>
 
                   <TokenSpeedIndicator
-                    streaming={Boolean(item.isLastMessage && streamingContent)}
+                    streaming={Boolean(
+                      item.isLastMessage &&
+                        streamingContent &&
+                        streamingContent.thread_id === item.thread_id
+                    )}
                     metadata={item.metadata}
                   />
                 </div>
@@ -458,6 +454,7 @@ export const ThreadContent = memo(
             )}
           </>
         )}
+
         {item.type === 'image_url' && image && (
           <div>
             <img
diff --git a/web-app/src/containers/ThreadList.tsx b/web-app/src/containers/ThreadList.tsx
index 44f8bbfc4..9e3df65c1 100644
--- a/web-app/src/containers/ThreadList.tsx
+++ b/web-app/src/containers/ThreadList.tsx
@@ -9,7 +9,6 @@ import {
 import {
   SortableContext,
   verticalListSortingStrategy,
-  arrayMove,
   useSortable,
 } from '@dnd-kit/sortable'
 import { CSS } from '@dnd-kit/utilities'
@@ -54,7 +53,7 @@ const SortableItem = memo(({ thread }: { thread: Thread }) => {
     transform,
     transition,
     isDragging,
-  } = useSortable({ id: thread.id })
+  } = useSortable({ id: thread.id, disabled: true })
 
   const style = {
     transform: CSS.Transform.toString(transform),
@@ -102,9 +101,7 @@ const SortableItem = memo(({ thread }: { thread: Thread }) => {
       )}
     >
       <div className="py-1 pr-2 truncate">
-        <span
-          dangerouslySetInnerHTML={{ __html: thread.title || 'New Thread' }}
-        />
+        <span>{thread.title || 'New Thread'}</span>
       </div>
       <div className="flex items-center">
         <DropdownMenu
@@ -263,18 +260,8 @@ type ThreadListProps = {
 }
 
 function ThreadList({ threads }: ThreadListProps) {
-  const { setThreads } = useThreads()
-
   const sortedThreads = useMemo(() => {
     return threads.sort((a, b) => {
-      // If both have order, sort by order (ascending, so lower order comes first)
-      if (a.order != null && b.order != null) {
-        return a.order - b.order
-      }
-      // If only one has order, prioritize the one with order (order comes first)
-      if (a.order != null) return -1
-      if (b.order != null) return 1
-      // If neither has order, sort by updated time (newer threads first)
       return (b.updated || 0) - (a.updated || 0)
     })
   }, [threads])
@@ -290,36 +277,7 @@ function ThreadList({ threads }: ThreadListProps) {
   )
 
   return (
-    <DndContext
-      sensors={sensors}
-      collisionDetection={closestCenter}
-      onDragEnd={(event) => {
-        const { active, over } = event
-        if (active.id !== over?.id && over) {
-          // Access Global State
-          const allThreadsMap = useThreads.getState().threads
-          const allThreadsArray = Object.values(allThreadsMap)
-
-          // Calculate Global Indices
-          const oldIndexInGlobal = allThreadsArray.findIndex(
-            (t) => t.id === active.id
-          )
-          const newIndexInGlobal = allThreadsArray.findIndex(
-            (t) => t.id === over.id
-          )
-
-          // Reorder Globally and Update State
-          if (oldIndexInGlobal !== -1 && newIndexInGlobal !== -1) {
-            const reorderedGlobalThreads = arrayMove(
-              allThreadsArray,
-              oldIndexInGlobal,
-              newIndexInGlobal
-            )
-            setThreads(reorderedGlobalThreads)
-          }
-        }
-      }}
-    >
+    <DndContext sensors={sensors} collisionDetection={closestCenter}>
       <SortableContext
         items={sortedThreads.map((t) => t.id)}
         strategy={verticalListSortingStrategy}
diff --git a/web-app/src/containers/TokenSpeedIndicator.tsx b/web-app/src/containers/TokenSpeedIndicator.tsx
index 5309d890c..ea9f91be0 100644
--- a/web-app/src/containers/TokenSpeedIndicator.tsx
+++ b/web-app/src/containers/TokenSpeedIndicator.tsx
@@ -1,4 +1,5 @@
 import { useAppState } from '@/hooks/useAppState'
+import { toNumber } from '@/utils/number'
 import { Gauge } from 'lucide-react'
 
 interface TokenSpeedIndicatorProps {
@@ -11,16 +12,27 @@ export const TokenSpeedIndicator = ({
   streaming,
 }: TokenSpeedIndicatorProps) => {
   const { tokenSpeed } = useAppState()
-  const persistedTokenSpeed = (metadata?.tokenSpeed as { tokenSpeed: number })
-    ?.tokenSpeed
+  const persistedTokenSpeed =
+    (metadata?.tokenSpeed as { tokenSpeed: number })?.tokenSpeed || 0
+
+  const nonStreamingAssistantParam =
+    typeof metadata?.assistant === 'object' &&
+    metadata?.assistant !== null &&
+    'parameters' in metadata.assistant
+      ? (metadata.assistant as { parameters?: { stream?: boolean } }).parameters
+          ?.stream === false
+      : undefined
+
+  if (nonStreamingAssistantParam) return
 
   return (
     <div className="flex items-center gap-1 text-main-view-fg/60 text-xs">
       <Gauge size={16} />
-
       <span>
         {Math.round(
-          streaming ? Number(tokenSpeed?.tokenSpeed) : persistedTokenSpeed
+          streaming
+            ? toNumber(tokenSpeed?.tokenSpeed)
+            : toNumber(persistedTokenSpeed)
         )}
         &nbsp;tokens/sec
       </span>
diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts
index 4b0e3e6e8..2c8f9fd2a 100644
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@@ -180,10 +180,6 @@ export const useChat = () => {
       if (updatedProvider)
         await restartModel(updatedProvider, model.id, controller)
 
-      console.log(
-        updatedProvider?.models.find((e) => e.id === model.id)?.settings
-          ?.ctx_len?.controller_props.value
-      )
       return updatedProvider
     },
     [getProviderByName, restartModel, updateProvider]
diff --git a/web-app/src/hooks/useDownloadStore.ts b/web-app/src/hooks/useDownloadStore.ts
index 8a0e6ac19..48a5a347c 100644
--- a/web-app/src/hooks/useDownloadStore.ts
+++ b/web-app/src/hooks/useDownloadStore.ts
@@ -11,6 +11,7 @@ export interface DownloadProgressProps {
 // Zustand store for thinking block state
 export type DownloadState = {
   downloads: { [id: string]: DownloadProgressProps }
+  localDownloadingModels: Set<string>
   removeDownload: (id: string) => void
   updateProgress: (
     id: string,
@@ -19,6 +20,8 @@ export type DownloadState = {
     current?: number,
     total?: number
   ) => void
+  addLocalDownloadingModel: (modelId: string) => void
+  removeLocalDownloadingModel: (modelId: string) => void
 }
 
 /**
@@ -26,6 +29,7 @@ export type DownloadState = {
  */
 export const useDownloadStore = create<DownloadState>((set) => ({
   downloads: {},
+  localDownloadingModels: new Set(),
   removeDownload: (id: string) =>
     set((state) => {
       // eslint-disable-next-line @typescript-eslint/no-unused-vars
@@ -46,4 +50,18 @@ export const useDownloadStore = create<DownloadState>((set) => ({
         },
       },
     })),
+
+  // Mark a model as downloading; stores a fresh Set rather than mutating the old one
+  addLocalDownloadingModel: (modelId: string) =>
+    set(({ localDownloadingModels }) => {
+      const next = new Set(localDownloadingModels).add(modelId)
+      return { localDownloadingModels: next }
+    }),
+
+  // Unmark a model by storing a copy of the Set without `modelId`
+  removeLocalDownloadingModel: (modelId: string) =>
+    set(({ localDownloadingModels }) => {
+      const rest = Array.from(localDownloadingModels).filter((m) => m !== modelId)
+      return { localDownloadingModels: new Set(rest) }
+    }),
 }))
diff --git a/web-app/src/hooks/useHardware.ts b/web-app/src/hooks/useHardware.ts
index 7ad067cc8..16e83a7a5 100644
--- a/web-app/src/hooks/useHardware.ts
+++ b/web-app/src/hooks/useHardware.ts
@@ -87,8 +87,17 @@ interface HardwareStore {
   // Update RAM available
   updateRAMAvailable: (available: number) => void
 
-  // Toggle GPU activation
-  toggleGPUActivation: (index: number) => void
+  // Toggle GPU activation (async, with loading)
+  toggleGPUActivation: (index: number) => Promise<void>
+
+  // GPU loading state
+  gpuLoading: { [index: number]: boolean }
+  setGpuLoading: (index: number, loading: boolean) => void
+
+  // Polling control
+  pollingPaused: boolean
+  pausePolling: () => void
+  resumePolling: () => void
 
   // Reorder GPUs
   reorderGPUs: (oldIndex: number, newIndex: number) => void
@@ -96,8 +105,16 @@ interface HardwareStore {
 
 export const useHardware = create<HardwareStore>()(
   persist(
-    (set) => ({
+    (set, get) => ({
       hardwareData: defaultHardwareData,
+      gpuLoading: {},
+      pollingPaused: false,
+      // Keyed by GPU list index: the store type declares `[index: number]` and
+      // the hardware settings switch looks up `gpuLoading[index]`.
+      setGpuLoading: (index, loading) =>
+        set((state) => ({ gpuLoading: { ...state.gpuLoading, [index]: loading } })),
+      pausePolling: () => set({ pollingPaused: true }),
+      resumePolling: () => set({ pollingPaused: false }),
 
       setCPU: (cpu) =>
         set((state) => ({
@@ -172,25 +189,34 @@ export const useHardware = create<HardwareStore>()(
           },
         })),
 
-      toggleGPUActivation: (index) => {
-        set((state) => {
-          const newGPUs = [...state.hardwareData.gpus]
-          if (index >= 0 && index < newGPUs.length) {
-            newGPUs[index] = {
-              ...newGPUs[index],
-              activated: !newGPUs[index].activated,
+      toggleGPUActivation: async (index) => {
+        const { pausePolling, setGpuLoading, resumePolling } = get();
+        pausePolling();
+        setGpuLoading(index, true);
+        try {
+          await new Promise((resolve) => setTimeout(resolve, 200)); // Simulate async, replace with real API if needed
+          set((state) => {
+            const newGPUs = [...state.hardwareData.gpus];
+            if (index >= 0 && index < newGPUs.length) {
+              newGPUs[index] = {
+                ...newGPUs[index],
+                activated: !newGPUs[index].activated,
+              };
             }
-          }
-          setActiveGpus({
-            gpus: newGPUs.filter((e) => e.activated).map((e) => parseInt(e.id)),
-          })
-          return {
-            hardwareData: {
-              ...state.hardwareData,
-              gpus: newGPUs,
-            },
-          }
-        })
+            setActiveGpus({
+              gpus: newGPUs.filter((e) => e.activated).map((e) => parseInt(e.id)),
+            });
+            return {
+              hardwareData: {
+                ...state.hardwareData,
+                gpus: newGPUs,
+              },
+            };
+          });
+        } finally {
+          setGpuLoading(index, false);
+          setTimeout(resumePolling, 1000); // Resume polling after 1s
+        }
       },
 
       reorderGPUs: (oldIndex, newIndex) =>
diff --git a/web-app/src/hooks/useMessages.ts b/web-app/src/hooks/useMessages.ts
index 3a83b5a48..3841a50b2 100644
--- a/web-app/src/hooks/useMessages.ts
+++ b/web-app/src/hooks/useMessages.ts
@@ -34,7 +34,13 @@ export const useMessages = create<MessageState>()((set, get) => ({
       created_at: message.created_at || Date.now(),
       metadata: {
         ...message.metadata,
-        assistant: currentAssistant,
+        assistant: {
+          id: currentAssistant?.id || '',
+          name: currentAssistant?.name || '',
+          avatar: currentAssistant?.avatar || '',
+          instructions: currentAssistant?.instructions || '',
+          parameters: currentAssistant?.parameters || '',
+        },
       },
     }
     createMessage(newMessage).then((createdMessage) => {
diff --git a/web-app/src/hooks/useThreads.ts b/web-app/src/hooks/useThreads.ts
index 806749b73..e81b48e8e 100644
--- a/web-app/src/hooks/useThreads.ts
+++ b/web-app/src/hooks/useThreads.ts
@@ -2,7 +2,7 @@ import { create } from 'zustand'
 import { ulid } from 'ulidx'
 import { createThread, deleteThread, updateThread } from '@/services/threads'
 import { Fzf } from 'fzf'
-import { highlightFzfMatch } from '../utils/highlight'
+
 type ThreadState = {
   threads: Record<string, Thread>
   currentThreadId?: string
@@ -32,13 +32,6 @@ export const useThreads = create<ThreadState>()((set, get) => ({
   threads: {},
   searchIndex: null,
   setThreads: (threads) => {
-    threads.forEach((thread, index) => {
-      thread.order = index + 1
-      updateThread({
-        ...thread,
-        order: index + 1,
-      })
-    })
     const threadMap = threads.reduce(
       (acc: Record<string, Thread>, thread) => {
         acc[thread.id] = thread
@@ -75,12 +68,10 @@ export const useThreads = create<ThreadState>()((set, get) => ({
     return fzfResults.map(
       (result: { item: Thread; positions: Set<number> }) => {
         const thread = result.item // Fzf stores the original item here
-        // Ensure result.positions is an array, default to empty if undefined
-        const positions = Array.from(result.positions) || []
-        const highlightedTitle = highlightFzfMatch(thread.title, positions)
+
         return {
           ...thread,
-          title: highlightedTitle, // Override title with highlighted version
+          title: thread.title, // Plain title; fzf match highlighting was removed
         }
       }
     )
@@ -159,7 +150,6 @@ export const useThreads = create<ThreadState>()((set, get) => ({
       id: ulid(),
       title: title ?? 'New Thread',
       model,
-      // order: 1, // Will be set properly by setThreads
       updated: Date.now() / 1000,
       assistants: assistant ? [assistant] : [],
     }
@@ -244,44 +234,14 @@ export const useThreads = create<ThreadState>()((set, get) => ({
       const thread = state.threads[threadId]
       if (!thread) return state
 
-      // If the thread is already at order 1, just update the timestamp
-      if (thread.order === 1) {
-        const updatedThread = {
-          ...thread,
-          updated: Date.now() / 1000,
-        }
-        updateThread(updatedThread)
-
-        return {
-          threads: {
-            ...state.threads,
-            [threadId]: updatedThread,
-          },
-        }
-      }
-
       // Update the thread with new timestamp and set it to order 1 (top)
       const updatedThread = {
         ...thread,
         updated: Date.now() / 1000,
-        order: 1,
       }
 
       // Update all other threads to increment their order by 1
       const updatedThreads = { ...state.threads }
-      Object.keys(updatedThreads).forEach((id) => {
-        if (id !== threadId) {
-          const otherThread = updatedThreads[id]
-          updatedThreads[id] = {
-            ...otherThread,
-            order: (otherThread.order || 1) + 1,
-          }
-          // Update the backend for other threads
-          updateThread(updatedThreads[id])
-        }
-      })
-
-      // Set the updated thread
       updatedThreads[threadId] = updatedThread
 
       // Update the backend for the main thread
diff --git a/web-app/src/index.css b/web-app/src/index.css
index 185cb0612..d8ae284e9 100644
--- a/web-app/src/index.css
+++ b/web-app/src/index.css
@@ -79,6 +79,15 @@
   ::-ms-reveal {
     display: none;
   }
+
+  .reset-heading {
+    :is(h1, h2, h3, h4, h5, h6) {
+      font-weight: 600;
+      font-size: 14px !important;
+      margin-top: 0 !important;
+      margin-bottom: 0.5em;
+    }
+  }
 }
 
 @layer utilities {
diff --git a/web-app/src/lib/version.ts b/web-app/src/lib/version.ts
index f8e7df5b0..fbbe217b3 100644
--- a/web-app/src/lib/version.ts
+++ b/web-app/src/lib/version.ts
@@ -2,4 +2,4 @@ import { isDev } from './utils'
 
 export const isNightly = VERSION.includes('-')
 export const isBeta = VERSION.includes('beta')
-export const isProd = !isNightly && !isBeta && !isDev
+export const isProd = !isNightly && !isBeta && !isDev()
diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx
index c4327fb04..3bca8649f 100644
--- a/web-app/src/routes/hub.tsx
+++ b/web-app/src/routes/hub.tsx
@@ -182,7 +182,8 @@ function Hub() {
     }
   }
 
-  const { downloads } = useDownloadStore()
+  const { downloads, localDownloadingModels, addLocalDownloadingModel } =
+    useDownloadStore()
 
   const downloadProcesses = useMemo(
     () =>
@@ -225,7 +226,9 @@ function Hub() {
         model.models.find((e) =>
           defaultModelQuantizations.some((m) => e.id.toLowerCase().includes(m))
         )?.id ?? model.models[0]?.id
-      const isDownloading = downloadProcesses.some((e) => e.id === modelId)
+      const isDownloading =
+        localDownloadingModels.has(modelId) ||
+        downloadProcesses.some((e) => e.id === modelId)
       const downloadProgress =
         downloadProcesses.find((e) => e.id === modelId)?.progress || 0
       const isDownloaded = llamaProvider?.models.some(
@@ -233,6 +236,12 @@ function Hub() {
       )
       const isRecommended = isRecommendedModel(model.metadata?.id)
 
+      const handleDownload = () => {
+        // Immediately set local downloading state
+        addLocalDownloadingModel(modelId)
+        downloadModel(modelId)
+      }
+
       return (
         <div
           className={cn(
@@ -255,7 +264,7 @@ function Hub() {
           ) : (
             <Button
               size="sm"
-              onClick={() => downloadModel(modelId)}
+              onClick={handleDownload}
               className={cn(isDownloading && 'hidden')}
               ref={isRecommended ? downloadButtonRef : undefined}
             >
@@ -271,6 +280,8 @@ function Hub() {
     handleUseModel,
     isRecommendedModel,
     downloadButtonRef,
+    localDownloadingModels,
+    addLocalDownloadingModel,
   ])
 
   const { step } = useSearch({ from: Route.id })
@@ -320,7 +331,8 @@ function Hub() {
   }
 
   // Check if any model is currently downloading
-  const isDownloading = downloadProcesses.length > 0
+  const isDownloading =
+    localDownloadingModels.size > 0 || downloadProcesses.length > 0
 
   const steps = [
     {
@@ -483,7 +495,7 @@ function Hub() {
                         <div className="line-clamp-2 mt-3 text-main-view-fg/60">
                           <RenderMarkdown
                             enableRawHtml={true}
-                            className="select-none"
+                            className="select-none reset-heading"
                             components={{
                               a: ({ ...props }) => (
                                 <a
@@ -553,6 +565,9 @@ function Hub() {
                                       </p>
                                       {(() => {
                                         const isDownloading =
+                                          localDownloadingModels.has(
+                                            variant.id
+                                          ) ||
                                           downloadProcesses.some(
                                             (e) => e.id === variant.id
                                           )
@@ -607,9 +622,12 @@ function Hub() {
                                           <div
                                             className="size-6 cursor-pointer flex items-center justify-center rounded hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
                                             title="Download model"
-                                            onClick={() =>
+                                            onClick={() => {
+                                              addLocalDownloadingModel(
+                                                variant.id
+                                              )
                                               downloadModel(variant.id)
-                                            }
+                                            }}
                                           >
                                             <IconDownload
                                               size={16}
diff --git a/web-app/src/routes/settings/general.tsx b/web-app/src/routes/settings/general.tsx
index eb7e28871..cfdd4f298 100644
--- a/web-app/src/routes/settings/general.tsx
+++ b/web-app/src/routes/settings/general.tsx
@@ -45,6 +45,7 @@ import { isDev } from '@/lib/utils'
 import { emit } from '@tauri-apps/api/event'
 import { stopAllModels } from '@/services/models'
 import { SystemEvent } from '@/types/events'
+import { isProd } from '@/lib/version'
 
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 export const Route = createFileRoute(route.settings.general as any)({
@@ -234,10 +235,12 @@ function General() {
                   </Button>
                 }
               />
-              <CardItem
-                title={t('common.language')}
-                actions={<LanguageSwitcher />}
-              />
+              {!isProd && (
+                <CardItem
+                  title={t('common.language')}
+                  actions={<LanguageSwitcher />}
+                />
+              )}
             </Card>
 
             {/* Data folder */}
diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx
index 769997b7a..23f4eafef 100644
--- a/web-app/src/routes/settings/hardware.tsx
+++ b/web-app/src/routes/settings/hardware.tsx
@@ -49,7 +49,7 @@ function SortableGPUItem({ gpu, index }: { gpu: GPU; index: number }) {
     isDragging,
   } = useSortable({ id: gpu.id || index })
 
-  const { toggleGPUActivation } = useHardware()
+  const { toggleGPUActivation, gpuLoading } = useHardware()
 
   const style = {
     transform: CSS.Transform.toString(transform),
@@ -78,6 +78,7 @@ function SortableGPUItem({ gpu, index }: { gpu: GPU; index: number }) {
           <div className="flex items-center gap-4">
             <Switch
               checked={gpu.activated}
+              disabled={!!gpuLoading[index]}
               onCheckedChange={() => toggleGPUActivation(index)}
             />
           </div>
@@ -122,6 +123,7 @@ function Hardware() {
     updateCPUUsage,
     updateRAMAvailable,
     reorderGPUs,
+    pollingPaused,
   } = useHardware()
   const { vulkanEnabled, setVulkanEnabled } = useVulkan()
 
@@ -155,16 +157,16 @@ function Hardware() {
   }
 
   useEffect(() => {
+    if (pollingPaused) return;
     const intervalId = setInterval(() => {
       getHardwareInfo().then((data) => {
-        setHardwareData(data as unknown as HardwareData)
         updateCPUUsage(data.cpu.usage)
         updateRAMAvailable(data.ram.available)
       })
     }, 5000)
 
     return () => clearInterval(intervalId)
-  }, [setHardwareData, updateCPUUsage, updateRAMAvailable])
+  }, [setHardwareData, updateCPUUsage, updateRAMAvailable, pollingPaused])
 
   const handleClickSystemMonitor = async () => {
     try {
@@ -352,7 +354,7 @@ function Hardware() {
               <Card title="Vulkan">
                 <CardItem
                   title="Enable Vulkan"
-                  description="Use Vulkan API for GPU acceleration."
+                  description="Use Vulkan API for GPU acceleration. Do not enable Vulkan if you have an NVIDIA GPU as it may cause compatibility issues."
                   actions={
                     <div className="flex items-center gap-4">
                       <Switch
@@ -371,30 +373,34 @@ function Hardware() {
             )}
 
             {/* GPU Information */}
-            <Card title="GPUs">
-              {hardwareData.gpus.length > 0 ? (
-                <DndContext
-                  sensors={sensors}
-                  collisionDetection={closestCenter}
-                  onDragEnd={handleDragEnd}
-                >
-                  <SortableContext
-                    items={hardwareData.gpus.map((gpu) => gpu.id)}
-                    strategy={verticalListSortingStrategy}
+            {!IS_MACOS ? (
+              <Card title="GPUs">
+                {hardwareData.gpus.length > 0 ? (
+                  <DndContext
+                    sensors={sensors}
+                    collisionDetection={closestCenter}
+                    onDragEnd={handleDragEnd}
                   >
-                    {hardwareData.gpus.map((gpu, index) => (
-                      <SortableGPUItem
-                        key={gpu.id || index}
-                        gpu={gpu}
-                        index={index}
-                      />
-                    ))}
-                  </SortableContext>
-                </DndContext>
-              ) : (
-                <CardItem title="No GPUs detected" actions={<></>} />
-              )}
-            </Card>
+                    <SortableContext
+                      items={hardwareData.gpus.map((gpu) => gpu.id)}
+                      strategy={verticalListSortingStrategy}
+                    >
+                      {hardwareData.gpus.map((gpu, index) => (
+                        <SortableGPUItem
+                          key={gpu.id || index}
+                          gpu={gpu}
+                          index={index}
+                        />
+                      ))}
+                    </SortableContext>
+                  </DndContext>
+                ) : (
+                  <CardItem title="No GPUs detected" actions={<></>} />
+                )}
+              </Card>
+            ) : (
+              <></>
+            )}
           </div>
         </div>
       </div>
diff --git a/web-app/src/routes/settings/local-api-server.tsx b/web-app/src/routes/settings/local-api-server.tsx
index dd7561be5..94f577074 100644
--- a/web-app/src/routes/settings/local-api-server.tsx
+++ b/web-app/src/routes/settings/local-api-server.tsx
@@ -17,7 +17,8 @@ import { windowKey } from '@/constants/windows'
 import { IconLogs } from '@tabler/icons-react'
 import { cn } from '@/lib/utils'
 import { ApiKeyInput } from '@/containers/ApiKeyInput'
-import { useState } from 'react'
+import { useEffect, useState } from 'react'
+import { invoke } from '@tauri-apps/api/core'
 
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 export const Route = createFileRoute(route.settings.local_api_server as any)({
@@ -44,6 +45,22 @@ function LocalAPIServer() {
     !apiKey || apiKey.toString().trim().length === 0
   )
 
+  // Ask the backend for the current server state via `get_server_status`
+  // and mark the server as running when it reports a truthy value.
+  useEffect(() => {
+    invoke('get_server_status')
+      .then((running) => {
+        if (running) {
+          setServerStatus('running')
+        }
+      })
+      .catch((error: unknown) => {
+        // Leave the status untouched and log instead of letting the failed
+        // command surface as an unhandled promise rejection.
+        console.error('Failed to query server status:', error)
+      })
+  }, [setServerStatus])
+
   const handleApiKeyValidation = (isValid: boolean) => {
     setIsApiKeyEmpty(!isValid)
   }
diff --git a/web-app/src/routes/settings/providers/$providerName.tsx b/web-app/src/routes/settings/providers/$providerName.tsx
index 7ed4e3969..2daa496b7 100644
--- a/web-app/src/routes/settings/providers/$providerName.tsx
+++ b/web-app/src/routes/settings/providers/$providerName.tsx
@@ -39,6 +39,7 @@ import { toast } from 'sonner'
 import { ActiveModel } from '@/types/models'
 import { useEffect, useState } from 'react'
 import { predefinedProviders } from '@/mock/data'
+import { isProd } from '@/lib/version'
 
 // as route.threadsDetail
 export const Route = createFileRoute('/settings/providers/$providerName')({
@@ -454,15 +455,19 @@ function ProviderDetail() {
                           title={
                             <div className="flex items-center gap-2">
                               <h1 className="font-medium">{model.id}</h1>
-                              <Capabilities capabilities={capabilities} />
+                              {!isProd && (
+                                <Capabilities capabilities={capabilities} />
+                              )}
                             </div>
                           }
                           actions={
                             <div className="flex items-center gap-1">
-                              <DialogEditModel
-                                provider={provider}
-                                modelId={model.id}
-                              />
+                              {!isProd && (
+                                <DialogEditModel
+                                  provider={provider}
+                                  modelId={model.id}
+                                />
+                              )}
                               {model.settings && (
                                 <ModelSetting
                                   provider={provider}
diff --git a/web-app/src/utils/number.ts b/web-app/src/utils/number.ts
new file mode 100644
index 000000000..866755ed6
--- /dev/null
+++ b/web-app/src/utils/number.ts
@@ -0,0 +1,19 @@
+/**
+ * Coerces an arbitrary value to a number, falling back to 0.
+ *
+ * Uses the built-in `Number()` conversion. The fallback `0` is returned when
+ * the conversion yields `NaN` or throws (`Number()` raises a TypeError for
+ * symbols and for objects that cannot be converted to a primitive).
+ *
+ * @param value - Any value to convert.
+ * @returns The converted number, or `0` when no numeric value can be derived.
+ */
+export const toNumber = (value: unknown): number => {
+  try {
+    const num = Number(value)
+    return Number.isNaN(num) ? 0 : num
+  } catch {
+    // Number() throws instead of returning NaN for these inputs.
+    return 0
+  }
+}