diff --git a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
index 338e8aa30..09f1b4578 100644
--- a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
+++ b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
@@ -185,40 +185,76 @@ pub async fn load_llama_model(
     // Spawn task to monitor stdout for readiness
     let _stdout_task = tokio::spawn(async move {
-        let mut reader = BufReader::new(stdout).lines();
-        while let Ok(Some(line)) = reader.next_line().await {
-            log::info!("[llamacpp stdout] {}", line);
+        let mut reader = BufReader::new(stdout);
+        let mut byte_buffer = Vec::new();
+
+        loop {
+            byte_buffer.clear();
+            match reader.read_until(b'\n', &mut byte_buffer).await {
+                Ok(0) => break, // EOF
+                Ok(_) => {
+                    let line = String::from_utf8_lossy(&byte_buffer);
+                    let line = line.trim_end();
+                    if !line.is_empty() {
+                        log::info!("[llamacpp stdout] {}", line);
+                    }
+                }
+                Err(e) => {
+                    log::error!("Error reading stdout: {}", e);
+                    break;
+                }
+            }
         }
     });
 
     // Spawn task to capture stderr and monitor for errors
     let stderr_task = tokio::spawn(async move {
-        let mut reader = BufReader::new(stderr).lines();
+        let mut reader = BufReader::new(stderr);
+        let mut byte_buffer = Vec::new();
         let mut stderr_buffer = String::new();
-        while let Ok(Some(line)) = reader.next_line().await {
-            log::info!("[llamacpp] {}", line); // Using your log format
-            stderr_buffer.push_str(&line);
-            stderr_buffer.push('\n');
-            // Check for critical error indicators that should stop the process
-            // TODO: check for different errors
-            if line.to_lowercase().contains("error")
-                || line.to_lowercase().contains("failed")
-                || line.to_lowercase().contains("fatal")
-                || line.contains("CUDA error")
-                || line.contains("out of memory")
-                || line.contains("failed to load")
-            {
-                let _ = error_tx.send(line.clone()).await;
-            }
-            // Check for readiness indicator - llama-server outputs this when ready
-            else if line.contains("server is listening on")
-                || line.contains("starting the main loop")
-                || line.contains("server listening on")
-            {
-                log::info!("Server appears to be ready based on stdout: '{}'", line);
-                let _ = ready_tx.send(true).await;
+
+        loop {
+            byte_buffer.clear();
+            match reader.read_until(b'\n', &mut byte_buffer).await {
+                Ok(0) => break, // EOF
+                Ok(_) => {
+                    let line = String::from_utf8_lossy(&byte_buffer);
+                    let line = line.trim_end();
+
+                    if !line.is_empty() {
+                        stderr_buffer.push_str(line);
+                        stderr_buffer.push('\n');
+                        log::info!("[llamacpp] {}", line);
+
+                        // Check for critical error indicators that should stop the process
+                        let line_lower = line.to_string().to_lowercase();
+                        if line_lower.contains("error loading model")
+                            || line_lower.contains("unknown model architecture")
+                            || line_lower.contains("fatal")
+                            || line_lower.contains("cuda error")
+                            || line_lower.contains("out of memory")
+                            || line_lower.contains("error")
+                            || line_lower.contains("failed")
+                        {
+                            let _ = error_tx.send(line.to_string()).await;
+                        }
+                        // Check for readiness indicator - llama-server outputs this when ready
+                        else if line.contains("server is listening on")
+                            || line.contains("starting the main loop")
+                            || line.contains("server listening on")
+                        {
+                            log::info!("Server appears to be ready based on stderr: '{}'", line);
+                            let _ = ready_tx.send(true).await;
+                        }
+                    }
+                }
+                Err(e) => {
+                    log::error!("Error reading stderr: {}", e);
+                    break;
+                }
            }
         }
+
         stderr_buffer
     });
 
@@ -226,7 +262,7 @@ pub async fn load_llama_model(
     if let Some(status) = child.try_wait()? {
         if !status.success() {
             let stderr_output = stderr_task.await.unwrap_or_default();
-            log::error!("llama.cpp exited early with code {status:?}");
+            log::error!("llama.cpp exited early with code {:?}", status);
             log::error!("--- stderr ---\n{}", stderr_output);
             return Err(ServerError::LlamacppError(stderr_output.trim().to_string()));
         }
@@ -246,25 +282,43 @@ pub async fn load_llama_model(
             // Error occurred
             Some(error_msg) = error_rx.recv() => {
                 log::error!("Server encountered an error: {}", error_msg);
-                let _ = child.kill().await;
+
+                // Give process a moment to exit naturally
+                tokio::time::sleep(Duration::from_millis(100)).await;
+
+                // Check if process already exited
+                if let Some(status) = child.try_wait()? {
+                    log::info!("Process exited with code {:?}", status);
+                    return Err(ServerError::LlamacppError(error_msg));
+                } else {
+                    log::info!("Process still running, killing it...");
+                    let _ = child.kill().await;
+                }
+
                 // Get full stderr output
                 let stderr_output = stderr_task.await.unwrap_or_default();
                 return Err(ServerError::LlamacppError(format!("Error: {}\n\nFull stderr:\n{}", error_msg, stderr_output)));
             }
-            // Timeout
-            _ = tokio::time::sleep(Duration::from_millis(100)) => {
+            // Check for process exit more frequently
+            _ = tokio::time::sleep(Duration::from_millis(50)) => {
+                // Check if process exited
+                if let Some(status) = child.try_wait()? {
+                    let stderr_output = stderr_task.await.unwrap_or_default();
+                    if !status.success() {
+                        log::error!("llama.cpp exited with error code {:?}", status);
+                        return Err(ServerError::LlamacppError(format!("Process exited with code {:?}\n\nStderr:\n{}", status, stderr_output)));
+                    } else {
+                        log::error!("llama.cpp exited successfully but without ready signal");
+                        return Err(ServerError::LlamacppError(format!("Process exited unexpectedly\n\nStderr:\n{}", stderr_output)));
+                    }
+                }
+
+                // Timeout check
                 if start_time.elapsed() > timeout_duration {
                     log::error!("Timeout waiting for server to be ready");
                     let _ = child.kill().await;
-                    return Err(ServerError::LlamacppError("Server startup timeout".to_string()));
-                }
-                // Check if process is still alive
-                if let Some(status) = child.try_wait()? {
-                    if !status.success() {
-                        let stderr_output = stderr_task.await.unwrap_or_default();
-                        log::error!("llama.cpp exited during startup with code {status:?}");
-                        return Err(ServerError::LlamacppError(format!("Process exited with code {status:?}\n\nStderr:\n{}", stderr_output)));
-                    }
+                    let stderr_output = stderr_task.await.unwrap_or_default();
+                    return Err(ServerError::LlamacppError(format!("Server startup timeout\n\nStderr:\n{}", stderr_output)));
                 }
             }
         }
@@ -331,7 +385,10 @@ pub async fn unload_llama_model(
     #[cfg(all(windows, target_arch = "x86_64"))]
     {
         if let Some(raw_pid) = child.id() {
-            log::warn!("gracefully killing is unsupported on Windows, force-killing PID {}", raw_pid);
+            log::warn!(
+                "gracefully killing is unsupported on Windows, force-killing PID {}",
+                raw_pid
+            );
 
             // Since we know a graceful shutdown doesn't work and there are no child processes
             // to worry about, we can use `child.kill()` directly. On Windows, this is
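
Context on the core change in the first hunk: `BufReader::lines()` / `next_line()` returns an `InvalidData` error when the child writes bytes that are not valid UTF-8, and the old `while let Ok(Some(line))` loops would then exit silently, ending readiness and error monitoring. Reading raw bytes with `read_until` and converting with `String::from_utf8_lossy` avoids that failure mode, which is presumably the motivation for the rewrite. A minimal standalone sketch of the pattern (not part of the patch; it reads this process's own stdin as a stand-in for the child's piped output, and the names are illustrative only):

```rust
// Sketch of byte-wise line reading with lossy UTF-8 conversion,
// the pattern the new stdout/stderr tasks in the patch use.
use tokio::io::{AsyncBufReadExt, BufReader};

#[tokio::main]
async fn main() {
    // Stand-in for the child's stdout/stderr pipe.
    let mut reader = BufReader::new(tokio::io::stdin());
    let mut byte_buffer = Vec::new();

    loop {
        byte_buffer.clear();
        match reader.read_until(b'\n', &mut byte_buffer).await {
            Ok(0) => break, // EOF: the other end closed the pipe
            Ok(_) => {
                // from_utf8_lossy replaces invalid bytes with U+FFFD instead of
                // returning an error, so a single bad byte cannot end the loop.
                let line = String::from_utf8_lossy(&byte_buffer);
                let line = line.trim_end();
                if !line.is_empty() {
                    println!("[line] {}", line);
                }
            }
            Err(e) => {
                eprintln!("read error: {}", e);
                break;
            }
        }
    }
}
```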