diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 7681c967a..f4ad82f95 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -1176,6 +1176,7 @@ export default class llamacpp_extension extends AIEngine {
       }
     }
     const args: string[] = []
+    const envs: Record<string, string> = {}
     const cfg = { ...this.config, ...(overrideSettings ?? {}) }
     const [version, backend] = cfg.version_backend.split('/')
     if (!version || !backend) {
@@ -1202,7 +1203,7 @@ export default class llamacpp_extension extends AIEngine {
     // disable llama-server webui
     args.push('--no-webui')
     const api_key = await this.generateApiKey(modelId, String(port))
-    args.push('--api-key', api_key)
+    envs["LLAMA_API_KEY"] = api_key
 
     // model option is required
     // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
@@ -1287,14 +1288,12 @@ export default class llamacpp_extension extends AIEngine {
 
     try {
       // TODO: add LIBRARY_PATH
-      const sInfo = await invoke(
-        'plugin:llamacpp|load_llama_model',
-        {
-          backendPath,
-          libraryPath,
-          args,
-        }
-      )
+      const sInfo = await invoke('plugin:llamacpp|load_llama_model', {
+        backendPath,
+        libraryPath,
+        args,
+        envs,
+      })
       return sInfo
     } catch (error) {
       logger.error('Error in load command:\n', error)
diff --git a/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs b/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs
index a2592f345..16590491e 100644
--- a/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs
+++ b/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs
@@ -1,6 +1,7 @@
 use base64::{engine::general_purpose, Engine as _};
 use hmac::{Hmac, Mac};
 use sha2::Sha256;
+use std::collections::HashMap;
 use std::process::Stdio;
 use std::time::Duration;
 use tauri::{Manager, Runtime, State};
@@ -42,6 +43,7 @@ pub async fn load_llama_model(
     backend_path: &str,
     library_path: Option<&str>,
     mut args: Vec<String>,
+    envs: HashMap<String, String>,
 ) -> ServerResult {
     let state: State = app_handle.state();
     let mut process_map = state.llama_server_process.lock().await;
@@ -54,12 +56,21 @@ pub async fn load_llama_model(
     let port = parse_port_from_args(&args);
     let model_path_pb = validate_model_path(&mut args)?;
 
-    let api_key = extract_arg_value(&args, "--api-key");
+    let api_key: String;
+
+    if let Some(api_value) = envs.get("LLAMA_API_KEY") {
+        api_key = api_value.to_string();
+    } else {
+        log::warn!("API key not provided");
+        api_key = "".to_string();
+    }
+
     let model_id = extract_arg_value(&args, "-a");
 
     // Configure the command to run the server
     let mut command = Command::new(backend_path);
     command.args(args);
+    command.envs(envs);
     setup_library_path(library_path, &mut command);
 
     command.stdout(Stdio::piped());
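
Net effect of the patch: the API key now reaches llama-server through the `LLAMA_API_KEY` environment variable of the spawned process (forwarded with `command.envs(envs)`) instead of being appended as a `--api-key` argument. Below is a minimal standalone sketch of that mechanism only, assuming a synchronous `std::process::Command` rather than the plugin's async command wrapper; the `llama-server` binary name, the flags, and the `example-key` value are placeholders, not taken from the patch.

```rust
use std::collections::HashMap;
use std::process::{Command, Stdio};

fn main() -> std::io::Result<()> {
    // Environment variables for the child process; mirrors the `envs` map the
    // extension now sends instead of pushing `--api-key` onto the CLI args.
    let mut envs: HashMap<String, String> = HashMap::new();
    envs.insert("LLAMA_API_KEY".to_string(), "example-key".to_string());

    // Placeholder binary and flags, for illustration only.
    let mut child = Command::new("llama-server")
        .args(["--port", "8080", "--no-webui"])
        .envs(&envs) // same mechanism as `command.envs(envs)` in the plugin
        .stdout(Stdio::piped())
        .spawn()?;

    let status = child.wait()?;
    println!("llama-server exited with {status}");
    Ok(())
}
```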