diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index efd06cab5..4f710f568 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -76,7 +76,7 @@ export default class llamacpp_extension extends AIEngine {
   private downloadManager
   private activeSessions: Map<string, SessionInfo> = new Map()
   private modelsBasePath!: string
-  private activeRequests: Map = new Map()
+  private enginesPath!: string
 
   override async onLoad(): Promise<void> {
     super.onLoad() // Calls registerEngine() from AIEngine
@@ -89,8 +89,24 @@ export default class llamacpp_extension extends AIEngine {
       await getJanDataFolderPath(),
       'models',
     ])
+
+    this.enginesPath = await joinPath([await getJanDataFolderPath(), 'llamacpp', 'engines'])
   }
 
+  override async onUnload(): Promise<void> {
+    // Terminate all active sessions
+    for (const [sessionId, _] of this.activeSessions) {
+      try {
+        await this.unload(sessionId);
+      } catch (error) {
+        console.error(`Failed to unload session ${sessionId}:`, error);
+      }
+    }
+
+    // Clear the sessions map
+    this.activeSessions.clear();
+}
+
   // Implement the required LocalProvider interface methods
   override async list(): Promise<modelInfo[]> {
     const modelsDir = await joinPath([this.modelsBasePath, this.provider])
@@ -335,6 +351,7 @@ export default class llamacpp_extension extends AIEngine {
 
     try {
       const sInfo = await invoke('load_llama_model', {
+        server_path: this.enginesPath,
         args: args,
       })
 
@@ -348,17 +365,17 @@ export default class llamacpp_extension extends AIEngine {
     }
   }
 
-  override async unload(opts: unloadOptions): Promise<UnloadResult> {
+  override async unload(sessionId: string): Promise<UnloadResult> {
     try {
       // Pass the PID as the session_id
       const result = await invoke('unload_llama_model', {
-        session_id: opts.sessionId, // Using PID as session ID
+        session_id: sessionId, // Using PID as session ID
       })
 
       // If successful, remove from active sessions
      if (result.success) {
-        this.activeSessions.delete(opts.sessionId)
-        console.log(`Successfully unloaded model with PID ${opts.sessionId}`)
+        this.activeSessions.delete(sessionId)
+        console.log(`Successfully unloaded model with PID ${sessionId}`)
      } else {
        console.warn(`Failed to unload model: ${result.error}`)
      }
@@ -496,8 +513,4 @@ export default class llamacpp_extension extends AIEngine {
   override getChatClient(sessionId: string): any {
     throw new Error('method not implemented yet')
   }
-
-  onUnload(): void {
-    throw new Error('Method not implemented.')
-  }
 }
diff --git a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
index 283d07849..77ba5187c 100644
--- a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
+++ b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
@@ -37,33 +37,6 @@ impl serde::Serialize for ServerError {
 
 type ServerResult<T> = Result<T, ServerError>;
 
-// --- Helper function to find the server binary ---
-// -- TODO: Adjust extension engine paths
-// engine: static llama-server build (CUDA, VULKAN, SYCL, etc)
-fn get_server_path(app_handle: &AppHandle) -> ServerResult<PathBuf> {
-    let binary_name = if cfg!(windows) {
-        "llama-server.exe"
-    } else {
-        "llama-server"
-    };
-    let relative_path = PathBuf::from("engines").join(binary_name); // TODO: ADJUST THIS PATH
-
-    app_handle
-        .path()
-        .resolve(relative_path, BaseDirectory::Resource)
-        .map_err(|e| ServerError::ResourcePathError(e.to_string()))
-    // .ok_or_else(|| {
-    //     ServerError::BinaryNotFound(format!(
-    //         "Could not resolve resource path for '{}'",
-    //         if cfg!(windows) {
-    //             "engines/llama-server.exe"
-    //         } else {
-    //             "engines/llama-server"
-    //         } // TODO: ADJUST THIS PATH
-    //     ))
-    // })
-}
-
 #[derive(Debug, Serialize, Deserialize)]
 pub struct SessionInfo {
     pub session_id: String, // opaque handle for unload/chat
@@ -82,6 +55,7 @@ pub struct UnloadResult {
 pub async fn load_llama_model(
     app_handle: AppHandle,      // Get the AppHandle
     state: State<'_, AppState>, // Access the shared state
+    server_path: String,
     args: Vec<String>,          // Arguments from the frontend
 ) -> ServerResult<SessionInfo> {
     let mut process_lock = state.llama_server_process.lock().await;
@@ -91,7 +65,6 @@ pub async fn load_llama_model(
 
         return Err(ServerError::AlreadyRunning);
     }
 
-    let server_path = get_server_path(&app_handle)?;
     log::info!("Attempting to launch server at path: {:?}", server_path);
     log::info!("Using arguments: {:?}", args);
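
Note on the server_path handoff (illustrative, not part of the diff): the removed get_server_path() helper resolved the platform-specific llama-server binary from the Tauri resource directory, while the new code passes the extension's engines directory (joined from the Jan data folder, 'llamacpp', 'engines') into load_llama_model as server_path. A minimal Rust sketch of how the command could map that directory back to a concrete binary before spawning is shown below; resolve_server_binary and spawn_server are hypothetical names, and the exact layout of the engines directory is an assumption.

use std::path::{Path, PathBuf};
use std::process::Command;

// Hypothetical helper: mirrors the cfg!(windows) switch from the removed
// get_server_path() so the engines directory maps to the right binary name.
fn resolve_server_binary(engines_dir: &Path) -> PathBuf {
    let binary_name = if cfg!(windows) {
        "llama-server.exe"
    } else {
        "llama-server"
    };
    engines_dir.join(binary_name)
}

// Hypothetical spawn path: server_path and args would arrive from the frontend invoke().
fn spawn_server(server_path: &str, args: &[String]) -> std::io::Result<std::process::Child> {
    let binary = resolve_server_binary(Path::new(server_path));
    Command::new(binary).args(args).spawn()
}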