chore: store session_info in backend as well for API server (WIP)
This commit is contained in:
parent
ffef7b9cab
commit
dbdc031583
@ -842,6 +842,7 @@ export default class llamacpp_extension extends AIEngine {
|
||||
})
|
||||
|
||||
// Store the session info for later use
|
||||
console.log(sInfo)
|
||||
this.activeSessions.set(sInfo.pid, sInfo)
|
||||
await this.waitForModelLoad(sInfo)
|
||||
|
||||
|
||||
@ -8,6 +8,12 @@ use tokio::task::JoinHandle;
|
||||
/// Server handle type for managing the proxy server lifecycle.
///
/// This is a Tokio `JoinHandle` for a task whose output is either `Ok(())` or a
/// boxed error that is `Send + Sync`.
|
||||
pub type ServerHandle = JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>>;
|
||||
use tokio::{process::Child, sync::Mutex};
|
||||
use crate::core::utils::extensions::inference_llamacpp_extension::server::SessionInfo;
|
||||
|
||||
/// Pairs a spawned backend process with the `SessionInfo` describing it.
///
/// Values of this type are stored in `AppState::llama_server_process`; the
/// `child` handle is taken back out when a session is removed from that map.
pub struct LLamaBackendSession {
|
||||
/// Tokio handle to the spawned child process.
pub child: Child,
|
||||
/// Session metadata stored alongside the process handle.
pub info: SessionInfo,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct AppState {
|
||||
@ -18,7 +24,7 @@ pub struct AppState {
|
||||
pub mcp_active_servers: Arc<Mutex<HashMap<String, serde_json::Value>>>,
|
||||
pub mcp_successfully_connected: Arc<Mutex<HashMap<String, bool>>>,
|
||||
pub server_handle: Arc<Mutex<Option<ServerHandle>>>,
|
||||
pub llama_server_process: Arc<Mutex<HashMap<String, Child>>>,
|
||||
pub llama_server_process: Arc<Mutex<HashMap<String, LLamaBackendSession>>>,
|
||||
}
|
||||
pub fn generate_app_token() -> String {
|
||||
rand::thread_rng()
|
||||
|
||||
@ -5,7 +5,8 @@ pub async fn cleanup_processes(state: State<'_, AppState>) {
|
||||
let mut map = state.llama_server_process.lock().await;
|
||||
let pids: Vec<String> = map.keys().cloned().collect();
|
||||
for pid in pids {
|
||||
if let Some(mut child) = map.remove(&pid) {
|
||||
if let Some(session) = map.remove(&pid) {
|
||||
let mut child = session.child;
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
|
||||
@ -12,6 +12,7 @@ use tokio::time::timeout;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::core::state::AppState;
|
||||
use crate::core::state::LLamaBackendSession;
|
||||
|
||||
type HmacSha256 = Hmac<Sha256>;
|
||||
// Error type for server commands
|
||||
@ -41,7 +42,7 @@ impl serde::Serialize for ServerError {
|
||||
|
||||
/// Result alias whose error type is `ServerError`.
type ServerResult<T> = Result<T, ServerError>;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SessionInfo {
|
||||
pub pid: String, // opaque handle for unload/chat
|
||||
pub port: String, // llama-server output port
|
||||
@ -151,18 +152,23 @@ pub async fn load_llama_model(
|
||||
});
|
||||
|
||||
log::info!("Server process started with PID: {}", pid);
|
||||
|
||||
// Store the child process handle in the state
|
||||
process_map.insert(pid.clone(), child);
|
||||
|
||||
let session_info = SessionInfo {
|
||||
pid: pid,
|
||||
pid: pid.clone(),
|
||||
port: port,
|
||||
model_id: model_id,
|
||||
model_path: model_path,
|
||||
api_key: api_key,
|
||||
};
|
||||
|
||||
// Insert the session (child handle plus its SessionInfo) into process_map
|
||||
process_map.insert(
|
||||
pid.clone(),
|
||||
LLamaBackendSession {
|
||||
child,
|
||||
info: session_info.clone(),
|
||||
},
|
||||
);
|
||||
|
||||
Ok(session_info)
|
||||
}
|
||||
|
||||
@ -173,7 +179,8 @@ pub async fn unload_llama_model(
|
||||
state: State<'_, AppState>,
|
||||
) -> ServerResult<UnloadResult> {
|
||||
let mut map = state.llama_server_process.lock().await;
|
||||
if let Some(mut child) = map.remove(&pid) {
|
||||
if let Some(session) = map.remove(&pid) {
|
||||
let mut child = session.child;
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user