chore: store session_info in backend as well for API server (WIP)
commit dbdc031583
parent ffef7b9cab
@@ -842,6 +842,7 @@ export default class llamacpp_extension extends AIEngine {
     })

     // Store the session info for later use
+    console.log(sInfo)
     this.activeSessions.set(sInfo.pid, sInfo)
     await this.waitForModelLoad(sInfo)

@@ -8,6 +8,12 @@ use tokio::task::JoinHandle;
 /// Server handle type for managing the proxy server lifecycle
 pub type ServerHandle = JoinHandle<Result<(), Box<dyn std::error::Error + Send + Sync>>>;
 use tokio::{process::Child, sync::Mutex};
+use crate::core::utils::extensions::inference_llamacpp_extension::server::SessionInfo;
+
+pub struct LLamaBackendSession {
+    pub child: Child,
+    pub info: SessionInfo,
+}

 #[derive(Default)]
 pub struct AppState {
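The added LLamaBackendSession keeps the spawned Child and its SessionInfo together under one map entry, so the backend no longer tracks only the raw process handle. Below is a minimal, hypothetical sketch of that shape; the stand-in SessionInfo is simplified (the real one lives in the server module and has more fields), and unix `sleep` stands in for llama-server.

use std::collections::HashMap;
use tokio::process::{Child, Command};

// Simplified stand-ins mirroring the shapes introduced in this diff.
#[derive(Debug, Clone)]
pub struct SessionInfo {
    pub pid: String,
    pub port: String,
    pub model_id: String,
    pub api_key: String,
}

pub struct LLamaBackendSession {
    pub child: Child,
    pub info: SessionInfo,
}

// Requires tokio features: rt-multi-thread, macros, process.
#[tokio::main]
async fn main() -> std::io::Result<()> {
    let mut sessions: HashMap<String, LLamaBackendSession> = HashMap::new();

    // A placeholder long-running process in place of llama-server (unix `sleep`).
    let child = Command::new("sleep").arg("30").spawn()?;
    let info = SessionInfo {
        pid: "proc-1".into(),          // placeholder values for illustration
        port: "8081".into(),
        model_id: "example-model".into(),
        api_key: "secret".into(),
    };

    // One entry carries both the process handle and its metadata.
    sessions.insert(info.pid.clone(), LLamaBackendSession { child, info });

    // Metadata can be read back later without touching the process handle.
    if let Some(session) = sessions.get("proc-1") {
        println!("model {} on port {}", session.info.model_id, session.info.port);
    }
    Ok(())
}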
@@ -18,7 +24,7 @@ pub struct AppState {
     pub mcp_active_servers: Arc<Mutex<HashMap<String, serde_json::Value>>>,
     pub mcp_successfully_connected: Arc<Mutex<HashMap<String, bool>>>,
     pub server_handle: Arc<Mutex<Option<ServerHandle>>>,
-    pub llama_server_process: Arc<Mutex<HashMap<String, Child>>>,
+    pub llama_server_process: Arc<Mutex<HashMap<String, LLamaBackendSession>>>,
 }
 pub fn generate_app_token() -> String {
     rand::thread_rng()
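Switching the map's value type from Child to LLamaBackendSession is what lets the backend answer API-server queries about loaded models from its own state. A hypothetical helper in that direction, reusing the stand-in types from the sketch above (not code from this commit):

// Enumerate what is currently loaded, straight from backend state.
fn list_loaded(sessions: &HashMap<String, LLamaBackendSession>) -> Vec<SessionInfo> {
    sessions.values().map(|s| s.info.clone()).collect()
}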
@@ -5,7 +5,8 @@ pub async fn cleanup_processes(state: State<'_, AppState>) {
     let mut map = state.llama_server_process.lock().await;
     let pids: Vec<String> = map.keys().cloned().collect();
     for pid in pids {
-        if let Some(mut child) = map.remove(&pid) {
+        if let Some(session) = map.remove(&pid) {
+            let mut child = session.child;
             #[cfg(unix)]
             {
                 use nix::sys::signal::{kill, Signal};
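In the cleanup path the Child is moved out of the session first, since killing and waiting need mutable ownership of the handle, while the remaining info field stays usable for logging. A sketch of how such a shutdown typically finishes, again using the stand-in types from the first sketch rather than the project's actual helpers:

// Terminate one session: signal the process (unix), then reap it.
async fn stop(session: LLamaBackendSession) {
    // Partial move: `child` is taken out, `session.info` remains accessible.
    let mut child = session.child;
    #[cfg(unix)]
    {
        use nix::sys::signal::{kill, Signal};
        use nix::unistd::Pid;
        if let Some(id) = child.id() {
            // Ask for a graceful exit before resorting to a hard kill.
            let _ = kill(Pid::from_raw(id as i32), Signal::SIGTERM);
        }
    }
    let _ = child.wait().await;
    println!("stopped session for model {}", session.info.model_id);
}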
@@ -12,6 +12,7 @@ use tokio::time::timeout;
 use uuid::Uuid;

 use crate::core::state::AppState;
+use crate::core::state::LLamaBackendSession;

 type HmacSha256 = Hmac<Sha256>;
 // Error type for server commands
@@ -41,7 +42,7 @@ impl serde::Serialize for ServerError {

 type ServerResult<T> = Result<T, ServerError>;

-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct SessionInfo {
     pub pid: String, // opaque handle for unload/chat
     pub port: String, // llama-server output port
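Clone is added to SessionInfo because the same value now has two consumers: a copy goes into the LLamaBackendSession kept in state, and the original is still returned to the caller. In miniature, with the stand-in types from the first sketch:

// Keep a copy in backend state and still hand the original back to the caller.
fn register(
    sessions: &mut HashMap<String, LLamaBackendSession>,
    child: Child,
    info: SessionInfo,
) -> SessionInfo {
    sessions.insert(
        info.pid.clone(),
        LLamaBackendSession {
            child,
            info: info.clone(),
        },
    );
    info
}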
@@ -151,18 +152,23 @@ pub async fn load_llama_model(
     });

     log::info!("Server process started with PID: {}", pid);

-    // Store the child process handle in the state
-    process_map.insert(pid.clone(), child);
-
     let session_info = SessionInfo {
-        pid: pid,
+        pid: pid.clone(),
         port: port,
         model_id: model_id,
         model_path: model_path,
         api_key: api_key,
     };

+    // insert sesinfo to process_map
+    process_map.insert(
+        pid.clone(),
+        LLamaBackendSession {
+            child,
+            info: session_info.clone(),
+        },
+    );
     Ok(session_info)
 }

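With SessionInfo stored next to each process, a proxy or API-server handler can resolve routing details (upstream port, per-session API key) from the pid alone instead of asking the frontend. A hypothetical lookup in that spirit, not part of this commit, using the stand-in types from the first sketch:

// Resolve the upstream base URL and API key for a running session by pid.
// Assumes llama-server listens on localhost, which is not stated in this diff.
fn resolve_upstream(
    sessions: &HashMap<String, LLamaBackendSession>,
    pid: &str,
) -> Option<(String, String)> {
    sessions
        .get(pid)
        .map(|s| (format!("http://127.0.0.1:{}", s.info.port), s.info.api_key.clone()))
}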
@@ -173,7 +179,8 @@ pub async fn unload_llama_model(
     state: State<'_, AppState>,
 ) -> ServerResult<UnloadResult> {
     let mut map = state.llama_server_process.lock().await;
-    if let Some(mut child) = map.remove(&pid) {
+    if let Some(session) = map.remove(&pid) {
+        let mut child = session.child;
         #[cfg(unix)]
         {
             use nix::sys::signal::{kill, Signal};
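The unload path mirrors the cleanup change: the handle is taken from session.child, and the session metadata remains available, for example to describe what was unloaded. A small hypothetical illustration with the stand-in types from the first sketch:

// After removing the session from the map, its metadata is still on hand
// for the command's response or logs.
fn unload_message(session: &LLamaBackendSession) -> String {
    format!(
        "unloaded model {} (port {})",
        session.info.model_id, session.info.port
    )
}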