This commit introduces a new field, `is_embedding`, on the `SessionInfo` structure to clearly mark sessions running dedicated embedding models.

Key changes:

- Adds `is_embedding` to the `SessionInfo` interface in `AIEngine.ts` and to the matching Rust struct in the backend.
- Updates the `loadLlamaModel` command signatures to pass the new flag through.
- Modifies the llama.cpp extension's **auto-unload logic** to **filter out** currently loaded embedding models, so they are **not unloaded** when a new text-generation model is loaded (a sketch of the filtering idea follows this list). This is a critical performance fix: it prevents the embedding model (e.g., the one used for RAG) from being repeatedly reloaded.

Also includes minor code-style cleanup/reformatting in `jan-provider-web/provider.ts` for improved readability.
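To make the filter concrete, here is a minimal Rust sketch written against the state types in the file below. It is not the extension's actual implementation: the function name, module path, and error handling are assumptions, illustrating only the keep-embedding-sessions rule.

```rust
use crate::state::LlamacppState; // hypothetical module path for the state shown below

/// Unload every text-generation session but keep embedding sessions
/// resident, so a RAG embedding model is not reloaded every time the
/// chat model is switched.
pub async fn unload_non_embedding_sessions(state: &LlamacppState) {
    let mut sessions = state.llama_server_process.lock().await;
    // Collect the PIDs of sessions that are NOT dedicated embedding models.
    let doomed: Vec<i32> = sessions
        .values()
        .filter(|s| !s.info.is_embedding)
        .map(|s| s.info.pid)
        .collect();
    for pid in doomed {
        if let Some(mut session) = sessions.remove(&pid) {
            // Terminate the llama-server child process for this session.
            let _ = session.child.kill().await;
        }
    }
}
```

The predicate `!s.info.is_embedding` is the heart of the fix: flagged sessions stay in the map and their `llama-server` processes keep running.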
```rust
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::process::Child;
use tokio::sync::Mutex;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionInfo {
    pub pid: i32,           // opaque handle for unload/chat
    pub port: i32,          // llama-server output port
    pub model_id: String,
    pub model_path: String, // path of the loaded model
    pub is_embedding: bool, // marks a dedicated embedding-model session; exempt from auto-unload
    pub api_key: String,
    #[serde(default)]
    pub mmproj_path: Option<String>,
}

pub struct LLamaBackendSession {
    pub child: Child,
    pub info: SessionInfo,
}

/// LlamaCpp plugin state
pub struct LlamacppState {
    pub llama_server_process: Arc<Mutex<HashMap<i32, LLamaBackendSession>>>,
}

impl Default for LlamacppState {
    fn default() -> Self {
        Self {
            llama_server_process: Arc::new(Mutex::new(HashMap::new())),
        }
    }
}

impl LlamacppState {
    pub fn new() -> Self {
        Self::default()
    }
}
```
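One detail worth noting in the struct above: `#[serde(default)]` on `mmproj_path` means a serialized `SessionInfo` produced before that field existed still deserializes, with the missing field falling back to `None`. A minimal round-trip sketch, assuming the struct above is in scope and `serde_json` is available (the JSON values are made up):

```rust
fn main() -> serde_json::Result<()> {
    // An older payload with no `mmproj_path` key; #[serde(default)]
    // lets deserialization succeed and fills the field with None.
    let raw = r#"{
        "pid": 4242,
        "port": 3928,
        "model_id": "example-embedding-model",
        "model_path": "/models/example-embedding.gguf",
        "is_embedding": true,
        "api_key": "local-api-key"
    }"#;
    let info: SessionInfo = serde_json::from_str(raw)?;
    assert!(info.is_embedding);
    assert!(info.mmproj_path.is_none());
    Ok(())
}
```

Note that `is_embedding` itself has no `#[serde(default)]`, so a payload that omits it will fail to deserialize; callers must always send the flag explicitly.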