feat: Distinguish and preserve embedding model sessions
This commit introduces a new field, `is_embedding`, to the `SessionInfo` structure to clearly mark sessions running dedicated embedding models.

Key changes:
- Adds `is_embedding` to the `SessionInfo` interface in `AIEngine.ts` and to the Rust backend.
- Updates the `loadLlamaModel` command signatures to pass this new flag.
- Modifies the llama.cpp extension's **auto-unload logic** to explicitly **filter out** and **not unload** any currently loaded embedding models when a new text-generation model is loaded.

This is a critical performance fix: it prevents the embedding model (e.g., the one used for RAG) from being repeatedly reloaded. The commit also includes minor code-style cleanup/reformatting in `jan-provider-web/provider.ts` for improved readability.
This commit is contained in:
parent
ff93dc3c5c
commit
7762cea10a
@ -182,6 +182,7 @@ export interface SessionInfo {
|
|||||||
port: number // llama-server output port (corrected from portid)
|
port: number // llama-server output port (corrected from portid)
|
||||||
model_id: string //name of the model
|
model_id: string //name of the model
|
||||||
model_path: string // path of the loaded model
|
model_path: string // path of the loaded model
|
||||||
|
is_embedding: boolean
|
||||||
api_key: string
|
api_key: string
|
||||||
mmproj_path?: string
|
mmproj_path?: string
|
||||||
}
|
}
|
||||||
|
|||||||
@ -45,7 +45,7 @@ export default class JanProviderWeb extends AIEngine {
|
|||||||
// Verify Jan models capabilities in localStorage
|
// Verify Jan models capabilities in localStorage
|
||||||
private validateJanModelsLocalStorage() {
|
private validateJanModelsLocalStorage() {
|
||||||
try {
|
try {
|
||||||
console.log("Validating Jan models in localStorage...")
|
console.log('Validating Jan models in localStorage...')
|
||||||
const storageKey = 'model-provider'
|
const storageKey = 'model-provider'
|
||||||
const data = localStorage.getItem(storageKey)
|
const data = localStorage.getItem(storageKey)
|
||||||
if (!data) return
|
if (!data) return
|
||||||
@ -60,9 +60,14 @@ export default class JanProviderWeb extends AIEngine {
|
|||||||
if (provider.provider === 'jan' && provider.models) {
|
if (provider.provider === 'jan' && provider.models) {
|
||||||
for (const model of provider.models) {
|
for (const model of provider.models) {
|
||||||
console.log(`Checking Jan model: ${model.id}`, model.capabilities)
|
console.log(`Checking Jan model: ${model.id}`, model.capabilities)
|
||||||
if (JSON.stringify(model.capabilities) !== JSON.stringify(JAN_MODEL_CAPABILITIES)) {
|
if (
|
||||||
|
JSON.stringify(model.capabilities) !==
|
||||||
|
JSON.stringify(JAN_MODEL_CAPABILITIES)
|
||||||
|
) {
|
||||||
hasInvalidModel = true
|
hasInvalidModel = true
|
||||||
console.log(`Found invalid Jan model: ${model.id}, clearing localStorage`)
|
console.log(
|
||||||
|
`Found invalid Jan model: ${model.id}, clearing localStorage`
|
||||||
|
)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -79,9 +84,17 @@ export default class JanProviderWeb extends AIEngine {
|
|||||||
// If still present, try setting to empty state
|
// If still present, try setting to empty state
|
||||||
if (afterRemoval) {
|
if (afterRemoval) {
|
||||||
// Try alternative clearing method
|
// Try alternative clearing method
|
||||||
localStorage.setItem(storageKey, JSON.stringify({ state: { providers: [] }, version: parsed.version || 3 }))
|
localStorage.setItem(
|
||||||
|
storageKey,
|
||||||
|
JSON.stringify({
|
||||||
|
state: { providers: [] },
|
||||||
|
version: parsed.version || 3,
|
||||||
|
})
|
||||||
|
)
|
||||||
}
|
}
|
||||||
console.log('Cleared model-provider from localStorage due to invalid Jan capabilities')
|
console.log(
|
||||||
|
'Cleared model-provider from localStorage due to invalid Jan capabilities'
|
||||||
|
)
|
||||||
// Force a page reload to ensure clean state
|
// Force a page reload to ensure clean state
|
||||||
window.location.reload()
|
window.location.reload()
|
||||||
}
|
}
|
||||||
@ -159,6 +172,7 @@ export default class JanProviderWeb extends AIEngine {
|
|||||||
port: 443, // HTTPS port
|
port: 443, // HTTPS port
|
||||||
model_id: modelId,
|
model_id: modelId,
|
||||||
model_path: `remote:${modelId}`, // Indicate this is a remote model
|
model_path: `remote:${modelId}`, // Indicate this is a remote model
|
||||||
|
is_embedding: false, // assume false here, TODO: might need further implementation
|
||||||
api_key: '', // API key handled by auth service
|
api_key: '', // API key handled by auth service
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -193,8 +207,12 @@ export default class JanProviderWeb extends AIEngine {
|
|||||||
console.error(`Failed to unload Jan session ${sessionId}:`, error)
|
console.error(`Failed to unload Jan session ${sessionId}:`, error)
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
error: error instanceof ApiError ? error.message :
|
error:
|
||||||
error instanceof Error ? error.message : 'Unknown error',
|
error instanceof ApiError
|
||||||
|
? error.message
|
||||||
|
: error instanceof Error
|
||||||
|
? error.message
|
||||||
|
: 'Unknown error',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -333,14 +333,12 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
)
|
)
|
||||||
// Clear the invalid stored preference
|
// Clear the invalid stored preference
|
||||||
this.clearStoredBackendType()
|
this.clearStoredBackendType()
|
||||||
bestAvailableBackendString = await this.determineBestBackend(
|
bestAvailableBackendString =
|
||||||
version_backends
|
await this.determineBestBackend(version_backends)
|
||||||
)
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
bestAvailableBackendString = await this.determineBestBackend(
|
bestAvailableBackendString =
|
||||||
version_backends
|
await this.determineBestBackend(version_backends)
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let settings = structuredClone(SETTINGS)
|
let settings = structuredClone(SETTINGS)
|
||||||
@ -1530,6 +1528,7 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
|
|
||||||
if (
|
if (
|
||||||
this.autoUnload &&
|
this.autoUnload &&
|
||||||
|
!isEmbedding &&
|
||||||
(loadedModels.length > 0 || otherLoadingPromises.length > 0)
|
(loadedModels.length > 0 || otherLoadingPromises.length > 0)
|
||||||
) {
|
) {
|
||||||
// Wait for OTHER loading models to finish, then unload everything
|
// Wait for OTHER loading models to finish, then unload everything
|
||||||
@ -1537,10 +1536,33 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
await Promise.all(otherLoadingPromises)
|
await Promise.all(otherLoadingPromises)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now unload all loaded models
|
// Now unload all loaded Text models excluding embedding models
|
||||||
const allLoadedModels = await this.getLoadedModels()
|
const allLoadedModels = await this.getLoadedModels()
|
||||||
if (allLoadedModels.length > 0) {
|
if (allLoadedModels.length > 0) {
|
||||||
await Promise.all(allLoadedModels.map((model) => this.unload(model)))
|
const sessionInfos: (SessionInfo | null)[] = await Promise.all(
|
||||||
|
allLoadedModels.map(async (modelId) => {
|
||||||
|
try {
|
||||||
|
return await this.findSessionByModel(modelId)
|
||||||
|
} catch (e) {
|
||||||
|
logger.warn(`Unable to find session for model "${modelId}": ${e}`)
|
||||||
|
return null // treat as “not‑eligible for unload”
|
||||||
|
}
|
||||||
|
})
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(JSON.stringify(sessionInfos))
|
||||||
|
|
||||||
|
const nonEmbeddingModels: string[] = sessionInfos
|
||||||
|
.filter(
|
||||||
|
(s): s is SessionInfo => s !== null && s.is_embedding === false
|
||||||
|
)
|
||||||
|
.map((s) => s.model_id)
|
||||||
|
|
||||||
|
if (nonEmbeddingModels.length > 0) {
|
||||||
|
await Promise.all(
|
||||||
|
nonEmbeddingModels.map((modelId) => this.unload(modelId))
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const args: string[] = []
|
const args: string[] = []
|
||||||
@ -1677,6 +1699,7 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
libraryPath,
|
libraryPath,
|
||||||
args,
|
args,
|
||||||
envs,
|
envs,
|
||||||
|
isEmbedding,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return sInfo
|
return sInfo
|
||||||
@ -2024,7 +2047,11 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
let sInfo = await this.findSessionByModel('sentence-transformer-mini')
|
let sInfo = await this.findSessionByModel('sentence-transformer-mini')
|
||||||
if (!sInfo) {
|
if (!sInfo) {
|
||||||
const downloadedModelList = await this.list()
|
const downloadedModelList = await this.list()
|
||||||
if (!downloadedModelList.some((model) => model.id === 'sentence-transformer-mini')) {
|
if (
|
||||||
|
!downloadedModelList.some(
|
||||||
|
(model) => model.id === 'sentence-transformer-mini'
|
||||||
|
)
|
||||||
|
) {
|
||||||
await this.import('sentence-transformer-mini', {
|
await this.import('sentence-transformer-mini', {
|
||||||
modelPath:
|
modelPath:
|
||||||
'https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/all-MiniLM-L6-v2-ggml-model-f16.gguf?download=true',
|
'https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/all-MiniLM-L6-v2-ggml-model-f16.gguf?download=true',
|
||||||
|
|||||||
@ -30,12 +30,14 @@ export async function cleanupLlamaProcesses(): Promise<void> {
|
|||||||
export async function loadLlamaModel(
|
export async function loadLlamaModel(
|
||||||
backendPath: string,
|
backendPath: string,
|
||||||
libraryPath?: string,
|
libraryPath?: string,
|
||||||
args: string[] = []
|
args: string[] = [],
|
||||||
|
isEmbedding: boolean = false
|
||||||
): Promise<SessionInfo> {
|
): Promise<SessionInfo> {
|
||||||
return await invoke('plugin:llamacpp|load_llama_model', {
|
return await invoke('plugin:llamacpp|load_llama_model', {
|
||||||
backendPath,
|
backendPath,
|
||||||
libraryPath,
|
libraryPath,
|
||||||
args,
|
args,
|
||||||
|
isEmbedding,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -44,6 +44,7 @@ pub async fn load_llama_model<R: Runtime>(
|
|||||||
library_path: Option<&str>,
|
library_path: Option<&str>,
|
||||||
mut args: Vec<String>,
|
mut args: Vec<String>,
|
||||||
envs: HashMap<String, String>,
|
envs: HashMap<String, String>,
|
||||||
|
is_embedding: bool,
|
||||||
) -> ServerResult<SessionInfo> {
|
) -> ServerResult<SessionInfo> {
|
||||||
let state: State<LlamacppState> = app_handle.state();
|
let state: State<LlamacppState> = app_handle.state();
|
||||||
let mut process_map = state.llama_server_process.lock().await;
|
let mut process_map = state.llama_server_process.lock().await;
|
||||||
@ -223,6 +224,7 @@ pub async fn load_llama_model<R: Runtime>(
|
|||||||
port: port,
|
port: port,
|
||||||
model_id: model_id,
|
model_id: model_id,
|
||||||
model_path: model_path_pb.display().to_string(),
|
model_path: model_path_pb.display().to_string(),
|
||||||
|
is_embedding: is_embedding,
|
||||||
api_key: api_key,
|
api_key: api_key,
|
||||||
mmproj_path: mmproj_path_string,
|
mmproj_path: mmproj_path_string,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -10,6 +10,7 @@ pub struct SessionInfo {
|
|||||||
pub port: i32, // llama-server output port
|
pub port: i32, // llama-server output port
|
||||||
pub model_id: String,
|
pub model_id: String,
|
||||||
pub model_path: String, // path of the loaded model
|
pub model_path: String, // path of the loaded model
|
||||||
|
pub is_embedding: bool,
|
||||||
pub api_key: String,
|
pub api_key: String,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub mmproj_path: Option<String>,
|
pub mmproj_path: Option<String>,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user