Merge pull request #6659 from menloresearch/fix/6626

fix: Improve KV cache estimation robustness
2025-09-30 13:42:28 +07:00 · 2025-09-30 13:42:28 +07:00 · 04fcd788a3
commit 04fcd788a3
parent d315522c5a 34b254e2d8
1 changed files with 31 additions and 2 deletions
--- a/src-tauri/plugins/tauri-plugin-llamacpp/src/gguf/utils.rs
+++ b/src-tauri/plugins/tauri-plugin-llamacpp/src/gguf/utils.rs
@ -62,6 +62,7 @@ pub async fn estimate_kv_cache_internal(
    ctx_size: Option<u64>,
 ) -> Result<KVCacheEstimate, KVCacheError> {
    log::info!("Received ctx_size parameter: {:?}", ctx_size);
+    log::info!("Received model metadata:\n{:?}", &meta);
    let arch = meta
        .get("general.architecture")
        .ok_or(KVCacheError::ArchitectureNotFound)?;
@ -94,15 +95,43 @@ pub async fn estimate_kv_cache_internal(
    let key_len_key = format!("{}.attention.key_length", arch);
    let val_len_key = format!("{}.attention.value_length", arch);

-    let key_len = meta
+    let mut key_len = meta
        .get(&key_len_key)
        .and_then(|s| s.parse::<u64>().ok())
        .unwrap_or(0);
-    let val_len = meta
+    let mut val_len = meta
        .get(&val_len_key)
        .and_then(|s| s.parse::<u64>().ok())
        .unwrap_or(0);

+    // Fallback: calculate from embedding_length if key/val lengths not found
+    if key_len == 0 || val_len == 0 {
+        let emb_len_key = format!("{}.embedding_length", arch);
+        let emb_len = meta
+            .get(&emb_len_key)
+            .and_then(|s| s.parse::<u64>().ok())
+            .unwrap_or(0);
+
+        if emb_len > 0 && n_head > 0 {
+            // For most transformers: head_dim = embedding_length / total_heads
+            let total_heads = meta
+                .get(&n_head_key)
+                .and_then(|s| s.parse::<u64>().ok())
+                .unwrap_or(n_head);
+
+            let head_dim = emb_len / total_heads;
+            key_len = head_dim;
+            val_len = head_dim;
+
+            log::info!(
+                "Calculated key_len and val_len from embedding_length: {} / {} heads = {} per head",
+                emb_len,
+                total_heads,
+                head_dim
+            );
+        }
+    }
+
    if key_len == 0 || val_len == 0 {
        return Err(KVCacheError::EmbeddingLengthInvalid);
    }