From b9f658f2aec2a0281e1dbc3bfc368aeea8d7df45 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Wed, 17 Sep 2025 20:09:33 +0530
Subject: [PATCH] fix: correct memory suitability checks in llamacpp extension
 (#6504)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous implementation mixed model size and VRAM checks, leading to
inaccurate status reporting (e.g., false RED results).

- Simplified the import statement for `readGgufMetadata`.
- Fixed the RAM/VRAM comparison by removing unnecessary parentheses.
- Replaced the ambiguous `modelSize > usableTotalMemory` check with a clear
  `totalRequired > usableTotalMemory` hard-limit condition.
- Refactored the status logic to explicitly handle the CPU-GPU hybrid
  scenario, returning **YELLOW** when the total requirement fits combined
  memory but exceeds VRAM.
- Updated comments for better readability and maintainability.
---
 extensions/llamacpp-extension/src/index.ts | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 2d5d9272b..b2ca7b9c7 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -36,9 +36,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 import { getProxyConfig } from './util'
 import { basename } from '@tauri-apps/api/path'
-import {
-  readGgufMetadata,
-} from '@janhq/tauri-plugin-llamacpp-api'
+import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
 
 // Error message constant - matches web-app/src/utils/error.ts
@@ -2162,7 +2160,7 @@ export default class llamacpp_extension extends AIEngine {
     ).size
     const ramForModel = modelSize + (offloadMmproj ? 0 : mmprojSize)
 
-    if (ramForModel + vramForMinContext > (usableSystemMemory + usableVRAM)) {
+    if (ramForModel + vramForMinContext > usableSystemMemory + usableVRAM) {
       logger.error(
         `Model unsupported. Not enough resources for model and min context.`
       )
@@ -2425,9 +2423,9 @@ export default class llamacpp_extension extends AIEngine {
       memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
     const usableVRAM = memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
 
-    // Check if model fits in total memory at all
-    if (modelSize > usableTotalMemory) {
-      return 'RED'
+    // Check if model fits in total memory at all (this is the hard limit)
+    if (totalRequired > usableTotalMemory) {
+      return 'RED' // Truly impossible to run
     }
 
     // Check if everything fits in VRAM (ideal case)
@@ -2435,14 +2433,11 @@
       return 'GREEN'
     }
 
-    // Check if model fits in VRAM but total requirement exceeds VRAM
-    // OR if total requirement fits in total memory but not in VRAM
-    if (modelSize <= usableVRAM || totalRequired <= usableTotalMemory) {
-      return 'YELLOW'
-    }
-
-    // If we get here, nothing fits properly
-    return 'RED'
+    // If we get here, it means:
+    // - Total requirement fits in combined memory
+    // - But doesn't fit entirely in VRAM
+    // This is the CPU-GPU hybrid scenario
+    return 'YELLOW'
   } catch (e) {
     throw new Error(String(e))
   }
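
Reference sketch (not part of the patch): the decision logic the patch converges
on, written as a standalone TypeScript function. The names checkModelSuitability
and MemoryBudget, the 0.9 value of USABLE_MEMORY_PERCENTAGE, and the formula for
usableTotalMemory are illustrative assumptions, not the extension's actual API;
totalRequired stands for the already-estimated bytes needed for model weights,
KV cache, and overhead.

    type Suitability = 'RED' | 'YELLOW' | 'GREEN'

    // Assumed headroom factor; the real constant lives in the extension.
    const USABLE_MEMORY_PERCENTAGE = 0.9

    interface MemoryBudget {
      totalRAM: number // bytes of system RAM
      totalVRAM: number // bytes of GPU VRAM
    }

    function checkModelSuitability(
      totalRequired: number, // bytes for weights + KV cache + overhead
      mem: MemoryBudget
    ): Suitability {
      const usableTotalMemory =
        (mem.totalRAM + mem.totalVRAM) * USABLE_MEMORY_PERCENTAGE
      const usableVRAM = mem.totalVRAM * USABLE_MEMORY_PERCENTAGE

      // Hard limit: does not fit even in RAM and VRAM combined.
      if (totalRequired > usableTotalMemory) return 'RED'

      // Ideal case: everything fits in VRAM, full GPU offload.
      if (totalRequired <= usableVRAM) return 'GREEN'

      // Fits in combined memory but not in VRAM alone: CPU-GPU hybrid.
      return 'YELLOW'
    }

Called with, say, a 9 GB totalRequired against 8 GB of usable VRAM and 20 GB of
usable combined memory, this sketch returns 'YELLOW' (hybrid); it returns 'GREEN'
only when the full requirement fits in VRAM and 'RED' only when it exceeds the
combined budget.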