From b9f658f2aec2a0281e1dbc3bfc368aeea8d7df45 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Wed, 17 Sep 2025 20:09:33 +0530
Subject: [PATCH] fix: correct memory suitability checks in llamacpp extension
 (#6504)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous implementation mixed model size and VRAM checks, leading to
inaccurate status reporting (e.g., false RED results).

- Simplified the import statement for `readGgufMetadata`.
- Fixed the RAM/VRAM comparison by removing unnecessary parentheses.
- Replaced the ambiguous `modelSize > usableTotalMemory` check with a clear
  `totalRequired > usableTotalMemory` hard-limit condition.
- Refactored the status logic to explicitly handle the CPU-GPU hybrid
  scenario, returning **YELLOW** when the total requirement fits combined
  memory but exceeds VRAM.
- Updated comments for better readability and maintainability.
---
 extensions/llamacpp-extension/src/index.ts | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 2d5d9272b..b2ca7b9c7 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -36,9 +36,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 import { getProxyConfig } from './util'
 import { basename } from '@tauri-apps/api/path'
-import {
-  readGgufMetadata,
-} from '@janhq/tauri-plugin-llamacpp-api'
+import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
 
 // Error message constant - matches web-app/src/utils/error.ts
@@ -2162,7 +2160,7 @@ export default class llamacpp_extension extends AIEngine {
     ).size
     const ramForModel = modelSize + (offloadMmproj ? 0 : mmprojSize)
 
-    if (ramForModel + vramForMinContext > (usableSystemMemory + usableVRAM)) {
+    if (ramForModel + vramForMinContext > usableSystemMemory + usableVRAM) {
       logger.error(
         `Model unsupported. Not enough resources for model and min context.`
       )
@@ -2425,9 +2423,9 @@ export default class llamacpp_extension extends AIEngine {
       memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
     const usableVRAM = memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
 
-    // Check if model fits in total memory at all
-    if (modelSize > usableTotalMemory) {
-      return 'RED'
+    // Check if model fits in total memory at all (this is the hard limit)
+    if (totalRequired > usableTotalMemory) {
+      return 'RED' // Truly impossible to run
     }
 
     // Check if everything fits in VRAM (ideal case)
@@ -2435,14 +2433,11 @@
       return 'GREEN'
     }
 
-    // Check if model fits in VRAM but total requirement exceeds VRAM
-    // OR if total requirement fits in total memory but not in VRAM
-    if (modelSize <= usableVRAM || totalRequired <= usableTotalMemory) {
-      return 'YELLOW'
-    }
-
-    // If we get here, nothing fits properly
-    return 'RED'
+    // If we get here, it means:
+    // - Total requirement fits in combined memory
+    // - But doesn't fit entirely in VRAM
+    // This is the CPU-GPU hybrid scenario
+    return 'YELLOW'
   } catch (e) {
     throw new Error(String(e))
   }
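
Reference sketch (not part of the patch): the decision logic the patch converges
on, written as a standalone TypeScript function. The names checkModelSuitability
and MemoryBudget, the 0.9 value of USABLE_MEMORY_PERCENTAGE, and the formula for
usableTotalMemory are illustrative assumptions, not the extension's actual API;
totalRequired stands for the already-estimated bytes needed for model weights,
KV cache, and overhead.

    type Suitability = 'RED' | 'YELLOW' | 'GREEN'

    // Assumed headroom factor; the real constant lives in the extension.
    const USABLE_MEMORY_PERCENTAGE = 0.9

    interface MemoryBudget {
      totalRAM: number // bytes of system RAM
      totalVRAM: number // bytes of GPU VRAM
    }

    function checkModelSuitability(
      totalRequired: number, // bytes for weights + KV cache + overhead
      mem: MemoryBudget
    ): Suitability {
      const usableTotalMemory =
        (mem.totalRAM + mem.totalVRAM) * USABLE_MEMORY_PERCENTAGE
      const usableVRAM = mem.totalVRAM * USABLE_MEMORY_PERCENTAGE

      // Hard limit: does not fit even in RAM and VRAM combined.
      if (totalRequired > usableTotalMemory) return 'RED'

      // Ideal case: everything fits in VRAM, full GPU offload.
      if (totalRequired <= usableVRAM) return 'GREEN'

      // Fits in combined memory but not in VRAM alone: CPU-GPU hybrid.
      return 'YELLOW'
    }

Called with, say, a 9 GB totalRequired against 8 GB of usable VRAM and 20 GB of
usable combined memory, this sketch returns 'YELLOW' (hybrid); it returns 'GREEN'
only when the full requirement fits in VRAM and 'RED' only when it exceeds the
combined budget.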