fix: correct memory suitability checks in llamacpp extension (#6504)

The previous implementation mixed model size and VRAM checks, leading to inaccurate status reporting (e.g., false RED results).
- Simplified import statement for `readGgufMetadata`.
- Removed redundant parentheses from the combined RAM + VRAM comparison (cosmetic, no behavioral change).
- Replaced ambiguous `modelSize > usableTotalMemory` check with a clear `totalRequired > usableTotalMemory` hard‑limit condition.
- Refactored the status logic to explicitly handle the CPU‑GPU hybrid scenario, returning **YELLOW** when the total requirement fits combined memory but exceeds VRAM (see the sketch after this list).
- Updated comments for better readability and maintenance.
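For context, a minimal sketch of the three-way classification this refactor produces. This is not the extension's actual code: the function name `planModelSupport` and its standalone signature are invented for illustration, and the GREEN condition is assumed from the "everything fits in VRAM (ideal case)" comment, since that check's exact expression lies outside the diff.

```ts
// Sketch only: mirrors the decision order shown in the diff below.
type SupportStatus = 'GREEN' | 'YELLOW' | 'RED'

function planModelSupport(
  totalRequired: number, // model + mmproj + context requirement, in bytes
  usableVRAM: number, // e.g. totalVRAM * USABLE_MEMORY_PERCENTAGE
  usableTotalMemory: number // usable system RAM + usable VRAM combined
): SupportStatus {
  // Hard limit: the full requirement must fit in combined memory.
  if (totalRequired > usableTotalMemory) {
    return 'RED' // truly impossible to run
  }
  // Ideal case: everything fits in VRAM (assumed condition).
  if (totalRequired <= usableVRAM) {
    return 'GREEN'
  }
  // Fits in combined memory but not in VRAM alone: CPU-GPU hybrid.
  return 'YELLOW'
}
```

The key change is that the hard RED limit now gates on the total requirement rather than the model size alone, so the status no longer depends on which partial quantity happened to be compared.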
Author: Akarshan Biswas
Date:   2025-09-17 20:09:33 +05:30 (committed by GitHub)
Commit: b9f658f2ae
Parent: 630463257b


@@ -36,9 +36,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 import { getProxyConfig } from './util'
 import { basename } from '@tauri-apps/api/path'
-import {
-  readGgufMetadata,
-} from '@janhq/tauri-plugin-llamacpp-api'
+import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
 
 // Error message constant - matches web-app/src/utils/error.ts
@@ -2162,7 +2160,7 @@ export default class llamacpp_extension extends AIEngine {
       ).size
       const ramForModel = modelSize + (offloadMmproj ? 0 : mmprojSize)
 
-      if (ramForModel + vramForMinContext > (usableSystemMemory + usableVRAM)) {
+      if (ramForModel + vramForMinContext > usableSystemMemory + usableVRAM) {
         logger.error(
           `Model unsupported. Not enough resources for model and min context.`
         )
@@ -2425,9 +2423,9 @@ export default class llamacpp_extension extends AIEngine {
         memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
       const usableVRAM = memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
 
-      // Check if model fits in total memory at all
-      if (modelSize > usableTotalMemory) {
-        return 'RED'
+      // Check if model fits in total memory at all (this is the hard limit)
+      if (totalRequired > usableTotalMemory) {
+        return 'RED' // Truly impossible to run
       }
 
       // Check if everything fits in VRAM (ideal case)
@@ -2435,14 +2433,11 @@ export default class llamacpp_extension extends AIEngine {
         return 'GREEN'
       }
 
-      // Check if model fits in VRAM but total requirement exceeds VRAM
-      // OR if total requirement fits in total memory but not in VRAM
-      if (modelSize <= usableVRAM || totalRequired <= usableTotalMemory) {
-        return 'YELLOW'
-      }
-
-      // If we get here, nothing fits properly
-      return 'RED'
+      // If we get here, it means:
+      // - Total requirement fits in combined memory
+      // - But doesn't fit entirely in VRAM
+      // This is the CPU-GPU hybrid scenario
+      return 'YELLOW'
     } catch (e) {
       throw new Error(String(e))
     }