fix: correct memory suitability checks in llamacpp extension (#6504)
The previous implementation mixed model size and VRAM checks, leading to inaccurate status reporting (e.g., false RED results).

- Simplified the import statement for `readGgufMetadata`.
- Fixed the RAM/VRAM comparison by removing unnecessary parentheses.
- Replaced the ambiguous `modelSize > usableTotalMemory` check with a clear `totalRequired > usableTotalMemory` hard-limit condition.
- Refactored the status logic to explicitly handle the CPU-GPU hybrid scenario, returning **YELLOW** when the total requirement fits combined memory but exceeds VRAM (see the sketch below).
- Updated comments for better readability and maintenance.
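For context, here is a minimal, self-contained sketch of the resulting three-tier check, assuming the names used in the diff below (`totalRequired`, `usableVRAM`, `usableTotalMemory`). The `checkModelSupport` helper and the `MemoryBudget` shape are illustrative only, not the extension's actual API.

```ts
// Illustrative sketch only; these names are assumptions, not the extension's API.
type ModelSupportStatus = 'RED' | 'YELLOW' | 'GREEN'

interface MemoryBudget {
  usableVRAM: number // usable dedicated GPU memory, in bytes
  usableTotalMemory: number // usable system RAM + VRAM combined, in bytes
}

function checkModelSupport(
  totalRequired: number, // model weights + mmproj + context buffers, in bytes
  budget: MemoryBudget
): ModelSupportStatus {
  // Hard limit: the total requirement must fit in combined memory at all.
  if (totalRequired > budget.usableTotalMemory) {
    return 'RED' // truly impossible to run
  }
  // Ideal case: everything fits in VRAM, so the model can be fully offloaded.
  if (totalRequired <= budget.usableVRAM) {
    return 'GREEN'
  }
  // Fits in combined memory but not entirely in VRAM: CPU-GPU hybrid scenario.
  return 'YELLOW'
}

// Example: 10 GiB required, 8 GiB usable VRAM, 24 GiB usable total -> 'YELLOW'
console.log(
  checkModelSupport(10 * 2 ** 30, {
    usableVRAM: 8 * 2 ** 30,
    usableTotalMemory: 24 * 2 ** 30,
  })
)
```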
parent 630463257b
commit b9f658f2ae
@@ -36,9 +36,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 import { getProxyConfig } from './util'
 import { basename } from '@tauri-apps/api/path'
-import {
-  readGgufMetadata,
-} from '@janhq/tauri-plugin-llamacpp-api'
+import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
 
 // Error message constant - matches web-app/src/utils/error.ts
@@ -2162,7 +2160,7 @@ export default class llamacpp_extension extends AIEngine {
      ).size
 
      const ramForModel = modelSize + (offloadMmproj ? 0 : mmprojSize)
-      if (ramForModel + vramForMinContext > (usableSystemMemory + usableVRAM)) {
+      if (ramForModel + vramForMinContext > usableSystemMemory + usableVRAM) {
        logger.error(
          `Model unsupported. Not enough resources for model and min context.`
        )
@@ -2425,9 +2423,9 @@ export default class llamacpp_extension extends AIEngine {
        memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
      const usableVRAM = memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
 
-      // Check if model fits in total memory at all
-      if (modelSize > usableTotalMemory) {
-        return 'RED'
+      // Check if model fits in total memory at all (this is the hard limit)
+      if (totalRequired > usableTotalMemory) {
+        return 'RED' // Truly impossible to run
      }
 
      // Check if everything fits in VRAM (ideal case)
@@ -2435,14 +2433,11 @@ export default class llamacpp_extension extends AIEngine {
        return 'GREEN'
      }
 
-      // Check if model fits in VRAM but total requirement exceeds VRAM
-      // OR if total requirement fits in total memory but not in VRAM
-      if (modelSize <= usableVRAM || totalRequired <= usableTotalMemory) {
-        return 'YELLOW'
-      }
-
-      // If we get here, nothing fits properly
-      return 'RED'
+      // If we get here, it means:
+      // - Total requirement fits in combined memory
+      // - But doesn't fit entirely in VRAM
+      // This is the CPU-GPU hybrid scenario
+      return 'YELLOW'
    } catch (e) {
      throw new Error(String(e))
    }
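A similarly hedged sketch of the minimum-resource gate touched in the second hunk: `modelSize`, `mmprojSize`, `offloadMmproj`, `vramForMinContext`, `usableSystemMemory`, and `usableVRAM` mirror the names in the diff, while the wrapper function and the use of `console.error` in place of the extension's logger are assumptions for illustration.

```ts
// Illustrative wrapper; console.error stands in for the extension's logger.
function hasResourcesForMinContext(
  modelSize: number, // model weights, in bytes
  mmprojSize: number, // multimodal projector size, in bytes
  offloadMmproj: boolean, // true when the mmproj is offloaded to the GPU
  vramForMinContext: number, // VRAM needed for the minimum context, in bytes
  usableSystemMemory: number,
  usableVRAM: number
): boolean {
  // RAM share: the model plus the mmproj, unless the mmproj lives in VRAM.
  const ramForModel = modelSize + (offloadMmproj ? 0 : mmprojSize)
  // The combined requirement must fit in combined usable memory;
  // the parentheses removed in the diff did not change this comparison.
  if (ramForModel + vramForMinContext > usableSystemMemory + usableVRAM) {
    console.error(
      'Model unsupported. Not enough resources for model and min context.'
    )
    return false
  }
  return true
}
```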