fix: Use 80% of total memory for compatibility check (#6321)

* fix: Use 80% of total memory for compatibility check

* refactor: extract usable memory percentage to named constant

Extract the hardcoded 0.8 multiplier into a named constant
USABLE_MEMORY_PERCENTAGE for better readability and maintainability.
This commit is contained in:
Akarshan Biswas 2025-08-28 14:50:00 +05:30 committed by GitHub
parent bf15036ef8
commit 5fae954ac5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1954,22 +1954,27 @@ export default class llamacpp_extension extends AIEngine {
logger.info( logger.info(
`isModelSupported: Total memory requirement: ${totalRequired} for ${path}` `isModelSupported: Total memory requirement: ${totalRequired} for ${path}`
) )
let availableMemBytes: number let totalMemBytes: number
const devices = await this.getDevices() const devices = await this.getDevices()
if (devices.length > 0) { if (devices.length > 0) {
// Sum free memory across all GPUs // Sum total memory across all GPUs
availableMemBytes = devices totalMemBytes = devices
.map((d) => d.free * 1024 * 1024) .map((d) => d.mem * 1024 * 1024)
.reduce((a, b) => a + b, 0) .reduce((a, b) => a + b, 0)
} else { } else {
// CPU fallback // CPU fallback
const sys = await getSystemUsage() const sys = await getSystemUsage()
availableMemBytes = (sys.total_memory - sys.used_memory) * 1024 * 1024 totalMemBytes = sys.total_memory * 1024 * 1024
} }
// check model size wrt system memory
if (modelSize > availableMemBytes) { // Use 80% of total memory as the usable limit
const USABLE_MEMORY_PERCENTAGE = 0.8
const usableMemBytes = totalMemBytes * USABLE_MEMORY_PERCENTAGE
// check model size wrt 80% of system memory
if (modelSize > usableMemBytes) {
return 'RED' return 'RED'
} else if (modelSize + kvCacheSize > availableMemBytes) { } else if (modelSize + kvCacheSize > usableMemBytes) {
return 'YELLOW' return 'YELLOW'
} else { } else {
return 'GREEN' return 'GREEN'