Merge pull request #6333 from menloresearch/release/v0.6.9
This commit is contained in:
commit
88decc95aa
@@ -1954,22 +1954,27 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
logger.info(
|
logger.info(
|
||||||
`isModelSupported: Total memory requirement: ${totalRequired} for ${path}`
|
`isModelSupported: Total memory requirement: ${totalRequired} for ${path}`
|
||||||
)
|
)
|
||||||
let availableMemBytes: number
|
let totalMemBytes: number
|
||||||
const devices = await this.getDevices()
|
const devices = await this.getDevices()
|
||||||
if (devices.length > 0) {
|
if (devices.length > 0) {
|
||||||
// Sum free memory across all GPUs
|
// Sum total memory across all GPUs
|
||||||
availableMemBytes = devices
|
totalMemBytes = devices
|
||||||
.map((d) => d.free * 1024 * 1024)
|
.map((d) => d.mem * 1024 * 1024)
|
||||||
.reduce((a, b) => a + b, 0)
|
.reduce((a, b) => a + b, 0)
|
||||||
} else {
|
} else {
|
||||||
// CPU fallback
|
// CPU fallback
|
||||||
const sys = await getSystemUsage()
|
const sys = await getSystemUsage()
|
||||||
availableMemBytes = (sys.total_memory - sys.used_memory) * 1024 * 1024
|
totalMemBytes = sys.total_memory * 1024 * 1024
|
||||||
}
|
}
|
||||||
// check model size wrt system memory
|
|
||||||
if (modelSize > availableMemBytes) {
|
// Use 80% of total memory as the usable limit
|
||||||
|
const USABLE_MEMORY_PERCENTAGE = 0.8
|
||||||
|
const usableMemBytes = totalMemBytes * USABLE_MEMORY_PERCENTAGE
|
||||||
|
|
||||||
|
// check model size wrt 80% of system memory
|
||||||
|
if (modelSize > usableMemBytes) {
|
||||||
return 'RED'
|
return 'RED'
|
||||||
} else if (modelSize + kvCacheSize > availableMemBytes) {
|
} else if (modelSize + kvCacheSize > usableMemBytes) {
|
||||||
return 'YELLOW'
|
return 'YELLOW'
|
||||||
} else {
|
} else {
|
||||||
return 'GREEN'
|
return 'GREEN'
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user