refactor: rename noOffloadMmproj flag to offloadMmproj and reorder args

The flag `noOffloadMmproj` was misleading: despite the `no` prefix, it is set to `true` when the mmproj file **is** offloaded to VRAM. Renaming it to `offloadMmproj` clarifies its purpose and aligns the naming with the surrounding code.

Additionally, the `planModelLoad` signature has been reordered to place `mmprojPath` before `requestedCtx`, improving readability and making the optional parameters more intuitive. All related logic, calculations, and log messages have been updated to use the new flag name.
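
Because both parameters are optional and positional, existing call sites that pass `mmprojPath` must swap their argument order. A minimal sketch of an updated caller (the variables `modelPath`, `mmprojPath`, and the context size `8192` are illustrative, not taken from this diff):

```ts
// Hypothetical caller inside the extension.

// Before: requestedCtx was the 2nd argument, mmprojPath the 3rd
// const plan = await this.planModelLoad(modelPath, 8192, mmprojPath)

// After: mmprojPath is the 2nd argument, requestedCtx the 3rd
const plan = await this.planModelLoad(modelPath, mmprojPath, 8192)

// The renamed flag reads positively: true means the mmproj file fits in
// VRAM and will be offloaded there.
if (plan.offloadMmproj) {
  logger.info('mmproj will be offloaded to VRAM')
}
```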
Author: Akarshan
Date: 2025-09-11 12:29:53 +05:30
Parent: bc29046c06
Commit: abd0cbe599

@@ -80,7 +80,7 @@ type ModelPlan = {
   gpuLayers: number
   maxContextLength: number
   noOffloadKVCache: boolean
-  noOffloadMmproj?: boolean
+  offloadMmproj?: boolean
   mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
 }
@@ -2049,8 +2049,8 @@ export default class llamacpp_extension extends AIEngine {
   async planModelLoad(
     path: string,
-    requestedCtx?: number,
-    mmprojPath?: string
+    mmprojPath?: string,
+    requestedCtx?: number
   ): Promise<ModelPlan> {
     const modelSize = await this.getModelSize(path)
     const memoryInfo = await this.getTotalSystemMemory()
@@ -2138,12 +2138,12 @@ export default class llamacpp_extension extends AIEngine {
     )
     // --- Priority 1: Allocate mmproj (if exists) ---
-    let noOffloadMmproj = false
+    let offloadMmproj = false
     let remainingVRAM = usableVRAM
     if (mmprojSize > 0) {
       if (mmprojSize <= remainingVRAM) {
-        noOffloadMmproj = true
+        offloadMmproj = true
         remainingVRAM -= mmprojSize
         logger.info(`MMProj allocated to VRAM: ${mmprojSize} bytes`)
       } else {
@@ -2218,7 +2218,7 @@ export default class llamacpp_extension extends AIEngine {
     const cpuLayers = totalLayers - gpuLayers
     const modelCPUSize = cpuLayers * layerSize
     const mmprojCPUSize =
-      mmprojSize > 0 && !noOffloadMmproj ? mmprojSize : 0
+      mmprojSize > 0 && !offloadMmproj ? mmprojSize : 0
     const systemRAMUsed = modelCPUSize + mmprojCPUSize
     const availableSystemRAMForKVCache = Math.max(
       0,
@@ -2277,7 +2277,7 @@ export default class llamacpp_extension extends AIEngine {
     const estimatedGPUUsage =
       gpuLayers * layerSize +
       maxContextLength * kvCachePerToken +
-      (noOffloadMmproj ? mmprojSize : 0)
+      (offloadMmproj ? mmprojSize : 0)
     if (estimatedGPUUsage > memoryInfo.totalVRAM * 0.9) {
       logger.warn(
@@ -2293,7 +2293,7 @@ export default class llamacpp_extension extends AIEngine {
       const newEstimate =
         gpuLayers * layerSize +
         maxContextLength * kvCachePerToken +
-        (noOffloadMmproj ? mmprojSize : 0)
+        (offloadMmproj ? mmprojSize : 0)
       if (newEstimate <= memoryInfo.totalVRAM * 0.9) break
     }
@@ -2329,7 +2329,7 @@ export default class llamacpp_extension extends AIEngine {
     // Log final plan
     const mmprojInfo = mmprojPath
-      ? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, noOffloadMmproj=${noOffloadMmproj}`
+      ? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, offloadMmproj=${offloadMmproj}`
       : ''
     logger.info(
@@ -2343,7 +2343,7 @@ export default class llamacpp_extension extends AIEngine {
       maxContextLength,
       noOffloadKVCache,
       mode,
-      noOffloadMmproj,
+      offloadMmproj,
     }
   }
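
For reference, the placement decision that the renamed flag captures can be summarized in a short standalone sketch (names follow the hunks above; the real method also handles layer splitting, KV-cache sizing, and the 90% VRAM guard):

```ts
// Simplified sketch of the mmproj placement decision, assuming both sizes
// are already known in bytes (see the hunks above).
function planMmprojPlacement(mmprojSize: number, usableVRAM: number) {
  let offloadMmproj = false
  let remainingVRAM = usableVRAM
  if (mmprojSize > 0 && mmprojSize <= remainingVRAM) {
    // mmproj fits in VRAM: offload it and reserve its share of VRAM
    offloadMmproj = true
    remainingVRAM -= mmprojSize
  }
  // When the mmproj stays on the CPU, it counts against system RAM instead
  const mmprojCPUSize = mmprojSize > 0 && !offloadMmproj ? mmprojSize : 0
  return { offloadMmproj, remainingVRAM, mmprojCPUSize }
}
```

The same flag later decides whether `mmprojSize` counts toward the estimated GPU usage check and appears under its new name in the final plan log line.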