refactor: rename noOffloadMmproj flag to offloadMmproj and reorder args

The flag `noOffloadMmproj` was misleading: despite the `no` prefix, it is set to `true` when the mmproj file **is** offloaded to VRAM. Renaming it to `offloadMmproj` makes the name match the value it holds and aligns it with the surrounding code.
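In the planner, the flag is set precisely when the mmproj fits into the remaining VRAM budget, which is why the old negated name read backwards. A condensed sketch of that branch after the rename (simplified from the diff below; the concrete sizes are illustrative, in the real code they come from the model files and the GPU probe):

```typescript
// Illustrative values; the planner derives these from the model files and GPU info.
const mmprojSize = 600 * 1024 * 1024 // 600 MB multimodal projector
let remainingVRAM = 8 * 1024 * 1024 * 1024 // 8 GB of usable VRAM

let offloadMmproj = false
if (mmprojSize > 0 && mmprojSize <= remainingVRAM) {
  // The projector fits on the GPU: mark it as offloaded and reserve its VRAM.
  offloadMmproj = true
  remainingVRAM -= mmprojSize
}
```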

Additionally, the optional parameters of `planModelLoad` have been reordered so that `mmprojPath` comes before `requestedCtx`, improving readability and making the optional parameters more intuitive. All related logic, calculations, and log messages have been updated to use the new flag name.
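For callers this is a positional change: the mmproj path is now the second argument and the requested context length the third. A minimal sketch of a call site under that assumption (`engine`, `modelPath`, `mmprojPath`, and the 8192-token context are illustrative, not taken from the repository):

```typescript
// Before: planModelLoad(path, requestedCtx?, mmprojPath?)
// const plan = await engine.planModelLoad(modelPath, 8192, mmprojPath)

// After: planModelLoad(path, mmprojPath?, requestedCtx?)
const plan = await engine.planModelLoad(modelPath, mmprojPath, 8192)

// The returned ModelPlan now exposes `offloadMmproj` instead of `noOffloadMmproj`.
if (plan.offloadMmproj) {
  // mmproj fits in VRAM and will be loaded alongside the GPU layers
}
```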
Author: Akarshan
Date: 2025-09-11 12:29:53 +05:30
Parent: bc29046c06
Commit: abd0cbe599
GPG Key ID: D75C9634A870665F


```diff
@@ -80,7 +80,7 @@ type ModelPlan = {
   gpuLayers: number
   maxContextLength: number
   noOffloadKVCache: boolean
-  noOffloadMmproj?: boolean
+  offloadMmproj?: boolean
   mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
 }
@@ -2049,8 +2049,8 @@ export default class llamacpp_extension extends AIEngine {
   async planModelLoad(
     path: string,
-    requestedCtx?: number,
-    mmprojPath?: string
+    mmprojPath?: string,
+    requestedCtx?: number
   ): Promise<ModelPlan> {
     const modelSize = await this.getModelSize(path)
     const memoryInfo = await this.getTotalSystemMemory()
@@ -2138,12 +2138,12 @@ export default class llamacpp_extension extends AIEngine {
     )
     // --- Priority 1: Allocate mmproj (if exists) ---
-    let noOffloadMmproj = false
+    let offloadMmproj = false
     let remainingVRAM = usableVRAM
     if (mmprojSize > 0) {
       if (mmprojSize <= remainingVRAM) {
-        noOffloadMmproj = true
+        offloadMmproj = true
         remainingVRAM -= mmprojSize
         logger.info(`MMProj allocated to VRAM: ${mmprojSize} bytes`)
       } else {
@@ -2218,7 +2218,7 @@ export default class llamacpp_extension extends AIEngine {
         const cpuLayers = totalLayers - gpuLayers
         const modelCPUSize = cpuLayers * layerSize
         const mmprojCPUSize =
-          mmprojSize > 0 && !noOffloadMmproj ? mmprojSize : 0
+          mmprojSize > 0 && !offloadMmproj ? mmprojSize : 0
         const systemRAMUsed = modelCPUSize + mmprojCPUSize
         const availableSystemRAMForKVCache = Math.max(
           0,
@@ -2277,7 +2277,7 @@ export default class llamacpp_extension extends AIEngine {
     const estimatedGPUUsage =
       gpuLayers * layerSize +
       maxContextLength * kvCachePerToken +
-      (noOffloadMmproj ? mmprojSize : 0)
+      (offloadMmproj ? mmprojSize : 0)
     if (estimatedGPUUsage > memoryInfo.totalVRAM * 0.9) {
       logger.warn(
@@ -2293,7 +2293,7 @@ export default class llamacpp_extension extends AIEngine {
       const newEstimate =
         gpuLayers * layerSize +
         maxContextLength * kvCachePerToken +
-        (noOffloadMmproj ? mmprojSize : 0)
+        (offloadMmproj ? mmprojSize : 0)
       if (newEstimate <= memoryInfo.totalVRAM * 0.9) break
     }
@@ -2329,7 +2329,7 @@ export default class llamacpp_extension extends AIEngine {
     // Log final plan
     const mmprojInfo = mmprojPath
-      ? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, noOffloadMmproj=${noOffloadMmproj}`
+      ? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, offloadMmproj=${offloadMmproj}`
       : ''
     logger.info(
@@ -2343,7 +2343,7 @@ export default class llamacpp_extension extends AIEngine {
       maxContextLength,
       noOffloadKVCache,
       mode,
-      noOffloadMmproj,
+      offloadMmproj,
     }
   }
```
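For intuition, the renamed flag only changes which side of the memory ledger the projector lands on; the VRAM estimate in the diff is a plain sum. A worked sketch of that formula with illustrative numbers (none of these values come from a real model):

```typescript
// Illustrative numbers only.
const layerSize = 500 * 1024 * 1024 // ~500 MB per transformer layer
const kvCachePerToken = 64 * 1024 // 64 KB of KV cache per token
const mmprojSize = 600 * 1024 * 1024 // 600 MB multimodal projector
const totalVRAM = 24 * 1024 * 1024 * 1024 // 24 GB card

const gpuLayers = 32
const maxContextLength = 8192
const offloadMmproj = true

// Same shape as the estimate in the diff: layers + KV cache + (projector, if offloaded).
const estimatedGPUUsage =
  gpuLayers * layerSize +
  maxContextLength * kvCachePerToken +
  (offloadMmproj ? mmprojSize : 0)

// The planner backs off (fewer GPU layers / smaller context) once this
// exceeds 90% of total VRAM, mirroring the `memoryInfo.totalVRAM * 0.9` check.
console.log(
  `Estimated ${(estimatedGPUUsage / 1024 ** 3).toFixed(2)} GB of ` +
    `${((totalVRAM * 0.9) / 1024 ** 3).toFixed(2)} GB usable VRAM`
)
```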