refactor: rename noOffloadMmproj flag to offloadMmproj and reorder args
The flag `noOffloadMmproj` was misleading – it actually indicates when the mmproj file **is** offloaded to VRAM. Renaming it to `offloadMmproj` clarifies its purpose and aligns the naming with the surrounding code. Additionally, the `planModelLoad` signature has been reordered to place `mmprojPath` before `requestedCtx`, improving readability and making the optional parameters more intuitive. All related logic, calculations, and log messages have been updated to use the new flag name.
Parent: bc29046c06 · Commit: abd0cbe599
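For reference, a minimal sketch of how a call site reads after the reorder. The engine instance, file paths, and context value below are hypothetical and only illustrate the new parameter order and flag name; they are not taken from the repository.

```ts
// Hypothetical caller, for illustration only: the mmproj path now comes
// second and the optional context override last, so vision-model callers
// no longer need a placeholder argument for requestedCtx.
const plan: ModelPlan = await engine.planModelLoad(
  '/models/example-vision-model.Q4_K_M.gguf', // path (hypothetical)
  '/models/example-mmproj.gguf',              // mmprojPath (now before requestedCtx)
  8192                                        // requestedCtx (now the trailing optional)
)

if (plan.offloadMmproj) {
  // The plan expects the mmproj projector to sit in VRAM alongside the GPU layers.
}
```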
@@ -80,7 +80,7 @@ type ModelPlan = {
   gpuLayers: number
   maxContextLength: number
   noOffloadKVCache: boolean
-  noOffloadMmproj?: boolean
+  offloadMmproj?: boolean
   mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
 }
 
@@ -2049,8 +2049,8 @@ export default class llamacpp_extension extends AIEngine {
 
   async planModelLoad(
     path: string,
-    requestedCtx?: number,
-    mmprojPath?: string
+    mmprojPath?: string,
+    requestedCtx?: number
   ): Promise<ModelPlan> {
     const modelSize = await this.getModelSize(path)
     const memoryInfo = await this.getTotalSystemMemory()
@@ -2138,12 +2138,12 @@ export default class llamacpp_extension extends AIEngine {
     )
 
     // --- Priority 1: Allocate mmproj (if exists) ---
-    let noOffloadMmproj = false
+    let offloadMmproj = false
     let remainingVRAM = usableVRAM
 
     if (mmprojSize > 0) {
       if (mmprojSize <= remainingVRAM) {
-        noOffloadMmproj = true
+        offloadMmproj = true
         remainingVRAM -= mmprojSize
         logger.info(`MMProj allocated to VRAM: ${mmprojSize} bytes`)
       } else {
@@ -2218,7 +2218,7 @@ export default class llamacpp_extension extends AIEngine {
       const cpuLayers = totalLayers - gpuLayers
       const modelCPUSize = cpuLayers * layerSize
       const mmprojCPUSize =
-        mmprojSize > 0 && !noOffloadMmproj ? mmprojSize : 0
+        mmprojSize > 0 && !offloadMmproj ? mmprojSize : 0
       const systemRAMUsed = modelCPUSize + mmprojCPUSize
       const availableSystemRAMForKVCache = Math.max(
         0,
@@ -2277,7 +2277,7 @@ export default class llamacpp_extension extends AIEngine {
     const estimatedGPUUsage =
       gpuLayers * layerSize +
       maxContextLength * kvCachePerToken +
-      (noOffloadMmproj ? mmprojSize : 0)
+      (offloadMmproj ? mmprojSize : 0)
 
     if (estimatedGPUUsage > memoryInfo.totalVRAM * 0.9) {
       logger.warn(
@@ -2293,7 +2293,7 @@ export default class llamacpp_extension extends AIEngine {
       const newEstimate =
         gpuLayers * layerSize +
         maxContextLength * kvCachePerToken +
-        (noOffloadMmproj ? mmprojSize : 0)
+        (offloadMmproj ? mmprojSize : 0)
       if (newEstimate <= memoryInfo.totalVRAM * 0.9) break
     }
 
@@ -2329,7 +2329,7 @@ export default class llamacpp_extension extends AIEngine {
 
     // Log final plan
     const mmprojInfo = mmprojPath
-      ? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, noOffloadMmproj=${noOffloadMmproj}`
+      ? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, offloadMmproj=${offloadMmproj}`
       : ''
 
     logger.info(
@@ -2343,7 +2343,7 @@ export default class llamacpp_extension extends AIEngine {
       maxContextLength,
       noOffloadKVCache,
       mode,
-      noOffloadMmproj,
+      offloadMmproj,
     }
   }
 