Merge pull request #6416 from menloresearch/enhancement/experimental-label
enhancement: add label experimental for optimize setting
Commit e709d200aa
@@ -80,7 +80,7 @@ type ModelPlan = {
   gpuLayers: number
   maxContextLength: number
   noOffloadKVCache: boolean
-  noOffloadMmproj?: boolean
+  offloadMmproj?: boolean
   mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
 }
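Note on the rename: `noOffloadMmproj` was set to true precisely when the projector was placed in VRAM, i.e. when it *was* offloaded, so the old name inverted its own meaning. The hunks below flip every read and write site together, so only the name changes, not the behavior.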
@@ -328,7 +328,8 @@ export default class llamacpp_extension extends AIEngine {
       await this.determineBestBackend(version_backends)
     }
   } else {
-    bestAvailableBackendString = await this.determineBestBackend(version_backends)
+    bestAvailableBackendString =
+      await this.determineBestBackend(version_backends)
   }

   let settings = structuredClone(SETTINGS)
@@ -2047,11 +2048,25 @@ export default class llamacpp_extension extends AIEngine {
     return { layerSize: modelSize / totalLayers, totalLayers }
   }

+  private isAbsolutePath(p: string): boolean {
+    // Normalize back-slashes to forward-slashes first.
+    const norm = p.replace(/\\/g, '/')
+    return (
+      norm.startsWith('/') || // POSIX absolute
+      /^[a-zA-Z]:/.test(norm) || // Drive-letter Windows (C: or D:)
+      /^\/\/[^/]+/.test(norm) // UNC path //server/share
+    )
+  }
+
   async planModelLoad(
     path: string,
-    requestedCtx?: number,
-    mmprojPath?: string
+    mmprojPath?: string,
+    requestedCtx?: number
   ): Promise<ModelPlan> {
+    if (!this.isAbsolutePath(path))
+      path = await joinPath([await getJanDataFolderPath(), path])
+    if (mmprojPath && !this.isAbsolutePath(mmprojPath))
+      mmprojPath = await joinPath([await getJanDataFolderPath(), mmprojPath])
     const modelSize = await this.getModelSize(path)
     const memoryInfo = await this.getTotalSystemMemory()
     const gguf = await readGgufMetadata(path)
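A minimal standalone sketch of how the new helper classifies paths. The sample paths are hypothetical; in the extension itself, relative paths are then prefixed with the Jan data folder via joinPath/getJanDataFolderPath from Jan's core API.

// Same logic as the private helper above, extracted for illustration.
function isAbsolutePath(p: string): boolean {
  const norm = p.replace(/\\/g, '/') // normalize back-slashes first
  return (
    norm.startsWith('/') || // POSIX absolute: /home/user/...
    /^[a-zA-Z]:/.test(norm) || // Windows drive letter: C:\... or D:/...
    /^\/\/[^/]+/.test(norm) // UNC: \\server\share -> //server/share
  )
}

console.log(isAbsolutePath('/opt/models/a.gguf')) // true (POSIX)
console.log(isAbsolutePath('C:\\models\\a.gguf')) // true (drive letter)
console.log(isAbsolutePath('\\\\nas\\share\\a.gguf')) // true (UNC)
console.log(isAbsolutePath('llamacpp/models/a.gguf')) // false: planModelLoad
// resolves it against the Jan data folder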
@@ -2138,12 +2153,12 @@ export default class llamacpp_extension extends AIEngine {
     )

     // --- Priority 1: Allocate mmproj (if exists) ---
-    let noOffloadMmproj = false
+    let offloadMmproj = false
     let remainingVRAM = usableVRAM

     if (mmprojSize > 0) {
       if (mmprojSize <= remainingVRAM) {
-        noOffloadMmproj = true
+        offloadMmproj = true
         remainingVRAM -= mmprojSize
         logger.info(`MMProj allocated to VRAM: ${mmprojSize} bytes`)
       } else {
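Priority 1 gives the multimodal projector first claim on VRAM before any model layers are placed; a sketch of the greedy step, with hypothetical byte counts:

// Sketch of the priority-1 allocation; usableVRAM and mmprojSize are made up.
const usableVRAM = 8 * 1024 ** 3 // assume 8 GiB usable after headroom
const mmprojSize = 600 * 1024 ** 2 // assume a 600 MiB projector file

let offloadMmproj = false
let remainingVRAM = usableVRAM

if (mmprojSize > 0 && mmprojSize <= remainingVRAM) {
  offloadMmproj = true // projector fits: keep it on the GPU
  remainingVRAM -= mmprojSize // model layers compete for what is left
}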
@@ -2217,8 +2232,7 @@ export default class llamacpp_extension extends AIEngine {
     // Calculate available system RAM for KV cache
     const cpuLayers = totalLayers - gpuLayers
     const modelCPUSize = cpuLayers * layerSize
-    const mmprojCPUSize =
-      mmprojSize > 0 && !noOffloadMmproj ? mmprojSize : 0
+    const mmprojCPUSize = mmprojSize > 0 && !offloadMmproj ? mmprojSize : 0
     const systemRAMUsed = modelCPUSize + mmprojCPUSize
     const availableSystemRAMForKVCache = Math.max(
       0,
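The CPU-side accounting only charges the projector against system RAM when it was not offloaded to VRAM. A sketch with hypothetical sizes; the second argument to Math.max is cut off in the hunk and assumed to be total RAM minus what the CPU-resident weights use:

// Hypothetical inputs, all in bytes.
const totalSystemRAM = 16 * 1024 ** 3
const totalLayers = 32
const gpuLayers = 24
const layerSize = 200 * 1024 ** 2
const mmprojSize = 600 * 1024 ** 2
const offloadMmproj = false // projector stayed in system RAM

const cpuLayers = totalLayers - gpuLayers
const modelCPUSize = cpuLayers * layerSize
const mmprojCPUSize = mmprojSize > 0 && !offloadMmproj ? mmprojSize : 0
const systemRAMUsed = modelCPUSize + mmprojCPUSize
const availableSystemRAMForKVCache = Math.max(0, totalSystemRAM - systemRAMUsed)
// 16 GiB - (8 * 200 MiB + 600 MiB) leaves roughly 13.8 GiB for the KV cache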
@@ -2277,7 +2291,7 @@ export default class llamacpp_extension extends AIEngine {
     const estimatedGPUUsage =
       gpuLayers * layerSize +
       maxContextLength * kvCachePerToken +
-      (noOffloadMmproj ? mmprojSize : 0)
+      (offloadMmproj ? mmprojSize : 0)

     if (estimatedGPUUsage > memoryInfo.totalVRAM * 0.9) {
       logger.warn(
@@ -2293,7 +2307,7 @@ export default class llamacpp_extension extends AIEngine {
       const newEstimate =
         gpuLayers * layerSize +
         maxContextLength * kvCachePerToken +
-        (noOffloadMmproj ? mmprojSize : 0)
+        (offloadMmproj ? mmprojSize : 0)
       if (newEstimate <= memoryInfo.totalVRAM * 0.9) break
     }
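Both estimates cap GPU usage at 90% of total VRAM. The second hunk sits inside a loop that shrinks the plan until the estimate fits; the reduction step itself is outside the hunk, so the halving below is an assumption:

// Sketch of the back-off loop implied by the hunk; all inputs hypothetical.
const totalVRAM = 8 * 1024 ** 3
const layerSize = 200 * 1024 ** 2
const kvCachePerToken = 128 * 1024
const mmprojSize = 600 * 1024 ** 2
const offloadMmproj = true
const gpuLayers = 24
let maxContextLength = 131072

while (maxContextLength > 2048) {
  const newEstimate =
    gpuLayers * layerSize +
    maxContextLength * kvCachePerToken +
    (offloadMmproj ? mmprojSize : 0)
  if (newEstimate <= totalVRAM * 0.9) break // fits under the safety cap
  maxContextLength = Math.floor(maxContextLength / 2) // assumed step
}
// With these numbers the loop settles at maxContextLength = 8192.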
@@ -2329,7 +2343,7 @@ export default class llamacpp_extension extends AIEngine {

     // Log final plan
     const mmprojInfo = mmprojPath
-      ? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, noOffloadMmproj=${noOffloadMmproj}`
+      ? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, offloadMmproj=${offloadMmproj}`
       : ''

     logger.info(
@@ -2343,7 +2357,7 @@ export default class llamacpp_extension extends AIEngine {
       maxContextLength,
       noOffloadKVCache,
       mode,
-      noOffloadMmproj,
+      offloadMmproj,
     }
   }

@@ -46,15 +46,16 @@ export function ModelSetting({
     }
     setIsPlanning(true)
     try {
-      // Read the model config to get the actual model path
+      // Read the model config to get the actual model path and mmproj path
       const modelConfig = await serviceHub.app().readYaml<{
         model_path: string
+        mmproj_path?: string
       }>(`llamacpp/models/${model.id}/model.yml`)

       if (modelConfig && modelConfig.model_path) {
         const result = await serviceHub
           .models()
-          .planModelLoad(modelConfig.model_path)
+          .planModelLoad(modelConfig.model_path, modelConfig.mmproj_path)

         // Apply the recommended settings to the model sequentially to avoid race conditions
         const settingsToUpdate: Array<{
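The readYaml generic above implies a model.yml of roughly this shape; the paths here are invented for illustration:

// TypeScript view of the YAML the component reads (values hypothetical).
const modelConfig: { model_path: string; mmproj_path?: string } = {
  model_path: 'llamacpp/models/my-vlm/model.gguf',
  mmproj_path: 'llamacpp/models/my-vlm/mmproj.gguf', // only for vision models
}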
@@ -82,6 +83,25 @@ export function ModelSetting({
             value: result.noOffloadKVCache,
           })
         }
+        if (
+          model.settings?.no_kv_offload &&
+          result.noOffloadKVCache !== undefined
+        ) {
+          settingsToUpdate.push({
+            key: 'no_kv_offload',
+            value: result.noOffloadKVCache,
+          })
+        }
+
+        if (
+          model.settings?.mmproj_offload &&
+          result.offloadMmproj !== undefined
+        ) {
+          settingsToUpdate.push({
+            key: 'mmproj_offload',
+            value: result.offloadMmproj,
+          })
+        }
+
         // Apply all settings in a single update to avoid race conditions
         if (settingsToUpdate.length > 0) {
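Accumulating into settingsToUpdate and flushing once is what keeps concurrent writes from clobbering each other; a minimal sketch of the pattern, with the flush helper left hypothetical:

type SettingUpdate = { key: string; value: boolean | number }

const settingsToUpdate: SettingUpdate[] = []
// Only push keys the model actually exposes and the plan actually computed.
settingsToUpdate.push({ key: 'no_kv_offload', value: true })
settingsToUpdate.push({ key: 'mmproj_offload', value: false })

if (settingsToUpdate.length > 0) {
  // await applySettings(model.id, settingsToUpdate) // hypothetical single write
}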
@@ -242,11 +262,18 @@ export function ModelSetting({
       {provider.provider === 'llamacpp' && (
         <div className="pb-4 border-b border-main-view-fg/10 my-4">
           <div>
-            <h3 className="font-medium mb-1">Optimize Settings</h3>
+            <div>
+              <div className="flex items-center gap-2 mb-1">
+                <h3 className="font-medium">Optimize Settings</h3>
+                <div className="text-xs bg-main-view-fg/10 border border-main-view-fg/20 text-main-view-fg/70 rounded-full py-0.5 px-2">
+                  <span>{t('mcp-servers:experimental')}</span>
+                </div>
+              </div>
             <p className="text-main-view-fg/70 text-xs mb-3">
               Analyze your system and model, then apply optimal loading
               settings automatically
             </p>
+            </div>
           <Button
             onClick={handlePlanModelLoad}
             disabled={isPlanning}
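The badge reuses the existing mcp-servers:experimental translation key rather than introducing a new string, which is presumably why the label lives under the MCP servers namespace.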
@@ -495,12 +495,14 @@ export class DefaultModelsService implements ModelsService {

   async planModelLoad(
     modelPath: string,
+    mmprojPath?: string,
     requestedCtx?: number
   ): Promise<ModelPlan> {
     try {
       const engine = this.getEngine('llamacpp') as AIEngine & {
         planModelLoad?: (
           path: string,
+          mmprojPath?: string,
           requestedCtx?: number
         ) => Promise<ModelPlan>
       }
@@ -514,7 +516,12 @@ export class DefaultModelsService implements ModelsService {
         (core) => core.joinPath
       )
       const fullModelPath = await joinPath([janDataFolderPath, modelPath])
-      return await engine.planModelLoad(fullModelPath, requestedCtx)
+      // mmprojPath is forwarded as-is; the engine resolves relative paths itself
+      return await engine.planModelLoad(
+        fullModelPath,
+        mmprojPath,
+        requestedCtx
+      )
     }

     // Fallback if method is not available
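The service probes for planModelLoad on the engine before calling it, and otherwise returns a conservative plan; a self-contained sketch of that feature-detection pattern, with types trimmed to what the hunks show:

interface ModelPlan {
  gpuLayers: number
  maxContextLength: number
  noOffloadKVCache: boolean
  offloadMmproj: boolean
  mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}

type MaybePlanner = {
  planModelLoad?: (
    path: string,
    mmprojPath?: string,
    requestedCtx?: number
  ) => Promise<ModelPlan>
}

async function planOrFallback(
  engine: MaybePlanner,
  path: string,
  mmprojPath?: string,
  requestedCtx?: number
): Promise<ModelPlan> {
  if (engine.planModelLoad) {
    return engine.planModelLoad(path, mmprojPath, requestedCtx)
  }
  // Same conservative fallback the service returns below.
  return {
    gpuLayers: 0,
    maxContextLength: 2048,
    noOffloadKVCache: true,
    offloadMmproj: false,
    mode: 'Unsupported',
  }
}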
@@ -523,6 +530,7 @@ export class DefaultModelsService implements ModelsService {
         gpuLayers: 0,
         maxContextLength: 2048,
         noOffloadKVCache: true,
+        offloadMmproj: false,
         mode: 'Unsupported',
       }
     } catch (error) {
@@ -531,6 +539,7 @@ export class DefaultModelsService implements ModelsService {
         gpuLayers: 0,
         maxContextLength: 2048,
         noOffloadKVCache: true,
+        offloadMmproj: false,
         mode: 'Unsupported',
       }
     }
@@ -85,6 +85,7 @@ export interface ModelPlan {
   gpuLayers: number
   maxContextLength: number
   noOffloadKVCache: boolean
+  offloadMmproj: boolean
   mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
 }
@@ -136,5 +137,9 @@ export interface ModelsService {
     ctxSize?: number
   ): Promise<'RED' | 'YELLOW' | 'GREEN' | 'GREY'>
   validateGgufFile(filePath: string): Promise<ModelValidationResult>
-  planModelLoad(modelPath: string, requestedCtx?: number): Promise<ModelPlan>
+  planModelLoad(
+    modelPath: string,
+    mmprojPath?: string,
+    requestedCtx?: number
+  ): Promise<ModelPlan>
 }
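Because mmprojPath is inserted before requestedCtx rather than appended, any positional caller that previously passed a context size as the second argument must now thread undefined through the new slot; for example (service handle hypothetical):

// Before this PR: planModelLoad('llamacpp/models/a/model.gguf', 4096)
// After: the second slot is the optional mmproj path.
await modelsService.planModelLoad('llamacpp/models/a/model.gguf', undefined, 4096)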