From 2899d58ad788d787cd6dcbb8970ab522757cacd1 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 17 Jun 2025 15:30:07 +0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9Bfix:=20llama.cpp=20default=20NGL=20?= =?UTF-8?q?setting=20does=20not=20offload=20all=20layers=20to=20GPU=20(#53?= =?UTF-8?q?10)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛fix: llama.cpp default NGL setting does not offload all layers to GPU * chore: cover more cases * chore: clean up * fix: should not show GPU section on Mac --- core/src/browser/models/utils.ts | 2 +- .../inference-cortex-extension/src/index.ts | 6 ++- web-app/src/routes/settings/hardware.tsx | 50 ++++++++++--------- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/core/src/browser/models/utils.ts b/core/src/browser/models/utils.ts index 0e52441b2..2ac243b6a 100644 --- a/core/src/browser/models/utils.ts +++ b/core/src/browser/models/utils.ts @@ -17,7 +17,7 @@ export const validationRules: { [key: string]: (value: any) => boolean } = { presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, ctx_len: (value: any) => Number.isInteger(value) && value >= 0, - ngl: (value: any) => Number.isInteger(value) && value >= 0, + ngl: (value: any) => Number.isInteger(value), embedding: (value: any) => typeof value === 'boolean', n_parallel: (value: any) => Number.isInteger(value) && value >= 0, cpu_threads: (value: any) => Number.isInteger(value) && value >= 0, diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 3e8b60ebe..d80bad3d4 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -253,11 +253,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { } } } + const modelSettings = extractModelLoadParams(model.settings) return await this.apiInstance().then((api) => api .post('v1/models/start', { json: { - ...extractModelLoadParams(model.settings), + ...modelSettings, model: model.id, engine: model.engine === 'nitro' // Legacy model cache @@ -282,6 +283,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ...(this.context_shift === false ? { 'no-context-shift': true } : {}), + ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined + ? { ngl: 100 } + : {}), }, timeout: false, signal, diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx index dcd301773..53de1f9b3 100644 --- a/web-app/src/routes/settings/hardware.tsx +++ b/web-app/src/routes/settings/hardware.tsx @@ -371,30 +371,34 @@ function Hardware() { )} {/* GPU Information */} - - {hardwareData.gpus.length > 0 ? ( - - gpu.id)} - strategy={verticalListSortingStrategy} + {!IS_MACOS ? ( + + {hardwareData.gpus.length > 0 ? ( + - {hardwareData.gpus.map((gpu, index) => ( - - ))} - - - ) : ( - } /> - )} - + gpu.id)} + strategy={verticalListSortingStrategy} + > + {hardwareData.gpus.map((gpu, index) => ( + + ))} + + + ) : ( + } /> + )} + + ) : ( + <> + )}