🐛fix: llama.cpp default NGL setting does not offload all layers to GPU (#5310)

* 🐛fix: llama.cpp default NGL setting does not offload all layers to GPU

* chore: cover more cases

* chore: clean up

* fix: should not show GPU section on Mac
This commit is contained in:
Louis 2025-06-17 15:30:07 +07:00 committed by GitHub
parent 50c25b65b6
commit 2899d58ad7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 33 additions and 25 deletions

View File

@@ -17,7 +17,7 @@ export const validationRules: { [key: string]: (value: any) => boolean } = {
   presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
   ctx_len: (value: any) => Number.isInteger(value) && value >= 0,
-  ngl: (value: any) => Number.isInteger(value) && value >= 0,
+  ngl: (value: any) => Number.isInteger(value),
   embedding: (value: any) => typeof value === 'boolean',
   n_parallel: (value: any) => Number.isInteger(value) && value >= 0,
   cpu_threads: (value: any) => Number.isInteger(value) && value >= 0,

View File

@@ -253,11 +253,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
         }
       }
     }
+    const modelSettings = extractModelLoadParams(model.settings)
     return await this.apiInstance().then((api) =>
       api
         .post('v1/models/start', {
           json: {
-            ...extractModelLoadParams(model.settings),
+            ...modelSettings,
             model: model.id,
             engine:
               model.engine === 'nitro' // Legacy model cache
@@ -282,6 +283,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
             ...(this.context_shift === false
               ? { 'no-context-shift': true }
               : {}),
+            ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined
+              ? { ngl: 100 }
+              : {}),
           },
           timeout: false,
           signal,

View File

@@ -371,30 +371,34 @@ function Hardware() {
         )}
         {/* GPU Information */}
-        <Card title="GPUs">
-          {hardwareData.gpus.length > 0 ? (
-            <DndContext
-              sensors={sensors}
-              collisionDetection={closestCenter}
-              onDragEnd={handleDragEnd}
-            >
-              <SortableContext
-                items={hardwareData.gpus.map((gpu) => gpu.id)}
-                strategy={verticalListSortingStrategy}
-              >
-                {hardwareData.gpus.map((gpu, index) => (
-                  <SortableGPUItem
-                    key={gpu.id || index}
-                    gpu={gpu}
-                    index={index}
-                  />
-                ))}
-              </SortableContext>
-            </DndContext>
-          ) : (
-            <CardItem title="No GPUs detected" actions={<></>} />
-          )}
-        </Card>
+        {!IS_MACOS ? (
+          <Card title="GPUs">
+            {hardwareData.gpus.length > 0 ? (
+              <DndContext
+                sensors={sensors}
+                collisionDetection={closestCenter}
+                onDragEnd={handleDragEnd}
+              >
+                <SortableContext
+                  items={hardwareData.gpus.map((gpu) => gpu.id)}
+                  strategy={verticalListSortingStrategy}
+                >
+                  {hardwareData.gpus.map((gpu, index) => (
+                    <SortableGPUItem
+                      key={gpu.id || index}
+                      gpu={gpu}
+                      index={index}
+                    />
+                  ))}
+                </SortableContext>
+              </DndContext>
+            ) : (
+              <CardItem title="No GPUs detected" actions={<></>} />
+            )}
+          </Card>
+        ) : (
+          <></>
+        )}
       </div>
     </div>
   </div>