🐛fix: llama.cpp default NGL setting does not offload all layers to GPU (#5310)

* 🐛fix: llama.cpp default NGL setting does not offload all layers to GPU

* chore: cover more cases

* chore: clean up

* fix: should not show GPU section on Mac
This commit is contained in:
Louis 2025-06-17 15:30:07 +07:00 committed by GitHub
parent 50c25b65b6
commit 2899d58ad7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 33 additions and 25 deletions

View File

@@ -17,7 +17,7 @@ export const validationRules: { [key: string]: (value: any) => boolean } = {
presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
ctx_len: (value: any) => Number.isInteger(value) && value >= 0,
- ngl: (value: any) => Number.isInteger(value) && value >= 0,
+ ngl: (value: any) => Number.isInteger(value),
embedding: (value: any) => typeof value === 'boolean',
n_parallel: (value: any) => Number.isInteger(value) && value >= 0,
cpu_threads: (value: any) => Number.isInteger(value) && value >= 0,

View File

@@ -253,11 +253,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
}
}
}
+ const modelSettings = extractModelLoadParams(model.settings)
return await this.apiInstance().then((api) =>
api
.post('v1/models/start', {
json: {
- ...extractModelLoadParams(model.settings),
+ ...modelSettings,
model: model.id,
engine:
model.engine === 'nitro' // Legacy model cache
@@ -282,6 +283,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
...(this.context_shift === false
? { 'no-context-shift': true }
: {}),
+ ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined
+ ? { ngl: 100 }
+ : {}),
},
timeout: false,
signal,

View File

@@ -371,30 +371,34 @@ function Hardware() {
)}
{/* GPU Information */}
- <Card title="GPUs">
- {hardwareData.gpus.length > 0 ? (
- <DndContext
- sensors={sensors}
- collisionDetection={closestCenter}
- onDragEnd={handleDragEnd}
- >
- <SortableContext
- items={hardwareData.gpus.map((gpu) => gpu.id)}
- strategy={verticalListSortingStrategy}
+ {!IS_MACOS ? (
+ <Card title="GPUs">
+ {hardwareData.gpus.length > 0 ? (
+ <DndContext
+ sensors={sensors}
+ collisionDetection={closestCenter}
+ onDragEnd={handleDragEnd}
>
- {hardwareData.gpus.map((gpu, index) => (
- <SortableGPUItem
- key={gpu.id || index}
- gpu={gpu}
- index={index}
- />
- ))}
- </SortableContext>
- </DndContext>
- ) : (
- <CardItem title="No GPUs detected" actions={<></>} />
- )}
- </Card>
+ <SortableContext
+ items={hardwareData.gpus.map((gpu) => gpu.id)}
+ strategy={verticalListSortingStrategy}
+ >
+ {hardwareData.gpus.map((gpu, index) => (
+ <SortableGPUItem
+ key={gpu.id || index}
+ gpu={gpu}
+ index={index}
+ />
+ ))}
+ </SortableContext>
+ </DndContext>
+ ) : (
+ <CardItem title="No GPUs detected" actions={<></>} />
+ )}
+ </Card>
+ ) : (
+ <></>
+ )}
</div>
</div>
</div>