🐛fix: llama.cpp default NGL setting does not offload all layers to GPU (#5310)
* 🐛fix: llama.cpp default NGL setting does not offload all layers to GPU
* chore: cover more cases
* chore: clean up
* fix: should not show GPU section on Mac
parent 50c25b65b6
commit 2899d58ad7
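In short: Cortex/llama.cpp treats `ngl` (number of GPU layers) literally, so the previous default did not offload all layers to the GPU. After this change, an unset or `-1` value is translated to `ngl: 100` when the model is started, a value intended to exceed the layer count of typical models so the whole model ends up on the GPU. A minimal standalone sketch of that rule (the `resolveNgl` helper name is illustrative, not part of the codebase):

```ts
// Illustrative sketch of the default-NGL rule this commit introduces.
// `resolveNgl` is a hypothetical helper; the real change inlines the logic
// into the v1/models/start request body (see the diff below).
function resolveNgl(ngl?: number): number {
  // Unset or -1 means "offload everything": 100 is used as a value intended
  // to exceed the layer count of typical models.
  return ngl === undefined || ngl === -1 ? 100 : ngl
}

console.log(resolveNgl())   // 100 – default now offloads all layers
console.log(resolveNgl(-1)) // 100 – explicit "auto"
console.log(resolveNgl(24)) // 24  – user-chosen partial offload is respected
```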
@@ -17,7 +17,7 @@ export const validationRules: { [key: string]: (value: any) => boolean } = {
   presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
   ctx_len: (value: any) => Number.isInteger(value) && value >= 0,
-  ngl: (value: any) => Number.isInteger(value) && value >= 0,
+  ngl: (value: any) => Number.isInteger(value),
   embedding: (value: any) => typeof value === 'boolean',
   n_parallel: (value: any) => Number.isInteger(value) && value >= 0,
   cpu_threads: (value: any) => Number.isInteger(value) && value >= 0,
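With the relaxed rule, `ngl: -1` (the "use the default" sentinel) now passes validation while non-integers are still rejected. A quick before/after check, with the two rule bodies copied from the hunk above:

```ts
// The two ngl validators from the hunk above, side by side.
const oldNgl = (value: any) => Number.isInteger(value) && value >= 0
const newNgl = (value: any) => Number.isInteger(value)

console.log(oldNgl(-1), newNgl(-1))   // false true  – -1 is now a valid setting
console.log(oldNgl(33), newNgl(33))   // true  true  – explicit counts still pass
console.log(oldNgl(1.5), newNgl(1.5)) // false false – non-integers still rejected
```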
@@ -253,11 +253,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
         }
       }
     }
+    const modelSettings = extractModelLoadParams(model.settings)
     return await this.apiInstance().then((api) =>
       api
         .post('v1/models/start', {
           json: {
-            ...extractModelLoadParams(model.settings),
+            ...modelSettings,
             model: model.id,
             engine:
               model.engine === 'nitro' // Legacy model cache
@@ -282,6 +283,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
             ...(this.context_shift === false
               ? { 'no-context-shift': true }
               : {}),
+            ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined
+              ? { ngl: 100 }
+              : {}),
           },
           timeout: false,
           signal,
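The default is applied with the same conditional-spread pattern already used for `no-context-shift`: spread an override object when the condition holds, spread nothing otherwise. Because the conditional spread comes after `...modelSettings`, it wins over an `ngl: -1` carried in the model's settings. A minimal sketch of that payload assembly (the model id is made up for the example):

```ts
// Conditional-spread payload assembly, mirroring the hunk above.
// `modelSettings` stands in for extractModelLoadParams(model.settings).
const modelSettings: { ngl?: number } = { ngl: -1 }

const body = {
  ...modelSettings,
  model: 'example-model', // illustrative id, not from the diff
  // Spreading {} when the condition is false leaves the key untouched;
  // spreading { ngl: 100 } afterwards overrides the -1 from modelSettings.
  ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined
    ? { ngl: 100 }
    : {}),
}

console.log(body) // { ngl: 100, model: 'example-model' }
```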
@@ -371,30 +371,34 @@ function Hardware() {
         )}

         {/* GPU Information */}
-        <Card title="GPUs">
-          {hardwareData.gpus.length > 0 ? (
-            <DndContext
-              sensors={sensors}
-              collisionDetection={closestCenter}
-              onDragEnd={handleDragEnd}
-            >
-              <SortableContext
-                items={hardwareData.gpus.map((gpu) => gpu.id)}
-                strategy={verticalListSortingStrategy}
-              >
-                {hardwareData.gpus.map((gpu, index) => (
-                  <SortableGPUItem
-                    key={gpu.id || index}
-                    gpu={gpu}
-                    index={index}
-                  />
-                ))}
-              </SortableContext>
-            </DndContext>
-          ) : (
-            <CardItem title="No GPUs detected" actions={<></>} />
-          )}
-        </Card>
+        {!IS_MACOS ? (
+          <Card title="GPUs">
+            {hardwareData.gpus.length > 0 ? (
+              <DndContext
+                sensors={sensors}
+                collisionDetection={closestCenter}
+                onDragEnd={handleDragEnd}
+              >
+                <SortableContext
+                  items={hardwareData.gpus.map((gpu) => gpu.id)}
+                  strategy={verticalListSortingStrategy}
+                >
+                  {hardwareData.gpus.map((gpu, index) => (
+                    <SortableGPUItem
+                      key={gpu.id || index}
+                      gpu={gpu}
+                      index={index}
+                    />
+                  ))}
+                </SortableContext>
+              </DndContext>
+            ) : (
+              <CardItem title="No GPUs detected" actions={<></>} />
+            )}
+          </Card>
+        ) : (
+          <></>
+        )}
       </div>
     </div>
   </div>
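Finally, the Hardware page only renders the GPU card when `IS_MACOS` is false; on macOS the section is hidden entirely, per the "should not show GPU section on Mac" commit message. A stripped-down sketch of the same guard; how `IS_MACOS` is actually defined is not part of this diff, so the detection below is purely illustrative:

```tsx
import React from 'react'

// Illustrative platform flag – the real IS_MACOS constant comes from the app,
// not from this user-agent check.
const IS_MACOS =
  typeof navigator !== 'undefined' && /Mac/i.test(navigator.userAgent)

function GpuSection({ gpuCount }: { gpuCount: number }) {
  // On macOS, render nothing instead of an empty "GPUs" card.
  if (IS_MACOS) return <></>
  return <div>{gpuCount > 0 ? `${gpuCount} GPU(s)` : 'No GPUs detected'}</div>
}
```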