feat: add LlamacppConfig for llama.cpp extension to improve settings (#5121)

* add engine settings

* update load options

* rename variable
Author: Thien Tran
Date: 2025-05-28 09:47:09 +08:00
Committed by: Louis
parent 9bb4deeb78
commit d5c07acdb5
2 changed files with 64 additions and 112 deletions


@@ -103,30 +103,7 @@ export type listResult = modelInfo[]
 export interface loadOptions {
   modelPath: string
   port?: number
-  n_gpu_layers?: number
-  n_ctx?: number
-  threads?: number
-  threads_batch?: number
-  ctx_size?: number
-  n_predict?: number
-  batch_size?: number
-  ubatch_size?: number
-  device?: string
-  split_mode?: string
-  main_gpu?: number
-  flash_attn?: boolean
-  cont_batching?: boolean
-  no_mmap?: boolean
-  mlock?: boolean
-  no_kv_offload?: boolean
-  cache_type_k?: string
-  cache_type_v?: string
-  defrag_thold?: number
-  rope_scaling?: string
-  rope_scale?: number
-  rope_freq_base?: number
-  rope_freq_scale?: number
-  reasoning_budget?: number
 }

 export interface sessionInfo {

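With those fields removed, the per-load surface shrinks to the per-model essentials; every engine tuning flag now lives in extension settings instead. A sketch of the resulting interface, reconstructed from the lines kept in the hunk above (not copied from the repo):

```typescript
// Sketch of loadOptions after this commit, reconstructed from the
// surviving lines of the hunk above.
export interface loadOptions {
  modelPath: string // path to the GGUF file to serve
  port?: number     // llama-server port; load() falls back to 8080
}
```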

@@ -26,6 +26,33 @@ import {
 import { invoke } from '@tauri-apps/api/core'

+type LlamacppConfig = {
+  n_gpu_layers: number;
+  n_ctx: number; // not in SETTINGS
+  threads: number;
+  threads_batch: number;
+  ctx_size: number;
+  n_predict: number;
+  batch_size: number;
+  ubatch_size: number;
+  device: string;
+  split_mode: string;
+  main_gpu: number;
+  flash_attn: boolean;
+  cont_batching: boolean;
+  no_mmap: boolean;
+  mlock: boolean;
+  no_kv_offload: boolean;
+  cache_type_k: string;
+  cache_type_v: string;
+  defrag_thold: number;
+  rope_scaling: string;
+  rope_scale: number;
+  rope_freq_base: number;
+  rope_freq_scale: number;
+  reasoning_budget: number;
+}
+
 interface DownloadItem {
   url: string
   save_path: string
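For orientation, here is what a slice of that config might look like once populated. The field names come from the type added above; the values are illustrative only:

```typescript
// Illustrative values only -- real defaults are defined in SETTINGS, not here.
const example: Partial<LlamacppConfig> = {
  n_gpu_layers: 99,    // offload all layers when the build supports GPU
  ctx_size: 4096,      // context window, passed as --ctx-size
  flash_attn: true,    // emitted as the --flash-attn flag
  cache_type_k: 'f16', // KV cache dtype for keys (--cache-type-k)
}
```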
@@ -76,6 +103,7 @@ export default class llamacpp_extension extends AIEngine {
   provider: string = 'llamacpp'
   readonly providerId: string = 'llamacpp'
+  private config: LlamacppConfig
   private downloadManager
   private activeSessions: Map<string, sessionInfo> = new Map()
   private modelsBasePath!: string
@@ -85,6 +113,13 @@ export default class llamacpp_extension extends AIEngine {
     super.onLoad() // Calls registerEngine() from AIEngine
     this.registerSettings(SETTINGS)
+    let config = {}
+    for (const item of SETTINGS) {
+      const defaultValue = item.controllerProps.value
+      config[item.key] = this.getSetting<typeof defaultValue>(item.key, defaultValue)
+    }
+    this.config = config as LlamacppConfig
+
     this.downloadManager = window.core.extensionManager.getByName('@janhq/download-extension')
     // Initialize models base path - assuming this would be retrieved from settings
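The loop above only assumes each SETTINGS entry exposes a `key` and a `controllerProps.value` default. A hypothetical entry, shaped to match what the loop reads (the real definitions live in the extension's settings file, which is not part of this diff):

```typescript
// Hypothetical sketch -- only `key` and `controllerProps.value` are
// actually read by the onLoad() loop above.
const settingsSketch = [
  {
    key: 'n_gpu_layers',            // becomes config.n_gpu_layers
    controllerProps: { value: 99 }, // default when no saved setting exists
  },
  // ...one entry per LlamacppConfig field (except n_ctx, per the comment above)
]
```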
@@ -110,6 +145,10 @@ export default class llamacpp_extension extends AIEngine {
     this.activeSessions.clear();
   }

+  onSettingUpdate<T>(key: string, value: T): void {
+    this.config[key] = value
+  }
+
   // Implement the required LocalProvider interface methods
   override async list(): Promise<modelInfo[]> {
     const modelsDir = await joinPath([this.modelsBasePath, this.provider])
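Because onSettingUpdate writes straight into this.config, a settings change takes effect on the next load() without restarting the extension. A usage sketch (the call below is hypothetical; in practice the settings framework invokes it):

```typescript
// Hypothetical call site -- normally the settings UI triggers this
// when the user edits a value.
declare const extension: { onSettingUpdate<T>(key: string, value: T): void }
extension.onSettingUpdate('flash_attn', true)
// The next load() reads this.config.flash_attn and emits --flash-attn.
```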
@@ -267,113 +306,49 @@ export default class llamacpp_extension extends AIEngine {
   override async load(opts: loadOptions): Promise<sessionInfo> {
     const args: string[] = []
+    const cfg = this.config

     // disable llama-server webui
     args.push('--no-webui')

     // model option is required
     // TODO: llama.cpp extension lookup model path based on modelId
     args.push('-m', opts.modelPath)
     args.push('--port', String(opts.port || 8080)) // Default port if not specified

-    if (opts.n_gpu_layers === undefined) {
-      // in case of CPU only build, this option will be ignored
-      args.push('-ngl', '99')
-    } else {
-      args.push('-ngl', String(opts.n_gpu_layers))
-    }
-    if (opts.n_ctx !== undefined) {
-      args.push('-c', String(opts.n_ctx))
-    }
-    // Add remaining options from the interface
-    if (opts.threads !== undefined) {
-      args.push('--threads', String(opts.threads))
-    }
-    if (opts.threads_batch !== undefined) {
-      args.push('--threads-batch', String(opts.threads_batch))
-    }
-    if (opts.ctx_size !== undefined) {
-      args.push('--ctx-size', String(opts.ctx_size))
-    }
-    if (opts.n_predict !== undefined) {
-      args.push('--n-predict', String(opts.n_predict))
-    }
-    if (opts.batch_size !== undefined) {
-      args.push('--batch-size', String(opts.batch_size))
-    }
-    if (opts.ubatch_size !== undefined) {
-      args.push('--ubatch-size', String(opts.ubatch_size))
-    }
-    if (opts.device !== undefined) {
-      args.push('--device', opts.device)
-    }
-    if (opts.split_mode !== undefined) {
-      args.push('--split-mode', opts.split_mode)
-    }
-    if (opts.main_gpu !== undefined) {
-      args.push('--main-gpu', String(opts.main_gpu))
-    }
+    if (cfg.n_gpu_layers > 0) args.push('-ngl', String(cfg.n_gpu_layers))
+    if (cfg.threads > 0) args.push('--threads', String(cfg.threads))
+    if (cfg.threads_batch > 0) args.push('--threads-batch', String(cfg.threads_batch))
+    if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size))
+    if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict))
+    if (cfg.batch_size > 0) args.push('--batch-size', String(cfg.batch_size))
+    if (cfg.ubatch_size > 0) args.push('--ubatch-size', String(cfg.ubatch_size))
+    if (cfg.device.length > 0) args.push('--device', cfg.device)
+    if (cfg.split_mode.length > 0) args.push('--split-mode', cfg.split_mode)
+    if (cfg.main_gpu !== undefined) args.push('--main-gpu', String(cfg.main_gpu))

     // Boolean flags
-    if (opts.flash_attn === true) {
-      args.push('--flash-attn')
-    }
+    if (cfg.flash_attn) args.push('--flash-attn')
+    if (cfg.cont_batching) args.push('--cont-batching')
+    if (cfg.no_mmap) args.push('--no-mmap')
+    if (cfg.mlock) args.push('--mlock')
+    if (cfg.no_kv_offload) args.push('--no-kv-offload')
-    if (opts.cont_batching === true) {
-      args.push('--cont-batching')
-    }
+    args.push('--cache-type-k', cfg.cache_type_k)
+    args.push('--cache-type-v', cfg.cache_type_v)
+    args.push('--defrag-thold', String(cfg.defrag_thold))
-    if (opts.no_mmap === true) {
-      args.push('--no-mmap')
-    }
+    args.push('--rope-scaling', cfg.rope_scaling)
+    args.push('--rope-scale', String(cfg.rope_scale))
+    args.push('--rope-freq-base', String(cfg.rope_freq_base))
+    args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
+    args.push('--reasoning-budget', String(cfg.reasoning_budget))
-    if (opts.mlock === true) {
-      args.push('--mlock')
-    }
-    if (opts.no_kv_offload === true) {
-      args.push('--no-kv-offload')
-    }
-    if (opts.cache_type_k !== undefined) {
-      args.push('--cache-type-k', opts.cache_type_k)
-    }
-    if (opts.cache_type_v !== undefined) {
-      args.push('--cache-type-v', opts.cache_type_v)
-    }
-    if (opts.defrag_thold !== undefined) {
-      args.push('--defrag-thold', String(opts.defrag_thold))
-    }
-    if (opts.rope_scaling !== undefined) {
-      args.push('--rope-scaling', opts.rope_scaling)
-    }
-    if (opts.rope_scale !== undefined) {
-      args.push('--rope-scale', String(opts.rope_scale))
-    }
-    if (opts.rope_freq_base !== undefined) {
-      args.push('--rope-freq-base', String(opts.rope_freq_base))
-    }
-    if (opts.rope_freq_scale !== undefined) {
-      args.push('--rope-freq-scale', String(opts.rope_freq_scale))
-    }
-    if (opts.reasoning_budget !== undefined) {
-      args.push('--reasoning-budget', String(opts.reasoning_budget))
-    }

     console.log('Calling Tauri command llama_load with args:', args)
     try {
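Putting it together: for a hypothetical config with n_gpu_layers: 99, ctx_size: 4096, and flash_attn enabled (other numeric fields zero, other strings empty), the args logged above would come out roughly as below. Note that the cache, defrag, rope, and reasoning-budget arguments are pushed unconditionally in the new code:

```typescript
// Roughly what load() assembles for a hypothetical config of
// { n_gpu_layers: 99, ctx_size: 4096, flash_attn: true,
//   cache_type_k: 'f16', cache_type_v: 'f16', defrag_thold: 0.1, ... }
const exampleArgs = [
  '--no-webui',
  '-m', '/path/to/model.gguf', // placeholder path
  '--port', '8080',
  '-ngl', '99',
  '--ctx-size', '4096',
  '--flash-attn',
  '--cache-type-k', 'f16',
  '--cache-type-v', 'f16',
  '--defrag-thold', '0.1',
  // ...rope and reasoning-budget values follow in the same way
]
```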