feat: add LlamacppConfig for llama.cpp extension to improve settings (#5121)

* add engine settings

* update load options

* rename variable
Author: Thien Tran
Date: 2025-05-28 09:47:09 +08:00
Committed by: Louis
parent 9bb4deeb78
commit d5c07acdb5
2 changed files with 64 additions and 112 deletions


@@ -103,30 +103,7 @@ export type listResult = modelInfo[]
 export interface loadOptions {
   modelPath: string
   port?: number
-  n_gpu_layers?: number
-  n_ctx?: number
-  threads?: number
-  threads_batch?: number
-  ctx_size?: number
-  n_predict?: number
-  batch_size?: number
-  ubatch_size?: number
-  device?: string
-  split_mode?: string
-  main_gpu?: number
-  flash_attn?: boolean
-  cont_batching?: boolean
-  no_mmap?: boolean
-  mlock?: boolean
-  no_kv_offload?: boolean
-  cache_type_k?: string
-  cache_type_v?: string
-  defrag_thold?: number
-  rope_scaling?: string
-  rope_scale?: number
-  rope_freq_base?: number
-  rope_freq_scale?: number
-  reasoning_budget?: number
 }

 export interface sessionInfo {

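With those fields removed, the per-load surface shrinks to the per-model essentials; every engine tuning flag now lives in extension settings instead. A sketch of the resulting interface, reconstructed from the lines kept in the hunk above (not copied from the repo):

```typescript
// Sketch of loadOptions after this commit, reconstructed from the
// surviving lines of the hunk above.
export interface loadOptions {
  modelPath: string // path to the GGUF file to serve
  port?: number     // llama-server port; load() falls back to 8080
}
```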

@@ -26,6 +26,33 @@ import {
 import { invoke } from '@tauri-apps/api/core'

+type LlamacppConfig = {
+  n_gpu_layers: number;
+  n_ctx: number; // not in SETTINGS
+  threads: number;
+  threads_batch: number;
+  ctx_size: number;
+  n_predict: number;
+  batch_size: number;
+  ubatch_size: number;
+  device: string;
+  split_mode: string;
+  main_gpu: number;
+  flash_attn: boolean;
+  cont_batching: boolean;
+  no_mmap: boolean;
+  mlock: boolean;
+  no_kv_offload: boolean;
+  cache_type_k: string;
+  cache_type_v: string;
+  defrag_thold: number;
+  rope_scaling: string;
+  rope_scale: number;
+  rope_freq_base: number;
+  rope_freq_scale: number;
+  reasoning_budget: number;
+}
+
 interface DownloadItem {
   url: string
   save_path: string
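For orientation, here is what a slice of that config might look like once populated. The field names come from the type added above; the values are illustrative only:

```typescript
// Illustrative values only -- real defaults are defined in SETTINGS, not here.
const example: Partial<LlamacppConfig> = {
  n_gpu_layers: 99,    // offload all layers when the build supports GPU
  ctx_size: 4096,      // context window, passed as --ctx-size
  flash_attn: true,    // emitted as the --flash-attn flag
  cache_type_k: 'f16', // KV cache dtype for keys (--cache-type-k)
}
```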
@@ -76,6 +103,7 @@ export default class llamacpp_extension extends AIEngine {
   provider: string = 'llamacpp'
   readonly providerId: string = 'llamacpp'
+  private config: LlamacppConfig
   private downloadManager
   private activeSessions: Map<string, sessionInfo> = new Map()
   private modelsBasePath!: string
@@ -85,6 +113,13 @@ export default class llamacpp_extension extends AIEngine {
     super.onLoad() // Calls registerEngine() from AIEngine
     this.registerSettings(SETTINGS)
+    let config = {}
+    for (const item of SETTINGS) {
+      const defaultValue = item.controllerProps.value
+      config[item.key] = this.getSetting<typeof defaultValue>(item.key, defaultValue)
+    }
+    this.config = config as LlamacppConfig
+
     this.downloadManager = window.core.extensionManager.getByName('@janhq/download-extension')
     // Initialize models base path - assuming this would be retrieved from settings
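The loop above only assumes each SETTINGS entry exposes a `key` and a `controllerProps.value` default. A hypothetical entry, shaped to match what the loop reads (the real definitions live in the extension's settings file, which is not part of this diff):

```typescript
// Hypothetical sketch -- only `key` and `controllerProps.value` are
// actually read by the onLoad() loop above.
const settingsSketch = [
  {
    key: 'n_gpu_layers',            // becomes config.n_gpu_layers
    controllerProps: { value: 99 }, // default when no saved setting exists
  },
  // ...one entry per LlamacppConfig field (except n_ctx, per the comment above)
]
```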
@@ -110,6 +145,10 @@ export default class llamacpp_extension extends AIEngine {
     this.activeSessions.clear();
   }

+  onSettingUpdate<T>(key: string, value: T): void {
+    this.config[key] = value
+  }
+
   // Implement the required LocalProvider interface methods
   override async list(): Promise<modelInfo[]> {
     const modelsDir = await joinPath([this.modelsBasePath, this.provider])
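Because onSettingUpdate writes straight into this.config, a settings change takes effect on the next load() without restarting the extension. A usage sketch (the call below is hypothetical; in practice the settings framework invokes it):

```typescript
// Hypothetical call site -- normally the settings UI triggers this
// when the user edits a value.
declare const extension: { onSettingUpdate<T>(key: string, value: T): void }
extension.onSettingUpdate('flash_attn', true)
// The next load() reads this.config.flash_attn and emits --flash-attn.
```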
@@ -267,113 +306,49 @@ export default class llamacpp_extension extends AIEngine {
   override async load(opts: loadOptions): Promise<sessionInfo> {
     const args: string[] = []
+    const cfg = this.config

     // disable llama-server webui
     args.push('--no-webui')

     // model option is required
     // TODO: llama.cpp extension lookup model path based on modelId
     args.push('-m', opts.modelPath)
     args.push('--port', String(opts.port || 8080)) // Default port if not specified

-    if (opts.n_gpu_layers === undefined) {
-      // in case of CPU only build, this option will be ignored
-      args.push('-ngl', '99')
-    } else {
-      args.push('-ngl', String(opts.n_gpu_layers))
-    }
-    if (opts.n_ctx !== undefined) {
-      args.push('-c', String(opts.n_ctx))
-    }
-    // Add remaining options from the interface
-    if (opts.threads !== undefined) {
-      args.push('--threads', String(opts.threads))
-    }
-    if (opts.threads_batch !== undefined) {
-      args.push('--threads-batch', String(opts.threads_batch))
-    }
-    if (opts.ctx_size !== undefined) {
-      args.push('--ctx-size', String(opts.ctx_size))
-    }
-    if (opts.n_predict !== undefined) {
-      args.push('--n-predict', String(opts.n_predict))
-    }
-    if (opts.batch_size !== undefined) {
-      args.push('--batch-size', String(opts.batch_size))
-    }
-    if (opts.ubatch_size !== undefined) {
-      args.push('--ubatch-size', String(opts.ubatch_size))
-    }
-    if (opts.device !== undefined) {
-      args.push('--device', opts.device)
-    }
-    if (opts.split_mode !== undefined) {
-      args.push('--split-mode', opts.split_mode)
-    }
-    if (opts.main_gpu !== undefined) {
-      args.push('--main-gpu', String(opts.main_gpu))
-    }
+    if (cfg.n_gpu_layers > 0) args.push('-ngl', String(cfg.n_gpu_layers))
+    if (cfg.threads > 0) args.push('--threads', String(cfg.threads))
+    if (cfg.threads_batch > 0) args.push('--threads-batch', String(cfg.threads_batch))
+    if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size))
+    if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict))
+    if (cfg.batch_size > 0) args.push('--batch-size', String(cfg.batch_size))
+    if (cfg.ubatch_size > 0) args.push('--ubatch-size', String(cfg.ubatch_size))
+    if (cfg.device.length > 0) args.push('--device', cfg.device)
+    if (cfg.split_mode.length > 0) args.push('--split-mode', cfg.split_mode)
+    if (cfg.main_gpu !== undefined) args.push('--main-gpu', String(cfg.main_gpu))

     // Boolean flags
-    if (opts.flash_attn === true) {
-      args.push('--flash-attn')
-    }
+    if (cfg.flash_attn) args.push('--flash-attn')
+    if (cfg.cont_batching) args.push('--cont-batching')
+    if (cfg.no_mmap) args.push('--no-mmap')
+    if (cfg.mlock) args.push('--mlock')
+    if (cfg.no_kv_offload) args.push('--no-kv-offload')
-    if (opts.cont_batching === true) {
-      args.push('--cont-batching')
-    }
+    args.push('--cache-type-k', cfg.cache_type_k)
+    args.push('--cache-type-v', cfg.cache_type_v)
+    args.push('--defrag-thold', String(cfg.defrag_thold))
-    if (opts.no_mmap === true) {
-      args.push('--no-mmap')
-    }
+    args.push('--rope-scaling', cfg.rope_scaling)
+    args.push('--rope-scale', String(cfg.rope_scale))
+    args.push('--rope-freq-base', String(cfg.rope_freq_base))
+    args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
+    args.push('--reasoning-budget', String(cfg.reasoning_budget))
-    if (opts.mlock === true) {
-      args.push('--mlock')
-    }
-    if (opts.no_kv_offload === true) {
-      args.push('--no-kv-offload')
-    }
-    if (opts.cache_type_k !== undefined) {
-      args.push('--cache-type-k', opts.cache_type_k)
-    }
-    if (opts.cache_type_v !== undefined) {
-      args.push('--cache-type-v', opts.cache_type_v)
-    }
-    if (opts.defrag_thold !== undefined) {
-      args.push('--defrag-thold', String(opts.defrag_thold))
-    }
-    if (opts.rope_scaling !== undefined) {
-      args.push('--rope-scaling', opts.rope_scaling)
-    }
-    if (opts.rope_scale !== undefined) {
-      args.push('--rope-scale', String(opts.rope_scale))
-    }
-    if (opts.rope_freq_base !== undefined) {
-      args.push('--rope-freq-base', String(opts.rope_freq_base))
-    }
-    if (opts.rope_freq_scale !== undefined) {
-      args.push('--rope-freq-scale', String(opts.rope_freq_scale))
-    }
-    if (opts.reasoning_budget !== undefined) {
-      args.push('--reasoning-budget', String(opts.reasoning_budget))
-    }

     console.log('Calling Tauri command llama_load with args:', args)
     try {
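Putting it together: for a hypothetical config with n_gpu_layers: 99, ctx_size: 4096, and flash_attn enabled (other numeric fields zero, other strings empty), the args logged above would come out roughly as below. Note that the cache, defrag, rope, and reasoning-budget arguments are pushed unconditionally in the new code:

```typescript
// Roughly what load() assembles for a hypothetical config of
// { n_gpu_layers: 99, ctx_size: 4096, flash_attn: true,
//   cache_type_k: 'f16', cache_type_v: 'f16', defrag_thold: 0.1, ... }
const exampleArgs = [
  '--no-webui',
  '-m', '/path/to/model.gguf', // placeholder path
  '--port', '8080',
  '-ngl', '99',
  '--ctx-size', '4096',
  '--flash-attn',
  '--cache-type-k', 'f16',
  '--cache-type-v', 'f16',
  '--defrag-thold', '0.1',
  // ...rope and reasoning-budget values follow in the same way
]
```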