fix mmap settings and adjust flash attention

Akarshan 2025-10-18 13:36:03 +05:30
parent 7b6e4cd172
commit 1f4977c1d1


@@ -1649,12 +1649,12 @@ export default class llamacpp_extension extends AIEngine {
if (cfg.main_gpu !== undefined && cfg.main_gpu != 0)
args.push('--main-gpu', String(cfg.main_gpu))
// Note: Older llama.cpp versions are no longer supported
-if (cfg.flash_attn !== undefined || cfg.flash_attn !== '') args.push('--flash-attn', String(cfg.flash_attn)) // default: auto = ON when supported
+if (cfg.flash_attn !== undefined || !cfg.flash_attn || cfg.flash_attn !== '') args.push('--flash-attn', String(cfg.flash_attn)) // default: auto = ON when supported
// Boolean flags
if (cfg.ctx_shift) args.push('--context-shift')
if (cfg.cont_batching) args.push('--cont-batching')
-args.push('--no-mmap')
+if (cfg.no_mmap) args.push('--no-mmap')
if (cfg.mlock) args.push('--mlock')
if (cfg.no_kv_offload) args.push('--no-kv-offload')
if (isEmbedding) {
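
As context for the flash-attention change: the guard on both the old and new lines is true for every possible value of cfg.flash_attn (no value is simultaneously undefined and equal to ''), so --flash-attn is pushed even when the option is unset, serializing as the string "undefined". Below is a minimal sketch of a stricter guard, assuming cfg.flash_attn is either unset or one of the 'on' / 'off' / 'auto' values the current llama.cpp --flash-attn flag accepts; the helper name flashAttnArgs is hypothetical and not part of this extension.

// Sketch only: emit --flash-attn solely when a usable value is present.
type FlashAttn = 'on' | 'off' | 'auto'

interface LlamacppConfig {
  flash_attn?: FlashAttn | '' // '' mirrors the empty-string check in the diff
  no_mmap?: boolean
}

function flashAttnArgs(cfg: LlamacppConfig): string[] {
  const args: string[] = []
  // && (not ||): both conditions must hold before the flag is emitted,
  // so an unset or empty option produces no argument at all.
  if (cfg.flash_attn !== undefined && cfg.flash_attn !== '') {
    args.push('--flash-attn', cfg.flash_attn)
  }
  return args
}

// flashAttnArgs({ flash_attn: 'auto' }) → ['--flash-attn', 'auto']
// flashAttnArgs({})                     → []

With that shape, the mmap fix in the same hunk reads consistently: boolean options (no_mmap, mlock, no_kv_offload) each gate their own flag, and valued options are serialized only when actually set.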