From 27c49183955e80c58b8a963e7c17f1a87158b252 Mon Sep 17 00:00:00 2001
From: Louis
Date: Fri, 13 Jun 2025 00:01:25 +0700
Subject: [PATCH] fix: default settings should leave empty (#5257)

* fix: default settings should leave empty
* fix: default settings
* fix: remove some more default settings
* fix: threads and cont
* fix: data
* fix: default setting
* fix: settings
* chore: bump cortex version
* chore: bump to cortex 1.0.14
* chore: clean up
* typo
* chore: fix dialog hang
* fix: default parameter
* chore: truncate edit model title
* chore: update default provider settings
* chore: fix typo

---------

Co-authored-by: Faisal Amir
---
 .../bin/version.txt                           |  2 +-
 .../resources/default_settings.json           | 14 +++---
 .../inference-cortex-extension/src/index.ts   | 38 +++++++++-------
 src-tauri/binaries/download.bat               |  2 +-
 src-tauri/binaries/download.sh                |  2 +-
 src-tauri/src/core/mcp.rs                     | 43 ++++++++++++++++++-
 web-app/src/containers/dialogs/EditModel.tsx  |  4 +-
 .../src/containers/dialogs/ToolApproval.tsx   |  9 +++-
 web-app/src/lib/predefined.ts                 | 22 +++++-----
 web-app/src/routes/hub.tsx                    |  2 +-
 web-app/src/services/providers.ts             | 18 +++---
 11 files changed, 110 insertions(+), 46 deletions(-)

diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index d5f450d92..4014c4f5e 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.13-rc6
\ No newline at end of file
+1.0.13-rc9
\ No newline at end of file
diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json
index d825affb2..451596842 100644
--- a/extensions/inference-cortex-extension/resources/default_settings.json
+++ b/extensions/inference-cortex-extension/resources/default_settings.json
@@ -14,7 +14,7 @@
     "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
     "controllerType": "checkbox",
     "controllerProps": {
-      "value": true
+      "value": ""
     }
   },
   {
@@ -23,7 +23,7 @@
     "description": "Number of prompts that can be processed simultaneously by the model.",
     "controllerType": "input",
     "controllerProps": {
-      "value": "1",
+      "value": "",
       "placeholder": "1",
       "type": "number",
       "textAlign": "right"
@@ -35,8 +35,8 @@
     "description": "Number of CPU cores used for model processing when running without GPU.",
     "controllerType": "input",
     "controllerProps": {
-      "value": "-1",
-      "placeholder": "Number of CPU threads",
+      "value": "",
+      "placeholder": "-1 (auto-detect)",
       "type": "number",
       "textAlign": "right"
     }
@@ -47,7 +47,7 @@
     "description": "Number of threads for batch and prompt processing (default: same as Threads).",
     "controllerType": "input",
     "controllerProps": {
-      "value": -1,
+      "value": "",
       "placeholder": "-1 (same as Threads)",
       "type": "number"
     }
@@ -58,7 +58,7 @@
     "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
     "controllerType": "checkbox",
     "controllerProps": {
-      "value": false
+      "value": true
     }
   },
   {
@@ -76,7 +76,7 @@
     "description": "Controls memory usage and precision trade-off.",
     "controllerType": "dropdown",
     "controllerProps": {
-      "value": "f16",
+      "value": "q8_0",
       "options": [
         {
           "value": "q4_0",
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index b217a4f48..a49b1a852 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -8,7 +8,6 @@ import {
   Model,
-  executeOnMain,
   EngineEvent,
   LocalOAIEngine,
   extractModelLoadParams,
@@ -56,11 +55,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   /** Default Engine model load settings */
   n_parallel?: number
-  cont_batching: boolean = true
+  cont_batching: boolean = false
   caching_enabled: boolean = true
   flash_attn: boolean = true
   use_mmap: boolean = true
-  cache_type: string = 'f16'
+  cache_type: string = 'q8'
   cpu_threads?: number
   auto_unload_models: boolean = true
   /**
@@ -118,17 +117,19 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     if (numParallel.length > 0 && parseInt(numParallel) > 0) {
       this.n_parallel = parseInt(numParallel)
     }
-    this.cont_batching = await this.getSetting(
-      Settings.cont_batching,
-      true
-    )
+    if (this.n_parallel && this.n_parallel > 1)
+      this.cont_batching = await this.getSetting(
+        Settings.cont_batching,
+        false
+      )
     this.caching_enabled = await this.getSetting(
       Settings.caching_enabled,
       true
     )
     this.flash_attn = await this.getSetting(Settings.flash_attn, true)
     this.use_mmap = await this.getSetting(Settings.use_mmap, true)
-    this.cache_type = await this.getSetting(Settings.cache_type, 'f16')
+    if (this.caching_enabled)
+      this.cache_type = await this.getSetting(Settings.cache_type, 'q8')
     this.auto_unload_models = await this.getSetting(
       Settings.auto_unload_models,
       true
@@ -136,6 +137,7 @@
     const threads_number = Number(
       await this.getSetting(Settings.cpu_threads, '')
     )
+    if (!Number.isNaN(threads_number))
     this.cpu_threads = threads_number

     const huggingfaceToken = await this.getSetting(
@@ -254,13 +256,19 @@
           model.engine === 'nitro' // Legacy model cache
             ? 'llama-cpp'
             : model.engine,
-        cont_batching: this.cont_batching,
-        n_parallel: this.n_parallel,
-        caching_enabled: this.caching_enabled,
-        flash_attn: this.flash_attn,
-        cache_type: this.cache_type,
-        use_mmap: this.use_mmap,
-        ...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
+        ...(this.n_parallel ? { n_parallel: this.n_parallel } : {}),
+        ...(this.use_mmap ? { use_mmap: true } : {}),
+        ...(this.caching_enabled ? { caching_enabled: true } : {}),
+        ...(this.flash_attn ? { flash_attn: true } : {}),
+        ...(this.caching_enabled && this.cache_type
+          ? { cache_type: this.cache_type }
+          : {}),
+        ...(this.cpu_threads && this.cpu_threads > 0
+          ? { cpu_threads: this.cpu_threads }
+          : {}),
+        ...(this.cont_batching && this.n_parallel && this.n_parallel > 1
+          ? { cont_batching: this.cont_batching }
+          : {}),
       },
       timeout: false,
       signal,
diff --git a/src-tauri/binaries/download.bat b/src-tauri/binaries/download.bat
index a70366440..c69ad7970 100644
--- a/src-tauri/binaries/download.bat
+++ b/src-tauri/binaries/download.bat
@@ -1,6 +1,6 @@
 @echo off
 
-set CORTEX_VERSION=1.0.13-rc6
+set CORTEX_VERSION=1.0.14
 set ENGINE_VERSION=b5509
 set ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
 set ENGINE_DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
diff --git a/src-tauri/binaries/download.sh b/src-tauri/binaries/download.sh
index 0e7723026..8ce0041f0 100755
--- a/src-tauri/binaries/download.sh
+++ b/src-tauri/binaries/download.sh
@@ -14,7 +14,7 @@ download() {
 }
 
 # Read CORTEX_VERSION
-CORTEX_VERSION=1.0.13-rc6
+CORTEX_VERSION=1.0.14
 ENGINE_VERSION=b5509
 CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
 ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
diff --git a/src-tauri/src/core/mcp.rs b/src-tauri/src/core/mcp.rs
index 86b010692..642505486 100644
--- a/src-tauri/src/core/mcp.rs
+++ b/src-tauri/src/core/mcp.rs
@@ -8,7 +8,45 @@ use tokio::{process::Command, sync::Mutex, time::timeout};
 
 use super::{cmd::get_jan_data_folder_path, state::AppState};
 
-const DEFAULT_MCP_CONFIG: &str = r#"{"mcpServers":{"browsermcp":{"command":"npx","args":["@browsermcp/mcp"],"env":{},"active":false},"fetch":{"command":"uvx","args":["mcp-server-fetch"],"env":{},"active":false},"filesystem":{"command":"npx","args":["-y","@modelcontextprotocol/server-filesystem","/path/to/other/allowed/dir"],"env":{},"active":false},"playwright":{"command":"npx","args":["@playwright/mcp","--isolated"],"env":{},"active":false},"sequential-thinking":{"command":"npx","args":["-y","@modelcontextprotocol/server-sequential-thinking"],"env":{},"active":false},"tavily":{"command":"npx","args":["-y","tavily-mcp"],"env":{"TAVILY_API_KEY": "tvly-YOUR_API_KEY-here"},"active":false}}}"#;
+const DEFAULT_MCP_CONFIG: &str = r#"{
+  "mcpServers": {
+    "browsermcp": {
+      "command": "npx",
+      "args": ["@browsermcp/mcp"],
+      "env": {},
+      "active": false
+    },
+    "fetch": {
+      "command": "uvx",
+      "args": ["mcp-server-fetch"],
+      "env": {},
+      "active": false
+    },
+    "serper": {
+      "command": "npx",
+      "args": ["-y", "serper-search-scrape-mcp-server"],
+      "env": { "SERPER_API_KEY": "YOUR_SERPER_API_KEY_HERE" },
+      "active": false
+    },
+    "filesystem": {
+      "command": "npx",
+      "args": [
+        "-y",
+        "@modelcontextprotocol/server-filesystem",
+        "/path/to/other/allowed/dir"
+      ],
+      "env": {},
+      "active": false
+    },
+    "sequential-thinking": {
+      "command": "npx",
+      "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"],
+      "env": {},
+      "active": false
+    }
+  }
+}
+"#;
 
 // Timeout for MCP tool calls (30 seconds)
 const MCP_TOOL_CALL_TIMEOUT: Duration = Duration::from_secs(30);
@@ -116,7 +154,8 @@ async fn start_mcp_server(
         cmd.arg("run");
         cmd.env("UV_CACHE_DIR", cache_dir.to_str().unwrap().to_string());
     }
-    #[cfg(windows)] {
+    #[cfg(windows)]
+    {
         cmd.creation_flags(0x08000000); // CREATE_NO_WINDOW: prevents shell window on Windows
     }
     let app_path_str = app_path.to_str().unwrap().to_string();
diff --git a/web-app/src/containers/dialogs/EditModel.tsx b/web-app/src/containers/dialogs/EditModel.tsx
index 0acaf0a7a..30ab81010 100644
--- a/web-app/src/containers/dialogs/EditModel.tsx
+++ b/web-app/src/containers/dialogs/EditModel.tsx
@@ -139,7 +139,9 @@ export const DialogEditModel = ({
-          Edit Model: {selectedModel.id}
+
+            Edit Model: {selectedModel.id}
+
           Configure model capabilities by toggling the options below.
diff --git a/web-app/src/containers/dialogs/ToolApproval.tsx b/web-app/src/containers/dialogs/ToolApproval.tsx
index 162733274..85580013d 100644
--- a/web-app/src/containers/dialogs/ToolApproval.tsx
+++ b/web-app/src/containers/dialogs/ToolApproval.tsx
@@ -31,8 +31,15 @@ export default function ToolApproval() {
     onDeny()
   }
 
+  const handleDialogOpen = (open: boolean) => {
+    setModalOpen(open)
+    if (!open) {
+      onDeny()
+    }
+  }
+
   return (
-
+
diff --git a/web-app/src/lib/predefined.ts b/web-app/src/lib/predefined.ts
index 216b08786..a87493722 100644
--- a/web-app/src/lib/predefined.ts
+++ b/web-app/src/lib/predefined.ts
@@ -18,20 +18,20 @@ export const modelSettings = {
     controller_type: 'input',
     controller_props: {
       value: 100,
-      placeholder: '-1',
+      placeholder: '100',
       type: 'number',
     },
   },
   temperature: {
-    key: 'temp',
+    key: 'temperature',
    title: 'Temperature',
     description:
       'Temperature for sampling (higher = more random). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.8,
-      placeholder: '0.8',
+      value: '',
+      placeholder: '0.6',
       type: 'number',
       min: 0,
       step: 0.01,
@@ -44,7 +44,7 @@
       'Top-K sampling (0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 40,
+      value: '',
       placeholder: '40',
       type: 'number',
     },
@@ -56,7 +56,7 @@
       'Top-P sampling (1.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.9,
+      value: '',
       placeholder: '0.9',
       type: 'number',
     },
@@ -68,7 +68,7 @@
       'Min-P sampling (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.1,
+      value: '',
       placeholder: '0.1',
       type: 'number',
     },
@@ -80,7 +80,7 @@
       'Number of tokens to consider for repeat penalty (0 = disabled, -1 = ctx_size). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 64,
+      value: '',
       placeholder: '64',
       type: 'number',
     },
@@ -92,7 +92,7 @@
       'Penalize repeating token sequences (1.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 1.0,
+      value: '',
       placeholder: '1.0',
       type: 'number',
     },
@@ -104,7 +104,7 @@
       'Repeat alpha presence penalty (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.0,
+      value: '',
       placeholder: '0.0',
       type: 'number',
     },
@@ -116,7 +116,7 @@
       'Repeat alpha frequency penalty (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.0,
+      value: '',
       placeholder: '0.0',
       type: 'number',
     },
diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx
index a142243ff..9ae3d151e 100644
--- a/web-app/src/routes/hub.tsx
+++ b/web-app/src/routes/hub.tsx
@@ -315,7 +315,7 @@ function Hub() {
       title: 'Recommended Model',
       disableBeacon: true,
       content:
-        "Browse and download powerful AI models from various providers, all in one place. We suggest starting with Jan-nano - a model optimized for function calling, tool integration, and research capabilities. It's ideal for building interactive AI agents.",
+        "Browse and download powerful AI models from various providers, all in one place. We suggest starting with Jan-Nano - a model optimized for function calling, tool integration, and research capabilities. It's ideal for building interactive AI agents.",
     },
     {
       target: '.hub-download-button-step',
diff --git a/web-app/src/services/providers.ts b/web-app/src/services/providers.ts
index ed3fccf5a..6bd2b63f0 100644
--- a/web-app/src/services/providers.ts
+++ b/web-app/src/services/providers.ts
@@ -129,12 +129,20 @@ export const getProviders = async (): Promise => {
           provider: providerName,
           settings: Object.values(modelSettings).reduce(
             (acc, setting) => {
-              let value = model[
-                setting.key as keyof typeof model
-              ] as keyof typeof setting.controller_props.value
+              let value = setting.controller_props.value
               if (setting.key === 'ctx_len') {
-                // @ts-expect-error dynamic type
-                value = 4096 // Default context length for Llama.cpp models
+                value = 8192 // Default context length for Llama.cpp models
+              }
+              // Set temperature to 0.6 for DefaultToolUseSupportedModels
+              if (
+                Object.values(DefaultToolUseSupportedModels).some((v) =>
+                  model.id.toLowerCase().includes(v.toLowerCase())
+                )
+              ) {
+                if (setting.key === 'temperature') value = 0.7 // Default temperature for tool-supported models
+                if (setting.key === 'top_k') value = 20 // Default top_k for tool-supported models
+                if (setting.key === 'top_p') value = 0.8 // Default top_p for tool-supported models
+                if (setting.key === 'min_p') value = 0 // Default min_p for tool-supported models
               }
               acc[setting.key] = {
                 ...setting,