From 27c49183955e80c58b8a963e7c17f1a87158b252 Mon Sep 17 00:00:00 2001
From: Louis
Date: Fri, 13 Jun 2025 00:01:25 +0700
Subject: [PATCH] fix: default settings should leave empty (#5257)

* fix: default settings should leave empty
* fix: default settings
* fix: remove some more default settings
* fix: threads and cont
* fix: data
* fix: default setting
* fix: settings
* chore: bump cortex version
* chore: bump to cortex 1.0.14
* chore: clean up
* typo
* chore: fix dialog hang
* fix: default parameter
* chore: truncate edit model title
* chore: update default provider settings
* chore: fix typo

---------

Co-authored-by: Faisal Amir
---
 .../bin/version.txt                           |  2 +-
 .../resources/default_settings.json           | 14 +++---
 .../inference-cortex-extension/src/index.ts   | 38 +++++++++-------
 src-tauri/binaries/download.bat               |  2 +-
 src-tauri/binaries/download.sh                |  2 +-
 src-tauri/src/core/mcp.rs                     | 43 ++++++++++++++++++-
 web-app/src/containers/dialogs/EditModel.tsx  |  4 +-
 .../src/containers/dialogs/ToolApproval.tsx   |  9 +++-
 web-app/src/lib/predefined.ts                 | 22 +++++-----
 web-app/src/routes/hub.tsx                    |  2 +-
 web-app/src/services/providers.ts             | 18 +++---
 11 files changed, 110 insertions(+), 46 deletions(-)

diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index d5f450d92..4014c4f5e 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.13-rc6
\ No newline at end of file
+1.0.13-rc9
\ No newline at end of file
diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json
index d825affb2..451596842 100644
--- a/extensions/inference-cortex-extension/resources/default_settings.json
+++ b/extensions/inference-cortex-extension/resources/default_settings.json
@@ -14,7 +14,7 @@
     "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
     "controllerType": "checkbox",
     "controllerProps": {
-      "value": true
+      "value": ""
     }
   },
   {
@@ -23,7 +23,7 @@
     "description": "Number of prompts that can be processed simultaneously by the model.",
     "controllerType": "input",
     "controllerProps": {
-      "value": "1",
+      "value": "",
       "placeholder": "1",
       "type": "number",
       "textAlign": "right"
@@ -35,8 +35,8 @@
     "description": "Number of CPU cores used for model processing when running without GPU.",
     "controllerType": "input",
     "controllerProps": {
-      "value": "-1",
-      "placeholder": "Number of CPU threads",
+      "value": "",
+      "placeholder": "-1 (auto-detect)",
       "type": "number",
       "textAlign": "right"
     }
@@ -47,7 +47,7 @@
     "description": "Number of threads for batch and prompt processing (default: same as Threads).",
     "controllerType": "input",
     "controllerProps": {
-      "value": -1,
+      "value": "",
       "placeholder": "-1 (same as Threads)",
       "type": "number"
     }
@@ -58,7 +58,7 @@
     "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
     "controllerType": "checkbox",
     "controllerProps": {
-      "value": false
+      "value": true
     }
   },
   {
@@ -76,7 +76,7 @@
     "description": "Controls memory usage and precision trade-off.",
     "controllerType": "dropdown",
     "controllerProps": {
-      "value": "f16",
+      "value": "q8_0",
       "options": [
         {
           "value": "q4_0",
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index b217a4f48..a49b1a852 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -8,7 +8,6 @@ import {
   Model,
-  executeOnMain,
   EngineEvent,
   LocalOAIEngine,
   extractModelLoadParams,
@@ -56,11 +55,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   /** Default Engine model load settings */
   n_parallel?: number
-  cont_batching: boolean = true
+  cont_batching: boolean = false
   caching_enabled: boolean = true
   flash_attn: boolean = true
   use_mmap: boolean = true
-  cache_type: string = 'f16'
+  cache_type: string = 'q8'
   cpu_threads?: number
   auto_unload_models: boolean = true
   /**
@@ -118,17 +117,19 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     if (numParallel.length > 0 && parseInt(numParallel) > 0) {
       this.n_parallel = parseInt(numParallel)
     }
-    this.cont_batching = await this.getSetting(
-      Settings.cont_batching,
-      true
-    )
+    if (this.n_parallel && this.n_parallel > 1)
+      this.cont_batching = await this.getSetting(
+        Settings.cont_batching,
+        false
+      )
     this.caching_enabled = await this.getSetting(
       Settings.caching_enabled,
       true
     )
     this.flash_attn = await this.getSetting(Settings.flash_attn, true)
     this.use_mmap = await this.getSetting(Settings.use_mmap, true)
-    this.cache_type = await this.getSetting(Settings.cache_type, 'f16')
+    if (this.caching_enabled)
+      this.cache_type = await this.getSetting(Settings.cache_type, 'q8')
     this.auto_unload_models = await this.getSetting(
       Settings.auto_unload_models,
       true
@@ -136,6 +137,7 @@
     const threads_number = Number(
       await this.getSetting(Settings.cpu_threads, '')
     )
+    if (!Number.isNaN(threads_number))
     this.cpu_threads = threads_number

     const huggingfaceToken = await this.getSetting(
@@ -254,13 +256,19 @@
           model.engine === 'nitro' // Legacy model cache
             ? 'llama-cpp'
             : model.engine,
-        cont_batching: this.cont_batching,
-        n_parallel: this.n_parallel,
-        caching_enabled: this.caching_enabled,
-        flash_attn: this.flash_attn,
-        cache_type: this.cache_type,
-        use_mmap: this.use_mmap,
-        ...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
+        ...(this.n_parallel ? { n_parallel: this.n_parallel } : {}),
+        ...(this.use_mmap ? { use_mmap: true } : {}),
+        ...(this.caching_enabled ? { caching_enabled: true } : {}),
+        ...(this.flash_attn ? { flash_attn: true } : {}),
+        ...(this.caching_enabled && this.cache_type
+          ? { cache_type: this.cache_type }
+          : {}),
+        ...(this.cpu_threads && this.cpu_threads > 0
+          ? { cpu_threads: this.cpu_threads }
+          : {}),
+        ...(this.cont_batching && this.n_parallel && this.n_parallel > 1
+          ? { cont_batching: this.cont_batching }
+          : {}),
       },
       timeout: false,
       signal,
diff --git a/src-tauri/binaries/download.bat b/src-tauri/binaries/download.bat
index a70366440..c69ad7970 100644
--- a/src-tauri/binaries/download.bat
+++ b/src-tauri/binaries/download.bat
@@ -1,6 +1,6 @@
 @echo off
 
-set CORTEX_VERSION=1.0.13-rc6
+set CORTEX_VERSION=1.0.14
 set ENGINE_VERSION=b5509
 set ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
 set ENGINE_DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
diff --git a/src-tauri/binaries/download.sh b/src-tauri/binaries/download.sh
index 0e7723026..8ce0041f0 100755
--- a/src-tauri/binaries/download.sh
+++ b/src-tauri/binaries/download.sh
@@ -14,7 +14,7 @@ download() {
 }
 
 # Read CORTEX_VERSION
-CORTEX_VERSION=1.0.13-rc6
+CORTEX_VERSION=1.0.14
 ENGINE_VERSION=b5509
 CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
 ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
diff --git a/src-tauri/src/core/mcp.rs b/src-tauri/src/core/mcp.rs
index 86b010692..642505486 100644
--- a/src-tauri/src/core/mcp.rs
+++ b/src-tauri/src/core/mcp.rs
@@ -8,7 +8,45 @@ use tokio::{process::Command, sync::Mutex, time::timeout};
 
 use super::{cmd::get_jan_data_folder_path, state::AppState};
 
-const DEFAULT_MCP_CONFIG: &str = r#"{"mcpServers":{"browsermcp":{"command":"npx","args":["@browsermcp/mcp"],"env":{},"active":false},"fetch":{"command":"uvx","args":["mcp-server-fetch"],"env":{},"active":false},"filesystem":{"command":"npx","args":["-y","@modelcontextprotocol/server-filesystem","/path/to/other/allowed/dir"],"env":{},"active":false},"playwright":{"command":"npx","args":["@playwright/mcp","--isolated"],"env":{},"active":false},"sequential-thinking":{"command":"npx","args":["-y","@modelcontextprotocol/server-sequential-thinking"],"env":{},"active":false},"tavily":{"command":"npx","args":["-y","tavily-mcp"],"env":{"TAVILY_API_KEY": "tvly-YOUR_API_KEY-here"},"active":false}}}"#;
+const DEFAULT_MCP_CONFIG: &str = r#"{
+  "mcpServers": {
+    "browsermcp": {
+      "command": "npx",
+      "args": ["@browsermcp/mcp"],
+      "env": {},
+      "active": false
+    },
+    "fetch": {
+      "command": "uvx",
+      "args": ["mcp-server-fetch"],
+      "env": {},
+      "active": false
+    },
+    "serper": {
+      "command": "npx",
+      "args": ["-y", "serper-search-scrape-mcp-server"],
+      "env": { "SERPER_API_KEY": "YOUR_SERPER_API_KEY_HERE" },
+      "active": false
+    },
+    "filesystem": {
+      "command": "npx",
+      "args": [
+        "-y",
+        "@modelcontextprotocol/server-filesystem",
+        "/path/to/other/allowed/dir"
+      ],
+      "env": {},
+      "active": false
+    },
+    "sequential-thinking": {
+      "command": "npx",
+      "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"],
+      "env": {},
+      "active": false
+    }
+  }
+}
+"#;
 
 // Timeout for MCP tool calls (30 seconds)
 const MCP_TOOL_CALL_TIMEOUT: Duration = Duration::from_secs(30);
@@ -116,7 +154,8 @@ async fn start_mcp_server(
         cmd.arg("run");
         cmd.env("UV_CACHE_DIR", cache_dir.to_str().unwrap().to_string());
     }
-    #[cfg(windows)] {
+    #[cfg(windows)]
+    {
         cmd.creation_flags(0x08000000); // CREATE_NO_WINDOW: prevents shell window on Windows
     }
     let app_path_str = app_path.to_str().unwrap().to_string();
diff --git a/web-app/src/containers/dialogs/EditModel.tsx b/web-app/src/containers/dialogs/EditModel.tsx
index 0acaf0a7a..30ab81010 100644
--- a/web-app/src/containers/dialogs/EditModel.tsx
+++ b/web-app/src/containers/dialogs/EditModel.tsx
@@ -139,7 +139,9 @@ export const DialogEditModel = ({
-          Edit Model: {selectedModel.id}
+
+            Edit Model: {selectedModel.id}
+
           Configure model capabilities by toggling the options below.
diff --git a/web-app/src/containers/dialogs/ToolApproval.tsx b/web-app/src/containers/dialogs/ToolApproval.tsx
index 162733274..85580013d 100644
--- a/web-app/src/containers/dialogs/ToolApproval.tsx
+++ b/web-app/src/containers/dialogs/ToolApproval.tsx
@@ -31,8 +31,15 @@ export default function ToolApproval() {
     onDeny()
   }
 
+  const handleDialogOpen = (open: boolean) => {
+    setModalOpen(open)
+    if (!open) {
+      onDeny()
+    }
+  }
+
   return (
-
+
diff --git a/web-app/src/lib/predefined.ts b/web-app/src/lib/predefined.ts
index 216b08786..a87493722 100644
--- a/web-app/src/lib/predefined.ts
+++ b/web-app/src/lib/predefined.ts
@@ -18,20 +18,20 @@ export const modelSettings = {
     controller_type: 'input',
     controller_props: {
       value: 100,
-      placeholder: '-1',
+      placeholder: '100',
       type: 'number',
     },
   },
   temperature: {
-    key: 'temp',
+    key: 'temperature',
    title: 'Temperature',
     description:
       'Temperature for sampling (higher = more random). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.8,
-      placeholder: '0.8',
+      value: '',
+      placeholder: '0.6',
       type: 'number',
       min: 0,
       step: 0.01,
@@ -44,7 +44,7 @@
       'Top-K sampling (0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 40,
+      value: '',
       placeholder: '40',
       type: 'number',
     },
@@ -56,7 +56,7 @@
       'Top-P sampling (1.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.9,
+      value: '',
       placeholder: '0.9',
       type: 'number',
     },
@@ -68,7 +68,7 @@
       'Min-P sampling (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.1,
+      value: '',
       placeholder: '0.1',
       type: 'number',
     },
@@ -80,7 +80,7 @@
       'Number of tokens to consider for repeat penalty (0 = disabled, -1 = ctx_size). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 64,
+      value: '',
       placeholder: '64',
       type: 'number',
     },
@@ -92,7 +92,7 @@
       'Penalize repeating token sequences (1.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 1.0,
+      value: '',
       placeholder: '1.0',
       type: 'number',
     },
@@ -104,7 +104,7 @@
       'Repeat alpha presence penalty (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.0,
+      value: '',
       placeholder: '0.0',
       type: 'number',
     },
@@ -116,7 +116,7 @@
       'Repeat alpha frequency penalty (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.0,
+      value: '',
       placeholder: '0.0',
       type: 'number',
     },
diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx
index a142243ff..9ae3d151e 100644
--- a/web-app/src/routes/hub.tsx
+++ b/web-app/src/routes/hub.tsx
@@ -315,7 +315,7 @@ function Hub() {
       title: 'Recommended Model',
       disableBeacon: true,
       content:
-        "Browse and download powerful AI models from various providers, all in one place. We suggest starting with Jan-nano - a model optimized for function calling, tool integration, and research capabilities. It's ideal for building interactive AI agents.",
+        "Browse and download powerful AI models from various providers, all in one place. We suggest starting with Jan-Nano - a model optimized for function calling, tool integration, and research capabilities. It's ideal for building interactive AI agents.",
     },
     {
       target: '.hub-download-button-step',
diff --git a/web-app/src/services/providers.ts b/web-app/src/services/providers.ts
index ed3fccf5a..6bd2b63f0 100644
--- a/web-app/src/services/providers.ts
+++ b/web-app/src/services/providers.ts
@@ -129,12 +129,20 @@ export const getProviders = async (): Promise => {
           provider: providerName,
           settings: Object.values(modelSettings).reduce(
             (acc, setting) => {
-              let value = model[
-                setting.key as keyof typeof model
-              ] as keyof typeof setting.controller_props.value
+              let value = setting.controller_props.value
               if (setting.key === 'ctx_len') {
-                // @ts-expect-error dynamic type
-                value = 4096 // Default context length for Llama.cpp models
+                value = 8192 // Default context length for Llama.cpp models
+              }
+              // Set temperature to 0.6 for DefaultToolUseSupportedModels
+              if (
+                Object.values(DefaultToolUseSupportedModels).some((v) =>
+                  model.id.toLowerCase().includes(v.toLowerCase())
+                )
+              ) {
+                if (setting.key === 'temperature') value = 0.7 // Default temperature for tool-supported models
+                if (setting.key === 'top_k') value = 20 // Default top_k for tool-supported models
+                if (setting.key === 'top_p') value = 0.8 // Default top_p for tool-supported models
+                if (setting.key === 'min_p') value = 0 // Default min_p for tool-supported models
               }
               acc[setting.key] = {
                 ...setting,