fix: default settings should leave empty (#5257)
* fix: default settings should leave empty
* fix: default settings
* fix: remove some more default settings
* fix: threads and cont
* fix: data
* fix: default setting
* fix: settings
* chore: bump cortex version
* chore: bump to cortex 1.0.14
* chore: clean up
* typo
* chore: fix dialog hang
* fix: default parameter
* chore: truncate edit model title
* chore: update default provider settings
* chore: fix typo
---------
Co-authored-by: Faisal Amir <urmauur@gmail.com>
Parent: e22452b26e
Commit: 27c4918395
@@ -1 +1 @@
-1.0.13-rc6
+1.0.13-rc9
@@ -14,7 +14,7 @@
       "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
       "controllerType": "checkbox",
       "controllerProps": {
-        "value": true
+        "value": ""
       }
     },
     {
@@ -23,7 +23,7 @@
       "description": "Number of prompts that can be processed simultaneously by the model.",
       "controllerType": "input",
       "controllerProps": {
-        "value": "1",
+        "value": "",
         "placeholder": "1",
         "type": "number",
         "textAlign": "right"
@@ -35,8 +35,8 @@
       "description": "Number of CPU cores used for model processing when running without GPU.",
       "controllerType": "input",
       "controllerProps": {
-        "value": "-1",
-        "placeholder": "Number of CPU threads",
+        "value": "",
+        "placeholder": "-1 (auto-detect)",
         "type": "number",
         "textAlign": "right"
       }
@@ -47,7 +47,7 @@
       "description": "Number of threads for batch and prompt processing (default: same as Threads).",
       "controllerType": "input",
       "controllerProps": {
-        "value": -1,
+        "value": "",
         "placeholder": "-1 (same as Threads)",
         "type": "number"
       }
@@ -58,7 +58,7 @@
       "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
       "controllerType": "checkbox",
       "controllerProps": {
-        "value": false
+        "value": true
       }
     },
     {
@@ -76,7 +76,7 @@
       "description": "Controls memory usage and precision trade-off.",
       "controllerType": "dropdown",
       "controllerProps": {
-        "value": "f16",
+        "value": "q8_0",
         "options": [
           {
             "value": "q4_0",
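
Note: the settings above now ship with an empty "value" and keep the old default only as a placeholder, so the engine's own built-in default applies unless the user actually types something. A minimal TypeScript sketch of that interpretation, using hypothetical helper names rather than the extension's real API:

    // Hypothetical helper (not the extension's real API): treat an empty
    // setting value as "not set" so the engine's own default applies.
    function parseNumberSetting(raw: string): number | undefined {
      if (raw.trim() === '') return undefined // left empty => use engine default
      const n = Number(raw)
      return Number.isNaN(n) ? undefined : n
    }

    // Example: an empty n_parallel field is simply omitted from the load request.
    const nParallel = parseNumberSetting('') // undefined
    const loadParams = {
      ...(nParallel !== undefined ? { n_parallel: nParallel } : {}),
    }
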
@@ -8,7 +8,6 @@

 import {
   Model,
   executeOnMain,
   EngineEvent,
   LocalOAIEngine,
   extractModelLoadParams,
@@ -56,11 +55,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {

   /** Default Engine model load settings */
   n_parallel?: number
-  cont_batching: boolean = true
+  cont_batching: boolean = false
   caching_enabled: boolean = true
   flash_attn: boolean = true
   use_mmap: boolean = true
-  cache_type: string = 'f16'
+  cache_type: string = 'q8'
   cpu_threads?: number
   auto_unload_models: boolean = true
   /**
@@ -118,17 +117,19 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     if (numParallel.length > 0 && parseInt(numParallel) > 0) {
       this.n_parallel = parseInt(numParallel)
     }
-    this.cont_batching = await this.getSetting<boolean>(
-      Settings.cont_batching,
-      true
-    )
+    if (this.n_parallel && this.n_parallel > 1)
+      this.cont_batching = await this.getSetting<boolean>(
+        Settings.cont_batching,
+        false
+      )
     this.caching_enabled = await this.getSetting<boolean>(
       Settings.caching_enabled,
       true
     )
     this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
     this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
-    this.cache_type = await this.getSetting<string>(Settings.cache_type, 'f16')
+    if (this.caching_enabled)
+      this.cache_type = await this.getSetting<string>(Settings.cache_type, 'q8')
     this.auto_unload_models = await this.getSetting<boolean>(
       Settings.auto_unload_models,
       true
@@ -136,6 +137,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     const threads_number = Number(
       await this.getSetting<string>(Settings.cpu_threads, '')
     )

     if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number

     const huggingfaceToken = await this.getSetting<string>(
@@ -254,13 +256,19 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
           model.engine === 'nitro' // Legacy model cache
             ? 'llama-cpp'
             : model.engine,
-          cont_batching: this.cont_batching,
-          n_parallel: this.n_parallel,
-          caching_enabled: this.caching_enabled,
-          flash_attn: this.flash_attn,
-          cache_type: this.cache_type,
-          use_mmap: this.use_mmap,
-          ...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
+          ...(this.n_parallel ? { n_parallel: this.n_parallel } : {}),
+          ...(this.use_mmap ? { use_mmap: true } : {}),
+          ...(this.caching_enabled ? { caching_enabled: true } : {}),
+          ...(this.flash_attn ? { flash_attn: true } : {}),
+          ...(this.caching_enabled && this.cache_type
+            ? { cache_type: this.cache_type }
+            : {}),
+          ...(this.cpu_threads && this.cpu_threads > 0
+            ? { cpu_threads: this.cpu_threads }
+            : {}),
+          ...(this.cont_batching && this.n_parallel && this.n_parallel > 1
+            ? { cont_batching: this.cont_batching }
+            : {}),
         },
         timeout: false,
         signal,
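
Note: the hunk above switches from always sending every option to adding a key only when its setting is actually set or enabled, so unset options fall back to the backend's own defaults. A small self-contained sketch of the same conditional-spread idiom (generic names, not the extension's actual request type):

    // Sketch: build a request object that omits any option left at its
    // "unset" value, so the backend applies its own defaults.
    interface LoadOptions {
      n_parallel?: number
      flash_attn?: boolean
      cache_type?: string
    }

    function buildLoadOptions(
      n_parallel?: number,
      flash_attn?: boolean,
      cache_type?: string
    ): LoadOptions {
      return {
        ...(n_parallel && n_parallel > 0 ? { n_parallel } : {}),
        ...(flash_attn ? { flash_attn: true } : {}),
        ...(cache_type ? { cache_type } : {}),
      }
    }

    // buildLoadOptions(undefined, true, '') => { flash_attn: true }
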
@@ -1,6 +1,6 @@
 @echo off

-set CORTEX_VERSION=1.0.13-rc6
+set CORTEX_VERSION=1.0.14
 set ENGINE_VERSION=b5509
 set ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
 set ENGINE_DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
@@ -14,7 +14,7 @@ download() {
 }

 # Read CORTEX_VERSION
-CORTEX_VERSION=1.0.13-rc6
+CORTEX_VERSION=1.0.14
 ENGINE_VERSION=b5509
 CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
 ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
@@ -8,7 +8,45 @@ use tokio::{process::Command, sync::Mutex, time::timeout};

 use super::{cmd::get_jan_data_folder_path, state::AppState};

-const DEFAULT_MCP_CONFIG: &str = r#"{"mcpServers":{"browsermcp":{"command":"npx","args":["@browsermcp/mcp"],"env":{},"active":false},"fetch":{"command":"uvx","args":["mcp-server-fetch"],"env":{},"active":false},"filesystem":{"command":"npx","args":["-y","@modelcontextprotocol/server-filesystem","/path/to/other/allowed/dir"],"env":{},"active":false},"playwright":{"command":"npx","args":["@playwright/mcp","--isolated"],"env":{},"active":false},"sequential-thinking":{"command":"npx","args":["-y","@modelcontextprotocol/server-sequential-thinking"],"env":{},"active":false},"tavily":{"command":"npx","args":["-y","tavily-mcp"],"env":{"TAVILY_API_KEY": "tvly-YOUR_API_KEY-here"},"active":false}}}"#;
+const DEFAULT_MCP_CONFIG: &str = r#"{
+  "mcpServers": {
+    "browsermcp": {
+      "command": "npx",
+      "args": ["@browsermcp/mcp"],
+      "env": {},
+      "active": false
+    },
+    "fetch": {
+      "command": "uvx",
+      "args": ["mcp-server-fetch"],
+      "env": {},
+      "active": false
+    },
+    "serper": {
+      "command": "npx",
+      "args": ["-y", "serper-search-scrape-mcp-server"],
+      "env": { "SERPER_API_KEY": "YOUR_SERPER_API_KEY_HERE" },
+      "active": false
+    },
+    "filesystem": {
+      "command": "npx",
+      "args": [
+        "-y",
+        "@modelcontextprotocol/server-filesystem",
+        "/path/to/other/allowed/dir"
+      ],
+      "env": {},
+      "active": false
+    },
+    "sequential-thinking": {
+      "command": "npx",
+      "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"],
+      "env": {},
+      "active": false
+    }
+  }
+}
+"#;

 // Timeout for MCP tool calls (30 seconds)
 const MCP_TOOL_CALL_TIMEOUT: Duration = Duration::from_secs(30);
@@ -116,7 +154,8 @@ async fn start_mcp_server<R: Runtime>(
         cmd.arg("run");
         cmd.env("UV_CACHE_DIR", cache_dir.to_str().unwrap().to_string());
     }
-    #[cfg(windows)] {
+    #[cfg(windows)]
+    {
         cmd.creation_flags(0x08000000); // CREATE_NO_WINDOW: prevents shell window on Windows
     }
     let app_path_str = app_path.to_str().unwrap().to_string();
@@ -139,7 +139,9 @@ export const DialogEditModel = ({
       </DialogTrigger>
       <DialogContent>
         <DialogHeader>
-          <DialogTitle>Edit Model: {selectedModel.id}</DialogTitle>
+          <DialogTitle className="line-clamp-1" title={selectedModel.id}>
+            Edit Model: {selectedModel.id}
+          </DialogTitle>
           <DialogDescription>
             Configure model capabilities by toggling the options below.
           </DialogDescription>
@@ -31,8 +31,15 @@ export default function ToolApproval() {
     onDeny()
   }

+  const handleDialogOpen = (open: boolean) => {
+    setModalOpen(open)
+    if (!open) {
+      onDeny()
+    }
+  }
+
   return (
-    <Dialog open={isModalOpen} onOpenChange={setModalOpen}>
+    <Dialog open={isModalOpen} onOpenChange={handleDialogOpen}>
      <DialogContent showCloseButton={false}>
        <DialogHeader>
          <div className="flex items-start gap-3">
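
Note: the hunk above is the "fix dialog hang" change: dismissing the approval dialog (clicking outside or pressing Escape) now resolves the pending tool call as a denial instead of leaving it waiting forever. A simplified TypeScript sketch of that handler pattern, detached from the component's actual props:

    // Sketch of the deny-on-dismiss pattern: closing the dialog without an
    // explicit choice is treated as "deny" so the caller never hangs.
    type OpenChangeHandler = (open: boolean) => void

    function makeDialogOpenHandler(
      setModalOpen: (open: boolean) => void,
      onDeny: () => void
    ): OpenChangeHandler {
      return (open) => {
        setModalOpen(open)
        if (!open) {
          onDeny() // dismissal counts as a denial
        }
      }
    }
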
@@ -18,20 +18,20 @@ export const modelSettings = {
     controller_type: 'input',
     controller_props: {
       value: 100,
-      placeholder: '-1',
+      placeholder: '100',
       type: 'number',
     },
   },

   temperature: {
-    key: 'temp',
+    key: 'temperature',
     title: 'Temperature',
     description:
       'Temperature for sampling (higher = more random). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.8,
-      placeholder: '0.8',
+      value: '',
+      placeholder: '0.6',
       type: 'number',
       min: 0,
       step: 0.01,
@@ -44,7 +44,7 @@ export const modelSettings = {
       'Top-K sampling (0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 40,
+      value: '',
       placeholder: '40',
       type: 'number',
     },
@@ -56,7 +56,7 @@ export const modelSettings = {
       'Top-P sampling (1.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.9,
+      value: '',
       placeholder: '0.9',
       type: 'number',
     },
@@ -68,7 +68,7 @@ export const modelSettings = {
       'Min-P sampling (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.1,
+      value: '',
       placeholder: '0.1',
       type: 'number',
     },
@@ -80,7 +80,7 @@ export const modelSettings = {
       'Number of tokens to consider for repeat penalty (0 = disabled, -1 = ctx_size). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 64,
+      value: '',
       placeholder: '64',
       type: 'number',
     },
@@ -92,7 +92,7 @@ export const modelSettings = {
       'Penalize repeating token sequences (1.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 1.0,
+      value: '',
       placeholder: '1.0',
       type: 'number',
     },
@@ -104,7 +104,7 @@ export const modelSettings = {
       'Repeat alpha presence penalty (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.0,
+      value: '',
       placeholder: '0.0',
       type: 'number',
     },
@@ -116,7 +116,7 @@ export const modelSettings = {
       'Repeat alpha frequency penalty (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.0,
+      value: '',
       placeholder: '0.0',
       type: 'number',
     },
@@ -315,7 +315,7 @@ function Hub() {
       title: 'Recommended Model',
       disableBeacon: true,
       content:
-        "Browse and download powerful AI models from various providers, all in one place. We suggest starting with Jan-nano - a model optimized for function calling, tool integration, and research capabilities. It's ideal for building interactive AI agents.",
+        "Browse and download powerful AI models from various providers, all in one place. We suggest starting with Jan-Nano - a model optimized for function calling, tool integration, and research capabilities. It's ideal for building interactive AI agents.",
     },
     {
       target: '.hub-download-button-step',
@@ -129,12 +129,20 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
           provider: providerName,
           settings: Object.values(modelSettings).reduce(
             (acc, setting) => {
-              let value = model[
-                setting.key as keyof typeof model
-              ] as keyof typeof setting.controller_props.value
+              let value = setting.controller_props.value
               if (setting.key === 'ctx_len') {
-                // @ts-expect-error dynamic type
-                value = 4096 // Default context length for Llama.cpp models
+                value = 8192 // Default context length for Llama.cpp models
               }
+              // Set temperature to 0.6 for DefaultToolUseSupportedModels
+              if (
+                Object.values(DefaultToolUseSupportedModels).some((v) =>
+                  model.id.toLowerCase().includes(v.toLowerCase())
+                )
+              ) {
+                if (setting.key === 'temperature') value = 0.7 // Default temperature for tool-supported models
+                if (setting.key === 'top_k') value = 20 // Default top_k for tool-supported models
+                if (setting.key === 'top_p') value = 0.8 // Default top_p for tool-supported models
+                if (setting.key === 'min_p') value = 0 // Default min_p for tool-supported models
+              }
              acc[setting.key] = {
                ...setting,
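
Note: the hunk above seeds each provider's settings from the predefined defaults and then applies a few per-model overrides (context length, and sampling values for tool-capable models) before keying the result by setting name. A simplified, self-contained TypeScript sketch of that reduce pattern; the types and names are illustrative, not the app's real ones:

    // Sketch: build a settings map from default definitions plus per-model overrides.
    interface SettingDef {
      key: string
      controller_props: { value: string | number | boolean }
    }

    function buildSettings(
      defs: SettingDef[],
      overrides: Record<string, number>
    ): Record<string, SettingDef> {
      return defs.reduce<Record<string, SettingDef>>((acc, setting) => {
        const value = overrides[setting.key] ?? setting.controller_props.value
        acc[setting.key] = {
          ...setting,
          controller_props: { ...setting.controller_props, value },
        }
        return acc
      }, {})
    }

    // buildSettings(defs, { ctx_len: 8192, temperature: 0.7 })
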