fix: default settings should leave empty (#5257)
* fix: default settings should leave empty
* fix: default settings
* fix: remove some more default settings
* fix: threads and cont
* fix: data
* fix: default setting
* fix: settings
* chore: bump cortex version
* chore: bump to cortex 1.0.14
* chore: clean up
* typo
* chore: fix dialog hang
* fix: default parameter
* chore: truncate edit model title
* chore: update default provider settings
* chore: fix typo
---------
Co-authored-by: Faisal Amir <urmauur@gmail.com>
Parent: e22452b26e
Commit: 27c4918395
@@ -1 +1 @@
-1.0.13-rc6
+1.0.13-rc9
@@ -14,7 +14,7 @@
       "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
       "controllerType": "checkbox",
       "controllerProps": {
-        "value": true
+        "value": ""
       }
     },
     {
@@ -23,7 +23,7 @@
       "description": "Number of prompts that can be processed simultaneously by the model.",
       "controllerType": "input",
       "controllerProps": {
-        "value": "1",
+        "value": "",
         "placeholder": "1",
         "type": "number",
         "textAlign": "right"
@@ -35,8 +35,8 @@
       "description": "Number of CPU cores used for model processing when running without GPU.",
       "controllerType": "input",
       "controllerProps": {
-        "value": "-1",
-        "placeholder": "Number of CPU threads",
+        "value": "",
+        "placeholder": "-1 (auto-detect)",
         "type": "number",
         "textAlign": "right"
       }
@@ -47,7 +47,7 @@
       "description": "Number of threads for batch and prompt processing (default: same as Threads).",
       "controllerType": "input",
       "controllerProps": {
-        "value": -1,
+        "value": "",
         "placeholder": "-1 (same as Threads)",
         "type": "number"
       }
@@ -58,7 +58,7 @@
       "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
       "controllerType": "checkbox",
       "controllerProps": {
-        "value": false
+        "value": true
       }
     },
     {
@@ -76,7 +76,7 @@
       "description": "Controls memory usage and precision trade-off.",
       "controllerType": "dropdown",
       "controllerProps": {
-        "value": "f16",
+        "value": "q8_0",
         "options": [
           {
             "value": "q4_0",
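
Note: the settings above now ship with an empty "value" and keep the old default only as a placeholder, so the engine's own built-in default applies unless the user actually types something. A minimal TypeScript sketch of that interpretation, using hypothetical helper names rather than the extension's real API:

    // Hypothetical helper (not the extension's real API): treat an empty
    // setting value as "not set" so the engine's own default applies.
    function parseNumberSetting(raw: string): number | undefined {
      if (raw.trim() === '') return undefined // left empty => use engine default
      const n = Number(raw)
      return Number.isNaN(n) ? undefined : n
    }

    // Example: an empty n_parallel field is simply omitted from the load request.
    const nParallel = parseNumberSetting('') // undefined
    const loadParams = {
      ...(nParallel !== undefined ? { n_parallel: nParallel } : {}),
    }
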
@@ -8,7 +8,6 @@

 import {
   Model,
   executeOnMain,
   EngineEvent,
   LocalOAIEngine,
   extractModelLoadParams,
@@ -56,11 +55,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {

   /** Default Engine model load settings */
   n_parallel?: number
-  cont_batching: boolean = true
+  cont_batching: boolean = false
   caching_enabled: boolean = true
   flash_attn: boolean = true
   use_mmap: boolean = true
-  cache_type: string = 'f16'
+  cache_type: string = 'q8'
   cpu_threads?: number
   auto_unload_models: boolean = true
   /**
@@ -118,17 +117,19 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     if (numParallel.length > 0 && parseInt(numParallel) > 0) {
       this.n_parallel = parseInt(numParallel)
     }
-    this.cont_batching = await this.getSetting<boolean>(
-      Settings.cont_batching,
-      true
-    )
+    if (this.n_parallel && this.n_parallel > 1)
+      this.cont_batching = await this.getSetting<boolean>(
+        Settings.cont_batching,
+        false
+      )
     this.caching_enabled = await this.getSetting<boolean>(
       Settings.caching_enabled,
       true
     )
     this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
     this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
-    this.cache_type = await this.getSetting<string>(Settings.cache_type, 'f16')
+    if (this.caching_enabled)
+      this.cache_type = await this.getSetting<string>(Settings.cache_type, 'q8')
     this.auto_unload_models = await this.getSetting<boolean>(
       Settings.auto_unload_models,
       true
@@ -136,6 +137,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     const threads_number = Number(
       await this.getSetting<string>(Settings.cpu_threads, '')
     )

     if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number

     const huggingfaceToken = await this.getSetting<string>(
@@ -254,13 +256,19 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
           model.engine === 'nitro' // Legacy model cache
             ? 'llama-cpp'
             : model.engine,
-          cont_batching: this.cont_batching,
-          n_parallel: this.n_parallel,
-          caching_enabled: this.caching_enabled,
-          flash_attn: this.flash_attn,
-          cache_type: this.cache_type,
-          use_mmap: this.use_mmap,
-          ...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
+          ...(this.n_parallel ? { n_parallel: this.n_parallel } : {}),
+          ...(this.use_mmap ? { use_mmap: true } : {}),
+          ...(this.caching_enabled ? { caching_enabled: true } : {}),
+          ...(this.flash_attn ? { flash_attn: true } : {}),
+          ...(this.caching_enabled && this.cache_type
+            ? { cache_type: this.cache_type }
+            : {}),
+          ...(this.cpu_threads && this.cpu_threads > 0
+            ? { cpu_threads: this.cpu_threads }
+            : {}),
+          ...(this.cont_batching && this.n_parallel && this.n_parallel > 1
+            ? { cont_batching: this.cont_batching }
+            : {}),
         },
         timeout: false,
         signal,
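
Note: the hunk above switches from always sending every option to adding a key only when its setting is actually set or enabled, so unset options fall back to the backend's own defaults. A small self-contained sketch of the same conditional-spread idiom (generic names, not the extension's actual request type):

    // Sketch: build a request object that omits any option left at its
    // "unset" value, so the backend applies its own defaults.
    interface LoadOptions {
      n_parallel?: number
      flash_attn?: boolean
      cache_type?: string
    }

    function buildLoadOptions(
      n_parallel?: number,
      flash_attn?: boolean,
      cache_type?: string
    ): LoadOptions {
      return {
        ...(n_parallel && n_parallel > 0 ? { n_parallel } : {}),
        ...(flash_attn ? { flash_attn: true } : {}),
        ...(cache_type ? { cache_type } : {}),
      }
    }

    // buildLoadOptions(undefined, true, '') => { flash_attn: true }
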
@@ -1,6 +1,6 @@
 @echo off

-set CORTEX_VERSION=1.0.13-rc6
+set CORTEX_VERSION=1.0.14
 set ENGINE_VERSION=b5509
 set ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
 set ENGINE_DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
@@ -14,7 +14,7 @@ download() {
 }

 # Read CORTEX_VERSION
-CORTEX_VERSION=1.0.13-rc6
+CORTEX_VERSION=1.0.14
 ENGINE_VERSION=b5509
 CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
 ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
@@ -8,7 +8,45 @@ use tokio::{process::Command, sync::Mutex, time::timeout};

 use super::{cmd::get_jan_data_folder_path, state::AppState};

-const DEFAULT_MCP_CONFIG: &str = r#"{"mcpServers":{"browsermcp":{"command":"npx","args":["@browsermcp/mcp"],"env":{},"active":false},"fetch":{"command":"uvx","args":["mcp-server-fetch"],"env":{},"active":false},"filesystem":{"command":"npx","args":["-y","@modelcontextprotocol/server-filesystem","/path/to/other/allowed/dir"],"env":{},"active":false},"playwright":{"command":"npx","args":["@playwright/mcp","--isolated"],"env":{},"active":false},"sequential-thinking":{"command":"npx","args":["-y","@modelcontextprotocol/server-sequential-thinking"],"env":{},"active":false},"tavily":{"command":"npx","args":["-y","tavily-mcp"],"env":{"TAVILY_API_KEY": "tvly-YOUR_API_KEY-here"},"active":false}}}"#;
+const DEFAULT_MCP_CONFIG: &str = r#"{
+  "mcpServers": {
+    "browsermcp": {
+      "command": "npx",
+      "args": ["@browsermcp/mcp"],
+      "env": {},
+      "active": false
+    },
+    "fetch": {
+      "command": "uvx",
+      "args": ["mcp-server-fetch"],
+      "env": {},
+      "active": false
+    },
+    "serper": {
+      "command": "npx",
+      "args": ["-y", "serper-search-scrape-mcp-server"],
+      "env": { "SERPER_API_KEY": "YOUR_SERPER_API_KEY_HERE" },
+      "active": false
+    },
+    "filesystem": {
+      "command": "npx",
+      "args": [
+        "-y",
+        "@modelcontextprotocol/server-filesystem",
+        "/path/to/other/allowed/dir"
+      ],
+      "env": {},
+      "active": false
+    },
+    "sequential-thinking": {
+      "command": "npx",
+      "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"],
+      "env": {},
+      "active": false
+    }
+  }
+}
+"#;

 // Timeout for MCP tool calls (30 seconds)
 const MCP_TOOL_CALL_TIMEOUT: Duration = Duration::from_secs(30);
@@ -116,7 +154,8 @@ async fn start_mcp_server<R: Runtime>(
         cmd.arg("run");
         cmd.env("UV_CACHE_DIR", cache_dir.to_str().unwrap().to_string());
     }
-    #[cfg(windows)] {
+    #[cfg(windows)]
+    {
         cmd.creation_flags(0x08000000); // CREATE_NO_WINDOW: prevents shell window on Windows
     }
     let app_path_str = app_path.to_str().unwrap().to_string();
@@ -139,7 +139,9 @@ export const DialogEditModel = ({
       </DialogTrigger>
       <DialogContent>
         <DialogHeader>
-          <DialogTitle>Edit Model: {selectedModel.id}</DialogTitle>
+          <DialogTitle className="line-clamp-1" title={selectedModel.id}>
+            Edit Model: {selectedModel.id}
+          </DialogTitle>
           <DialogDescription>
             Configure model capabilities by toggling the options below.
           </DialogDescription>
@@ -31,8 +31,15 @@ export default function ToolApproval() {
     onDeny()
   }

+  const handleDialogOpen = (open: boolean) => {
+    setModalOpen(open)
+    if (!open) {
+      onDeny()
+    }
+  }
+
   return (
-    <Dialog open={isModalOpen} onOpenChange={setModalOpen}>
+    <Dialog open={isModalOpen} onOpenChange={handleDialogOpen}>
      <DialogContent showCloseButton={false}>
        <DialogHeader>
          <div className="flex items-start gap-3">
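
Note: the hunk above is the "fix dialog hang" change: dismissing the approval dialog (clicking outside or pressing Escape) now resolves the pending tool call as a denial instead of leaving it waiting forever. A simplified TypeScript sketch of that handler pattern, detached from the component's actual props:

    // Sketch of the deny-on-dismiss pattern: closing the dialog without an
    // explicit choice is treated as "deny" so the caller never hangs.
    type OpenChangeHandler = (open: boolean) => void

    function makeDialogOpenHandler(
      setModalOpen: (open: boolean) => void,
      onDeny: () => void
    ): OpenChangeHandler {
      return (open) => {
        setModalOpen(open)
        if (!open) {
          onDeny() // dismissal counts as a denial
        }
      }
    }
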
@@ -18,20 +18,20 @@ export const modelSettings = {
     controller_type: 'input',
     controller_props: {
       value: 100,
-      placeholder: '-1',
+      placeholder: '100',
       type: 'number',
     },
   },

   temperature: {
-    key: 'temp',
+    key: 'temperature',
     title: 'Temperature',
     description:
       'Temperature for sampling (higher = more random). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.8,
-      placeholder: '0.8',
+      value: '',
+      placeholder: '0.6',
       type: 'number',
       min: 0,
       step: 0.01,
@@ -44,7 +44,7 @@ export const modelSettings = {
       'Top-K sampling (0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 40,
+      value: '',
       placeholder: '40',
       type: 'number',
     },
@@ -56,7 +56,7 @@ export const modelSettings = {
       'Top-P sampling (1.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.9,
+      value: '',
       placeholder: '0.9',
       type: 'number',
     },
@@ -68,7 +68,7 @@ export const modelSettings = {
       'Min-P sampling (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.1,
+      value: '',
       placeholder: '0.1',
       type: 'number',
     },
@@ -80,7 +80,7 @@ export const modelSettings = {
       'Number of tokens to consider for repeat penalty (0 = disabled, -1 = ctx_size). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 64,
+      value: '',
       placeholder: '64',
       type: 'number',
     },
@@ -92,7 +92,7 @@ export const modelSettings = {
       'Penalize repeating token sequences (1.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 1.0,
+      value: '',
       placeholder: '1.0',
       type: 'number',
     },
@@ -104,7 +104,7 @@ export const modelSettings = {
       'Repeat alpha presence penalty (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.0,
+      value: '',
       placeholder: '0.0',
       type: 'number',
     },
@@ -116,7 +116,7 @@ export const modelSettings = {
       'Repeat alpha frequency penalty (0.0 = disabled). This is the default setting on load and can be overridden by the assistant settings.',
     controller_type: 'input',
     controller_props: {
-      value: 0.0,
+      value: '',
       placeholder: '0.0',
       type: 'number',
     },
@@ -315,7 +315,7 @@ function Hub() {
       title: 'Recommended Model',
       disableBeacon: true,
       content:
-        "Browse and download powerful AI models from various providers, all in one place. We suggest starting with Jan-nano - a model optimized for function calling, tool integration, and research capabilities. It's ideal for building interactive AI agents.",
+        "Browse and download powerful AI models from various providers, all in one place. We suggest starting with Jan-Nano - a model optimized for function calling, tool integration, and research capabilities. It's ideal for building interactive AI agents.",
     },
     {
       target: '.hub-download-button-step',
@@ -129,12 +129,20 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
           provider: providerName,
           settings: Object.values(modelSettings).reduce(
             (acc, setting) => {
-              let value = model[
-                setting.key as keyof typeof model
-              ] as keyof typeof setting.controller_props.value
+              let value = setting.controller_props.value
               if (setting.key === 'ctx_len') {
-                // @ts-expect-error dynamic type
-                value = 4096 // Default context length for Llama.cpp models
+                value = 8192 // Default context length for Llama.cpp models
               }
+              // Set temperature to 0.6 for DefaultToolUseSupportedModels
+              if (
+                Object.values(DefaultToolUseSupportedModels).some((v) =>
+                  model.id.toLowerCase().includes(v.toLowerCase())
+                )
+              ) {
+                if (setting.key === 'temperature') value = 0.7 // Default temperature for tool-supported models
+                if (setting.key === 'top_k') value = 20 // Default top_k for tool-supported models
+                if (setting.key === 'top_p') value = 0.8 // Default top_p for tool-supported models
+                if (setting.key === 'min_p') value = 0 // Default min_p for tool-supported models
+              }
              acc[setting.key] = {
                ...setting,
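
Note: the hunk above seeds each provider's settings from the predefined defaults and then applies a few per-model overrides (context length, and sampling values for tool-capable models) before keying the result by setting name. A simplified, self-contained TypeScript sketch of that reduce pattern; the types and names are illustrative, not the app's real ones:

    // Sketch: build a settings map from default definitions plus per-model overrides.
    interface SettingDef {
      key: string
      controller_props: { value: string | number | boolean }
    }

    function buildSettings(
      defs: SettingDef[],
      overrides: Record<string, number>
    ): Record<string, SettingDef> {
      return defs.reduce<Record<string, SettingDef>>((acc, setting) => {
        const value = overrides[setting.key] ?? setting.controller_props.value
        acc[setting.key] = {
          ...setting,
          controller_props: { ...setting.controller_props, value },
        }
        return acc
      }, {})
    }

    // buildSettings(defs, { ctx_len: 8192, temperature: 0.7 })
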