diff --git a/web-app/src/containers/DropdownModelProvider.tsx b/web-app/src/containers/DropdownModelProvider.tsx
index 75a7d71d7..d34228daf 100644
--- a/web-app/src/containers/DropdownModelProvider.tsx
+++ b/web-app/src/containers/DropdownModelProvider.tsx
@@ -25,6 +25,7 @@ const DropdownModelProvider = ({ model }: DropdownModelProviderProps) => {
providers,
getProviderByName,
selectModelProvider,
+ getModelBy,
selectedProvider,
selectedModel,
} = useModelProvider()
@@ -52,8 +53,11 @@ const DropdownModelProvider = ({ model }: DropdownModelProviderProps) => {
}
}, [selectedProvider, selectedModel])
- if (!providers.length) return null
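+ // Resolve the full model object for the selected model id (undefined if it is no longer in the provider)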
+ const currentModel = selectedModel?.id
+ ? getModelBy(selectedModel?.id)
+ : undefined
+ if (!providers.length) return null
const provider = getProviderByName(selectedProvider)
@@ -81,9 +85,9 @@ const DropdownModelProvider = ({ model }: DropdownModelProviderProps) => {
- {selectedModel && (
+ {currentModel && (
)}
diff --git a/web-app/src/containers/ModelSetting.tsx b/web-app/src/containers/ModelSetting.tsx
index 7ccbc794b..e7a9d0d08 100644
--- a/web-app/src/containers/ModelSetting.tsx
+++ b/web-app/src/containers/ModelSetting.tsx
@@ -57,11 +57,19 @@ export function ModelSetting({ model, provider }: ModelSettingProps) {
models: updatedModels,
})
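+ // Flatten settings into params, coercing controller values to numbers when they parse cleanly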
+ const params = Object.entries(updatedModel.settings).reduce(
+ (acc, [key, value]) => {
+ const rawVal = value.controller_props?.value
+ const num = parseFloat(rawVal as string)
+ acc[key] = !isNaN(num) ? num : rawVal
+ return acc
+ },
+ {} as Record<string, unknown>
+ ) as ModelSettingParams
updateModel({
id: model.id,
- settings: Object.entries(updatedModel.settings).map(([key, value]) => ({
- [key]: value.controller_props?.value,
- })) as ModelSettingParams,
+ settings: params,
+ ...(params as unknown as object),
})
}
}
@@ -75,7 +83,7 @@ export function ModelSetting({ model, provider }: ModelSettingProps) {
- Model Setting {model.id}
+ Model Settings - {model.id}
Configure model settings to optimize performance and behavior.
diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts
index 2bff798a5..85e7a88f2 100644
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@@ -180,14 +180,14 @@ export const useChat = () => {
resetTokenSpeed,
provider,
getMessages,
+ setAbortController,
updateStreamingContent,
addMessage,
setPrompt,
selectedModel,
- currentAssistant?.instructions,
- setAbortController,
- updateLoadingModel,
+ currentAssistant,
tools,
+ updateLoadingModel,
updateTokenSpeed,
]
)
diff --git a/web-app/src/hooks/useModelProvider.ts b/web-app/src/hooks/useModelProvider.ts
index 8fa4b976b..49bbbee39 100644
--- a/web-app/src/hooks/useModelProvider.ts
+++ b/web-app/src/hooks/useModelProvider.ts
@@ -6,6 +6,7 @@ type ModelProviderState = {
providers: ModelProvider[]
selectedProvider: string
selectedModel: Model | null
+ getModelBy: (modelId: string) => Model | undefined
setProviders: (providers: ModelProvider[]) => void
getProviderByName: (providerName: string) => ModelProvider | undefined
updateProvider: (providerName: string, data: Partial<ModelProvider>) => void
@@ -24,6 +25,13 @@ export const useModelProvider = create<ModelProviderState>()(
providers: [],
selectedProvider: 'llama.cpp',
selectedModel: null,
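+ // Look up a model by id within the currently selected provider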
+ getModelBy: (modelId: string) => {
+ const provider = get().providers.find(
+ (provider) => provider.provider === get().selectedProvider
+ )
+ if (!provider) return undefined
+ return provider.models.find((model) => model.id === modelId)
+ },
setProviders: (providers) =>
set((state) => {
const existingProviders = state.providers
diff --git a/web-app/src/lib/predefined.ts b/web-app/src/lib/predefined.ts
index b3901f627..4fdc24cb9 100644
--- a/web-app/src/lib/predefined.ts
+++ b/web-app/src/lib/predefined.ts
@@ -1,6 +1,6 @@
export const modelSettings = {
- ctx_size: {
- key: 'ctx_size',
+ ctx_len: {
+ key: 'ctx_len',
title: 'Context Size',
description: 'Size of the prompt context (0 = loaded from model).',
controller_type: 'input',
@@ -10,290 +10,108 @@ export const modelSettings = {
type: 'number',
},
},
- n_predict: {
- key: 'n_predict',
- title: 'Max Tokens to Predict',
- description: 'Maximum number of tokens to generate (-1 = infinity).',
- controller_type: 'input',
- controller_props: {
- value: -1,
- placeholder: '-1',
- type: 'number',
- },
- },
- batch_size: {
- key: 'batch_size',
- title: 'Batch Size',
- description: 'Logical maximum batch size for processing prompts.',
- controller_type: 'input',
- controller_props: {
- value: 2048,
- placeholder: '2048',
- type: 'number',
- },
- },
- ubatch_size: {
- key: 'ubatch_size',
- title: 'uBatch Size',
- description: 'Physical maximum batch size for processing prompts.',
- controller_type: 'input',
- controller_props: {
- value: 512,
- placeholder: '512',
- type: 'number',
- },
- },
- n_gpu_layers: {
- key: 'n_gpu_layers',
+ ngl: {
+ key: 'ngl',
title: 'GPU Layers',
description:
'Number of model layers to offload to the GPU (-1 for all layers, 0 for CPU only).',
controller_type: 'input',
controller_props: {
- value: -1,
+ value: 100,
placeholder: '-1',
type: 'number',
},
},
- device: {
- key: 'device',
- title: 'Devices for Offload',
- description:
- "Comma-separated list of devices to use for offloading (e.g., 'cuda:0', 'cuda:0,cuda:1'). Leave empty to use default/CPU only.",
+
+ temperature: {
+ key: 'temp',
+ title: 'Temperature',
+ description: 'Temperature for sampling (higher = more random).',
controller_type: 'input',
controller_props: {
- value: '',
- placeholder: 'cuda:0',
- type: 'text',
+ value: 0.8,
+ placeholder: '0.8',
+ type: 'number',
+ min: 0,
+ step: 0.01,
},
},
- split_mode: {
- key: 'split_mode',
- title: 'GPU Split Mode',
- description: 'How to split the model across multiple GPUs.',
- controller_type: 'dropdown',
- controller_props: {
- value: 'layer',
- options: [
- { value: 'none', name: 'None' },
- { value: 'layer', name: 'Layer' },
- { value: 'row', name: 'Row' },
- ],
- },
- },
- main_gpu: {
- key: 'main_gpu',
- title: 'Main GPU Index',
- description:
- 'The GPU to use for the model (split-mode=none) or intermediate results (split-mode=row).',
+ top_k: {
+ key: 'top_k',
+ title: 'Top K',
+ description: 'Top-K sampling (0 = disabled).',
controller_type: 'input',
controller_props: {
- value: 0,
- placeholder: '0',
+ value: 40,
+ placeholder: '40',
type: 'number',
},
},
- flash_attn: {
- key: 'flash_attn',
- title: 'Flash Attention',
- description: 'Enable Flash Attention for optimized performance.',
- controller_type: 'checkbox',
+ top_p: {
+ key: 'top_p',
+ title: 'Top P',
+ description: 'Top-P sampling (1.0 = disabled).',
+ controller_type: 'input',
controller_props: {
- value: false,
+ value: 0.9,
+ placeholder: '0.9',
+ type: 'number',
},
},
- cont_batching: {
- key: 'cont_batching',
- title: 'Continuous Batching',
- description:
- 'Enable continuous batching (a.k.a dynamic batching) for concurrent requests (default: enabled).',
- controller_type: 'checkbox',
- controller_props: {
- value: true,
- },
- },
- no_mmap: {
- key: 'no_mmap',
- title: 'Disable mmap',
- description:
- 'Do not memory-map model (slower load but may reduce pageouts if not using mlock).',
- controller_type: 'checkbox',
- controller_props: {
- value: false,
- },
- },
- mlock: {
- key: 'mlock',
- title: 'MLock',
- description:
- 'Force system to keep model in RAM, preventing swapping/compression.',
- controller_type: 'checkbox',
- controller_props: {
- value: false,
- },
- },
- no_kv_offload: {
- key: 'no_kv_offload',
- title: 'Disable KV Offload',
- description: 'Disable KV cache offload to GPU (if GPU is used).',
- controller_type: 'checkbox',
- controller_props: {
- value: false,
- },
- },
- cache_type_k: {
- key: 'cache_type_k',
- title: 'KV Cache K Type',
- description: 'KV cache data type for Keys (default: f16).',
- controller_type: 'dropdown',
- controller_props: {
- value: 'f16',
- options: [
- { value: 'f32', name: 'f32' },
- { value: 'f16', name: 'f16' },
- { value: 'bf16', name: 'bf16' },
- { value: 'q8_0', name: 'q8_0' },
- { value: 'q4_0', name: 'q4_0' },
- { value: 'q4_1', name: 'q4_1' },
- { value: 'iq4_nl', name: 'iq4_nl' },
- { value: 'q5_0', name: 'q5_0' },
- { value: 'q5_1', name: 'q5_1' },
- ],
- },
- },
- cache_type_v: {
- key: 'cache_type_v',
- title: 'KV Cache V Type',
- description: 'KV cache data type for Values (default: f16).',
- controller_type: 'dropdown',
- controller_props: {
- value: 'f16',
- options: [
- { value: 'f32', name: 'f32' },
- { value: 'f16', name: 'f16' },
- { value: 'bf16', name: 'bf16' },
- { value: 'q8_0', name: 'q8_0' },
- { value: 'q4_0', name: 'q4_0' },
- { value: 'q4_1', name: 'q4_1' },
- { value: 'iq4_nl', name: 'iq4_nl' },
- { value: 'q5_0', name: 'q5_0' },
- { value: 'q5_1', name: 'q5_1' },
- ],
- },
- },
- defrag_thold: {
- key: 'defrag_thold',
- title: 'KV Cache Defragmentation Threshold',
- description: 'Threshold for KV cache defragmentation (< 0 to disable).',
+ min_p: {
+ key: 'min_p',
+ title: 'Min P',
+ description: 'Min-P sampling (0.0 = disabled).',
controller_type: 'input',
controller_props: {
value: 0.1,
placeholder: '0.1',
type: 'number',
- step: 0.01,
},
},
- rope_scaling: {
- key: 'rope_scaling',
- title: 'RoPE Scaling Method',
- description: 'RoPE frequency scaling method.',
- controller_type: 'dropdown',
- controller_props: {
- value: 'none',
- options: [
- { value: 'none', name: 'None' },
- { value: 'linear', name: 'Linear' },
- { value: 'yarn', name: 'YaRN' },
- ],
- },
- },
- rope_scale: {
- key: 'rope_scale',
- title: 'RoPE Scale Factor',
- description: 'RoPE context scaling factor.',
- controller_type: 'input',
- controller_props: {
- value: 1.0,
- placeholder: '1.0',
- type: 'number',
- min: 0,
- step: 0.01,
- },
- },
- rope_freq_base: {
- key: 'rope_freq_base',
- title: 'RoPE Frequency Base',
- description: 'RoPE base frequency (0 = loaded from model).',
- controller_type: 'input',
- controller_props: {
- value: 0,
- placeholder: '0 (model default)',
- type: 'number',
- },
- },
- rope_freq_scale: {
- key: 'rope_freq_scale',
- title: 'RoPE Frequency Scale Factor',
- description: 'RoPE frequency scaling factor.',
- controller_type: 'input',
- controller_props: {
- value: 1.0,
- placeholder: '1.0',
- type: 'number',
- min: 0,
- step: 0.01,
- },
- },
- mirostat: {
- key: 'mirostat',
- title: 'Mirostat Mode',
+ repeat_last_n: {
+ key: 'repeat_last_n',
+ title: 'Repeat Last N',
description:
- 'Use Mirostat sampling (0: disabled, 1: Mirostat V1, 2: Mirostat V2).',
- controller_type: 'dropdown',
- controller_props: {
- value: 0,
- options: [
- { value: 0, name: 'Disabled' },
- { value: 1, name: 'Mirostat V1' },
- { value: 2, name: 'Mirostat V2' },
- ],
- },
- },
- mirostat_lr: {
- key: 'mirostat_lr',
- title: 'Mirostat Learning Rate',
- description: 'Mirostat learning rate (eta).',
+ 'Number of tokens to consider for repeat penalty (0 = disabled, -1 = ctx_size).',
controller_type: 'input',
controller_props: {
- value: 0.1,
- placeholder: '0.1',
+ value: 64,
+ placeholder: '64',
type: 'number',
- min: 0,
- step: 0.01,
},
},
- mirostat_ent: {
- key: 'mirostat_ent',
- title: 'Mirostat Target Entropy',
- description: 'Mirostat target entropy (tau).',
+ repeat_penalty: {
+ key: 'repeat_penalty',
+ title: 'Repeat Penalty',
+ description: 'Penalize repeating token sequences (1.0 = disabled).',
controller_type: 'input',
controller_props: {
- value: 5.0,
- placeholder: '5.0',
+ value: 1.0,
+ placeholder: '1.0',
type: 'number',
- min: 0,
- step: 0.01,
},
},
- json_schema_file: {
- key: 'json_schema_file',
- title: 'JSON Schema File',
- description: 'Path to a JSON schema file to constrain generations.',
+ presence_penalty: {
+ key: 'presence_penalty',
+ title: 'Presence Penalty',
+ description: 'Repeat alpha presence penalty (0.0 = disabled).',
controller_type: 'input',
controller_props: {
- value: '',
- placeholder: 'path/to/schema.json',
- type: 'text',
+ value: 0.0,
+ placeholder: '0.0',
+ type: 'number',
+ },
+ },
+ frequency_penalty: {
+ key: 'frequency_penalty',
+ title: 'Frequency Penalty',
+ description: 'Repeat alpha frequency penalty (0.0 = disabled).',
+ controller_type: 'input',
+ controller_props: {
+ value: 0.0,
+ placeholder: '0.0',
+ type: 'number',
},
},
}