diff --git a/web-app/src/containers/DropdownModelProvider.tsx b/web-app/src/containers/DropdownModelProvider.tsx
index 75a7d71d7..d34228daf 100644
--- a/web-app/src/containers/DropdownModelProvider.tsx
+++ b/web-app/src/containers/DropdownModelProvider.tsx
@@ -25,6 +25,7 @@ const DropdownModelProvider = ({ model }: DropdownModelProviderProps) => {
     providers,
     getProviderByName,
     selectModelProvider,
+    getModelBy,
     selectedProvider,
     selectedModel,
   } = useModelProvider()
@@ -52,8 +53,11 @@ const DropdownModelProvider = ({ model }: DropdownModelProviderProps) => {
     }
   }, [selectedProvider, selectedModel])

-  if (!providers.length) return null
+  const currentModel = selectedModel?.id
+    ? getModelBy(selectedModel?.id)
+    : undefined
+
+  if (!providers.length) return null

   const provider = getProviderByName(selectedProvider)
@@ -81,9 +85,9 @@ const DropdownModelProvider = ({ model }: DropdownModelProviderProps) => {
-        {selectedModel && (
+        {currentModel && (
         )}
diff --git a/web-app/src/containers/ModelSetting.tsx b/web-app/src/containers/ModelSetting.tsx
index 7ccbc794b..e7a9d0d08 100644
--- a/web-app/src/containers/ModelSetting.tsx
+++ b/web-app/src/containers/ModelSetting.tsx
@@ -57,11 +57,19 @@ export function ModelSetting({ model, provider }: ModelSettingProps) {
       models: updatedModels,
     })

+    const params = Object.entries(updatedModel.settings).reduce(
+      (acc, [key, value]) => {
+        const rawVal = value.controller_props?.value
+        const num = parseFloat(rawVal as string)
+        acc[key] = !isNaN(num) ? num : rawVal
+        return acc
+      },
+      {} as Record<string, unknown>
+    ) as ModelSettingParams
     updateModel({
       id: model.id,
-      settings: Object.entries(updatedModel.settings).map(([key, value]) => ({
-        [key]: value.controller_props?.value,
-      })) as ModelSettingParams,
+      settings: params,
+      ...(params as unknown as object),
     })
   }
 }
@@ -75,7 +83,7 @@ export function ModelSetting({ model, provider }: ModelSettingProps) {
-          Model Setting {model.id}
+          Model Settings - {model.id}
           Configure model settings to optimize performance and behavior.
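Side note on the ModelSetting.tsx hunk above: the new `params` reduce flattens the settings map into a single object and coerces numeric strings to numbers before calling `updateModel`, instead of the old `.map(...)`, which produced an array of single-key objects. A minimal standalone sketch of that coercion, using simplified, hypothetical types (the app's real `ModelSettingParams` and setting shapes are richer):

```ts
// Sketch only: hypothetical SettingEntry type; mirrors the reduce added in ModelSetting.tsx.
type SettingEntry = { controller_props?: { value?: string | number | boolean } }

function toParams(settings: Record<string, SettingEntry>): Record<string, unknown> {
  return Object.entries(settings).reduce((acc, [key, value]) => {
    const rawVal = value.controller_props?.value
    const num = parseFloat(rawVal as string)
    // Numeric strings (and numbers) stay numeric; booleans and plain strings pass through.
    acc[key] = !isNaN(num) ? num : rawVal
    return acc
  }, {} as Record<string, unknown>)
}

// toParams({ ctx_len: { controller_props: { value: '8192' } } }) -> { ctx_len: 8192 }
```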
diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts
index 2bff798a5..85e7a88f2 100644
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@@ -180,14 +180,14 @@ export const useChat = () => {
       resetTokenSpeed,
       provider,
       getMessages,
+      setAbortController,
       updateStreamingContent,
       addMessage,
       setPrompt,
       selectedModel,
-      currentAssistant?.instructions,
-      setAbortController,
-      updateLoadingModel,
+      currentAssistant,
       tools,
+      updateLoadingModel,
       updateTokenSpeed,
     ]
   )
diff --git a/web-app/src/hooks/useModelProvider.ts b/web-app/src/hooks/useModelProvider.ts
index 8fa4b976b..49bbbee39 100644
--- a/web-app/src/hooks/useModelProvider.ts
+++ b/web-app/src/hooks/useModelProvider.ts
@@ -6,6 +6,7 @@ type ModelProviderState = {
   providers: ModelProvider[]
   selectedProvider: string
   selectedModel: Model | null
+  getModelBy: (modelId: string) => Model | undefined
   setProviders: (providers: ModelProvider[]) => void
   getProviderByName: (providerName: string) => ModelProvider | undefined
   updateProvider: (providerName: string, data: Partial<ModelProvider>) => void
@@ -24,6 +25,13 @@ export const useModelProvider = create<ModelProviderState>()(
       providers: [],
       selectedProvider: 'llama.cpp',
       selectedModel: null,
+      getModelBy: (modelId: string) => {
+        const provider = get().providers.find(
+          (provider) => provider.provider === get().selectedProvider
+        )
+        if (!provider) return undefined
+        return provider.models.find((model) => model.id === modelId)
+      },
       setProviders: (providers) =>
         set((state) => {
           const existingProviders = state.providers
diff --git a/web-app/src/lib/predefined.ts b/web-app/src/lib/predefined.ts
index b3901f627..4fdc24cb9 100644
--- a/web-app/src/lib/predefined.ts
+++ b/web-app/src/lib/predefined.ts
@@ -1,6 +1,6 @@
 export const modelSettings = {
-  ctx_size: {
-    key: 'ctx_size',
+  ctx_len: {
+    key: 'ctx_len',
     title: 'Context Size',
     description: 'Size of the prompt context (0 = loaded from model).',
     controller_type: 'input',
     controller_props: {
       value: 8192,
       placeholder: '8192',
       type: 'number',
     },
   },
@@ -10,290 +10,108 @@
-  n_predict: {
-    key: 'n_predict',
-    title: 'Max Tokens to Predict',
-    description: 'Maximum number of tokens to generate (-1 = infinity).',
-    controller_type: 'input',
-    controller_props: {
-      value: -1,
-      placeholder: '-1',
-      type: 'number',
-    },
-  },
-  batch_size: {
-    key: 'batch_size',
-    title: 'Batch Size',
-    description: 'Logical maximum batch size for processing prompts.',
-    controller_type: 'input',
-    controller_props: {
-      value: 2048,
-      placeholder: '2048',
-      type: 'number',
-    },
-  },
-  ubatch_size: {
-    key: 'ubatch_size',
-    title: 'uBatch Size',
-    description: 'Physical maximum batch size for processing prompts.',
-    controller_type: 'input',
-    controller_props: {
-      value: 512,
-      placeholder: '512',
-      type: 'number',
-    },
-  },
-  n_gpu_layers: {
-    key: 'n_gpu_layers',
+  ngl: {
+    key: 'ngl',
     title: 'GPU Layers',
     description:
       'Number of model layers to offload to the GPU (-1 for all layers, 0 for CPU only).',
     controller_type: 'input',
     controller_props: {
-      value: -1,
+      value: 100,
       placeholder: '-1',
       type: 'number',
     },
   },
-  device: {
-    key: 'device',
-    title: 'Devices for Offload',
-    description:
-      "Comma-separated list of devices to use for offloading (e.g., 'cuda:0', 'cuda:0,cuda:1'). Leave empty to use default/CPU only.",
+
+  temperature: {
+    key: 'temp',
+    title: 'Temperature',
+    description: 'Temperature for sampling (higher = more random).',
     controller_type: 'input',
     controller_props: {
-      value: '',
-      placeholder: 'cuda:0',
-      type: 'text',
+      value: 0.8,
+      placeholder: '0.8',
+      type: 'number',
+      min: 0,
+      step: 0.01,
     },
   },
-  split_mode: {
-    key: 'split_mode',
-    title: 'GPU Split Mode',
-    description: 'How to split the model across multiple GPUs.',
-    controller_type: 'dropdown',
-    controller_props: {
-      value: 'layer',
-      options: [
-        { value: 'none', name: 'None' },
-        { value: 'layer', name: 'Layer' },
-        { value: 'row', name: 'Row' },
-      ],
-    },
-  },
-  main_gpu: {
-    key: 'main_gpu',
-    title: 'Main GPU Index',
-    description:
-      'The GPU to use for the model (split-mode=none) or intermediate results (split-mode=row).',
+  top_k: {
+    key: 'top_k',
+    title: 'Top K',
+    description: 'Top-K sampling (0 = disabled).',
     controller_type: 'input',
     controller_props: {
-      value: 0,
-      placeholder: '0',
+      value: 40,
+      placeholder: '40',
       type: 'number',
     },
   },
-  flash_attn: {
-    key: 'flash_attn',
-    title: 'Flash Attention',
-    description: 'Enable Flash Attention for optimized performance.',
-    controller_type: 'checkbox',
+  top_p: {
+    key: 'top_p',
+    title: 'Top P',
+    description: 'Top-P sampling (1.0 = disabled).',
+    controller_type: 'input',
     controller_props: {
-      value: false,
+      value: 0.9,
+      placeholder: '0.9',
+      type: 'number',
     },
   },
-  cont_batching: {
-    key: 'cont_batching',
-    title: 'Continuous Batching',
-    description:
-      'Enable continuous batching (a.k.a dynamic batching) for concurrent requests (default: enabled).',
-    controller_type: 'checkbox',
-    controller_props: {
-      value: true,
-    },
-  },
-  no_mmap: {
-    key: 'no_mmap',
-    title: 'Disable mmap',
-    description:
-      'Do not memory-map model (slower load but may reduce pageouts if not using mlock).',
-    controller_type: 'checkbox',
-    controller_props: {
-      value: false,
-    },
-  },
-  mlock: {
-    key: 'mlock',
-    title: 'MLock',
-    description:
-      'Force system to keep model in RAM, preventing swapping/compression.',
-    controller_type: 'checkbox',
-    controller_props: {
-      value: false,
-    },
-  },
-  no_kv_offload: {
-    key: 'no_kv_offload',
-    title: 'Disable KV Offload',
-    description: 'Disable KV cache offload to GPU (if GPU is used).',
-    controller_type: 'checkbox',
-    controller_props: {
-      value: false,
-    },
-  },
-  cache_type_k: {
-    key: 'cache_type_k',
-    title: 'KV Cache K Type',
-    description: 'KV cache data type for Keys (default: f16).',
-    controller_type: 'dropdown',
-    controller_props: {
-      value: 'f16',
-      options: [
-        { value: 'f32', name: 'f32' },
-        { value: 'f16', name: 'f16' },
-        { value: 'bf16', name: 'bf16' },
-        { value: 'q8_0', name: 'q8_0' },
-        { value: 'q4_0', name: 'q4_0' },
-        { value: 'q4_1', name: 'q4_1' },
-        { value: 'iq4_nl', name: 'iq4_nl' },
-        { value: 'q5_0', name: 'q5_0' },
-        { value: 'q5_1', name: 'q5_1' },
-      ],
-    },
-  },
-  cache_type_v: {
-    key: 'cache_type_v',
-    title: 'KV Cache V Type',
-    description: 'KV cache data type for Values (default: f16).',
-    controller_type: 'dropdown',
-    controller_props: {
-      value: 'f16',
-      options: [
-        { value: 'f32', name: 'f32' },
-        { value: 'f16', name: 'f16' },
-        { value: 'bf16', name: 'bf16' },
-        { value: 'q8_0', name: 'q8_0' },
-        { value: 'q4_0', name: 'q4_0' },
-        { value: 'q4_1', name: 'q4_1' },
-        { value: 'iq4_nl', name: 'iq4_nl' },
-        { value: 'q5_0', name: 'q5_0' },
-        { value: 'q5_1', name: 'q5_1' },
-      ],
-    },
-  },
-  defrag_thold: {
-    key: 'defrag_thold',
-    title: 'KV Cache Defragmentation Threshold',
-    description: 'Threshold for KV cache defragmentation (< 0 to disable).',
+  min_p: {
+    key: 'min_p',
+    title: 'Min P',
+    description: 'Min-P sampling (0.0 = disabled).',
     controller_type: 'input',
     controller_props: {
       value: 0.1,
       placeholder: '0.1',
       type: 'number',
-      step: 0.01,
     },
   },
-  rope_scaling: {
-    key: 'rope_scaling',
-    title: 'RoPE Scaling Method',
-    description: 'RoPE frequency scaling method.',
-    controller_type: 'dropdown',
-    controller_props: {
-      value: 'none',
-      options: [
-        { value: 'none', name: 'None' },
-        { value: 'linear', name: 'Linear' },
-        { value: 'yarn', name: 'YaRN' },
-      ],
-    },
-  },
-  rope_scale: {
-    key: 'rope_scale',
-    title: 'RoPE Scale Factor',
-    description: 'RoPE context scaling factor.',
-    controller_type: 'input',
-    controller_props: {
-      value: 1.0,
-      placeholder: '1.0',
-      type: 'number',
-      min: 0,
-      step: 0.01,
-    },
-  },
-  rope_freq_base: {
-    key: 'rope_freq_base',
-    title: 'RoPE Frequency Base',
-    description: 'RoPE base frequency (0 = loaded from model).',
-    controller_type: 'input',
-    controller_props: {
-      value: 0,
-      placeholder: '0 (model default)',
-      type: 'number',
-    },
-  },
-  rope_freq_scale: {
-    key: 'rope_freq_scale',
-    title: 'RoPE Frequency Scale Factor',
-    description: 'RoPE frequency scaling factor.',
-    controller_type: 'input',
-    controller_props: {
-      value: 1.0,
-      placeholder: '1.0',
-      type: 'number',
-      min: 0,
-      step: 0.01,
-    },
-  },
-  mirostat: {
-    key: 'mirostat',
-    title: 'Mirostat Mode',
+  repeat_last_n: {
+    key: 'repeat_last_n',
+    title: 'Repeat Last N',
     description:
-      'Use Mirostat sampling (0: disabled, 1: Mirostat V1, 2: Mirostat V2).',
-    controller_type: 'dropdown',
-    controller_props: {
-      value: 0,
-      options: [
-        { value: 0, name: 'Disabled' },
-        { value: 1, name: 'Mirostat V1' },
-        { value: 2, name: 'Mirostat V2' },
-      ],
-    },
-  },
-  mirostat_lr: {
-    key: 'mirostat_lr',
-    title: 'Mirostat Learning Rate',
-    description: 'Mirostat learning rate (eta).',
+      'Number of tokens to consider for repeat penalty (0 = disabled, -1 = ctx_size).',
     controller_type: 'input',
     controller_props: {
-      value: 0.1,
-      placeholder: '0.1',
+      value: 64,
+      placeholder: '64',
       type: 'number',
-      min: 0,
-      step: 0.01,
     },
   },
-  mirostat_ent: {
-    key: 'mirostat_ent',
-    title: 'Mirostat Target Entropy',
-    description: 'Mirostat target entropy (tau).',
+  repeat_penalty: {
+    key: 'repeat_penalty',
+    title: 'Repeat Penalty',
+    description: 'Penalize repeating token sequences (1.0 = disabled).',
     controller_type: 'input',
     controller_props: {
-      value: 5.0,
-      placeholder: '5.0',
+      value: 1.0,
+      placeholder: '1.0',
       type: 'number',
-      min: 0,
-      step: 0.01,
     },
   },
-  json_schema_file: {
-    key: 'json_schema_file',
-    title: 'JSON Schema File',
-    description: 'Path to a JSON schema file to constrain generations.',
+  presence_penalty: {
+    key: 'presence_penalty',
+    title: 'Presence Penalty',
+    description: 'Repeat alpha presence penalty (0.0 = disabled).',
     controller_type: 'input',
     controller_props: {
-      value: '',
-      placeholder: 'path/to/schema.json',
-      type: 'text',
+      value: 0.0,
+      placeholder: '0.0',
+      type: 'number',
+    },
+  },
+  frequency_penalty: {
+    key: 'frequency_penalty',
+    title: 'Frequency Penalty',
+    description: 'Repeat alpha frequency penalty (0.0 = disabled).',
+    controller_type: 'input',
+    controller_props: {
+      value: 0.0,
+      placeholder: '0.0',
+      type: 'number',
     },
   },
 }
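For context, the `getModelBy` selector added in useModelProvider.ts searches only the currently selected provider, which is what lets DropdownModelProvider.tsx gate its UI on `currentModel`: a model that actually exists under the selected provider, rather than whatever was last stored in `selectedModel`. A rough standalone equivalent, with hypothetical minimal types and a made-up model id in the usage line:

```ts
// Sketch only: minimal stand-ins for the app's Model / ModelProvider types.
type Model = { id: string }
type ModelProvider = { provider: string; models: Model[] }

function getModelBy(
  providers: ModelProvider[],
  selectedProvider: string,
  modelId: string
): Model | undefined {
  // Look only inside the selected provider; a model id from another provider
  // (or one that no longer exists) resolves to undefined.
  const provider = providers.find((p) => p.provider === selectedProvider)
  return provider?.models.find((m) => m.id === modelId)
}

// Example (hypothetical id): getModelBy(providers, 'llama.cpp', 'qwen3-4b')
```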