chore: add function to model settings (#5108)

This commit is contained in:
Louis 2025-05-26 18:53:08 +07:00 committed by GitHub
parent 3aaa6078c0
commit 0fbc4a4664
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 93 additions and 255 deletions

View File

@ -25,6 +25,7 @@ const DropdownModelProvider = ({ model }: DropdownModelProviderProps) => {
providers,
getProviderByName,
selectModelProvider,
getModelBy,
selectedProvider,
selectedModel,
} = useModelProvider()
@ -52,8 +53,11 @@ const DropdownModelProvider = ({ model }: DropdownModelProviderProps) => {
}
}, [selectedProvider, selectedModel])
if (!providers.length) return null
const currentModel = selectedModel?.id
? getModelBy(selectedModel?.id)
: undefined
if (!providers.length) return null
const provider = getProviderByName(selectedProvider)
@ -81,9 +85,9 @@ const DropdownModelProvider = ({ model }: DropdownModelProviderProps) => {
</span>
</button>
</DropdownMenuTrigger>
{selectedModel && (
{currentModel && (
<ModelSetting
model={selectedModel as Model}
model={currentModel as Model}
provider={provider as ProviderObject}
/>
)}

View File

@ -57,11 +57,19 @@ export function ModelSetting({ model, provider }: ModelSettingProps) {
models: updatedModels,
})
const params = Object.entries(updatedModel.settings).reduce(
(acc, [key, value]) => {
const rawVal = value.controller_props?.value
const num = parseFloat(rawVal as string)
acc[key] = !isNaN(num) ? num : rawVal
return acc
},
{} as Record<string, unknown>
) as ModelSettingParams
updateModel({
id: model.id,
settings: Object.entries(updatedModel.settings).map(([key, value]) => ({
[key]: value.controller_props?.value,
})) as ModelSettingParams,
settings: params,
...(params as unknown as object),
})
}
}
@ -75,7 +83,7 @@ export function ModelSetting({ model, provider }: ModelSettingProps) {
</SheetTrigger>
<SheetContent className="h-[calc(100%-8px)] top-1 right-1 rounded-e-md overflow-y-auto">
<SheetHeader>
<SheetTitle>Model Setting {model.id}</SheetTitle>
<SheetTitle>Model Settings - {model.id}</SheetTitle>
<SheetDescription>
Configure model settings to optimize performance and behavior.
</SheetDescription>

View File

@ -180,14 +180,14 @@ export const useChat = () => {
resetTokenSpeed,
provider,
getMessages,
setAbortController,
updateStreamingContent,
addMessage,
setPrompt,
selectedModel,
currentAssistant?.instructions,
setAbortController,
updateLoadingModel,
currentAssistant,
tools,
updateLoadingModel,
updateTokenSpeed,
]
)

View File

@ -6,6 +6,7 @@ type ModelProviderState = {
providers: ModelProvider[]
selectedProvider: string
selectedModel: Model | null
getModelBy: (modelId: string) => Model | undefined
setProviders: (providers: ModelProvider[]) => void
getProviderByName: (providerName: string) => ModelProvider | undefined
updateProvider: (providerName: string, data: Partial<ModelProvider>) => void
@ -24,6 +25,13 @@ export const useModelProvider = create<ModelProviderState>()(
providers: [],
selectedProvider: 'llama.cpp',
selectedModel: null,
/**
 * Looks up a model by id among the models of the currently selected provider.
 *
 * @param modelId - Id of the model to find.
 * @returns The matching model, or `undefined` when the selected provider is
 * not in the provider list or has no model with that id.
 */
getModelBy: (modelId: string) => {
  // Read the store once; both fields come from the same state snapshot.
  const { providers, selectedProvider } = get()
  const activeProvider = providers.find(
    (candidate) => candidate.provider === selectedProvider
  )
  return activeProvider?.models.find((entry) => entry.id === modelId)
},
setProviders: (providers) =>
set((state) => {
const existingProviders = state.providers

View File

@ -1,6 +1,6 @@
export const modelSettings = {
ctx_size: {
key: 'ctx_size',
ctx_len: {
key: 'ctx_len',
title: 'Context Size',
description: 'Size of the prompt context (0 = loaded from model).',
controller_type: 'input',
@ -10,290 +10,108 @@ export const modelSettings = {
type: 'number',
},
},
n_predict: {
key: 'n_predict',
title: 'Max Tokens to Predict',
description: 'Maximum number of tokens to generate (-1 = infinity).',
controller_type: 'input',
controller_props: {
value: -1,
placeholder: '-1',
type: 'number',
},
},
batch_size: {
key: 'batch_size',
title: 'Batch Size',
description: 'Logical maximum batch size for processing prompts.',
controller_type: 'input',
controller_props: {
value: 2048,
placeholder: '2048',
type: 'number',
},
},
ubatch_size: {
key: 'ubatch_size',
title: 'uBatch Size',
description: 'Physical maximum batch size for processing prompts.',
controller_type: 'input',
controller_props: {
value: 512,
placeholder: '512',
type: 'number',
},
},
n_gpu_layers: {
key: 'n_gpu_layers',
ngl: {
key: 'ngl',
title: 'GPU Layers',
description:
'Number of model layers to offload to the GPU (-1 for all layers, 0 for CPU only).',
controller_type: 'input',
controller_props: {
value: -1,
value: 100,
placeholder: '-1',
type: 'number',
},
},
device: {
key: 'device',
title: 'Devices for Offload',
description:
"Comma-separated list of devices to use for offloading (e.g., 'cuda:0', 'cuda:0,cuda:1'). Leave empty to use default/CPU only.",
temperature: {
key: 'temp',
title: 'Temperature',
description: 'Temperature for sampling (higher = more random).',
controller_type: 'input',
controller_props: {
value: '',
placeholder: 'cuda:0',
type: 'text',
value: 0.8,
placeholder: '0.8',
type: 'number',
min: 0,
step: 0.01,
},
},
split_mode: {
key: 'split_mode',
title: 'GPU Split Mode',
description: 'How to split the model across multiple GPUs.',
controller_type: 'dropdown',
controller_props: {
value: 'layer',
options: [
{ value: 'none', name: 'None' },
{ value: 'layer', name: 'Layer' },
{ value: 'row', name: 'Row' },
],
},
},
main_gpu: {
key: 'main_gpu',
title: 'Main GPU Index',
description:
'The GPU to use for the model (split-mode=none) or intermediate results (split-mode=row).',
top_k: {
key: 'top_k',
title: 'Top K',
description: 'Top-K sampling (0 = disabled).',
controller_type: 'input',
controller_props: {
value: 0,
placeholder: '0',
value: 40,
placeholder: '40',
type: 'number',
},
},
flash_attn: {
key: 'flash_attn',
title: 'Flash Attention',
description: 'Enable Flash Attention for optimized performance.',
controller_type: 'checkbox',
top_p: {
key: 'top_p',
title: 'Top P',
description: 'Top-P sampling (1.0 = disabled).',
controller_type: 'input',
controller_props: {
value: false,
value: 0.9,
placeholder: '0.9',
type: 'number',
},
},
cont_batching: {
key: 'cont_batching',
title: 'Continuous Batching',
description:
'Enable continuous batching (a.k.a dynamic batching) for concurrent requests (default: enabled).',
controller_type: 'checkbox',
controller_props: {
value: true,
},
},
no_mmap: {
key: 'no_mmap',
title: 'Disable mmap',
description:
'Do not memory-map model (slower load but may reduce pageouts if not using mlock).',
controller_type: 'checkbox',
controller_props: {
value: false,
},
},
mlock: {
key: 'mlock',
title: 'MLock',
description:
'Force system to keep model in RAM, preventing swapping/compression.',
controller_type: 'checkbox',
controller_props: {
value: false,
},
},
no_kv_offload: {
key: 'no_kv_offload',
title: 'Disable KV Offload',
description: 'Disable KV cache offload to GPU (if GPU is used).',
controller_type: 'checkbox',
controller_props: {
value: false,
},
},
cache_type_k: {
key: 'cache_type_k',
title: 'KV Cache K Type',
description: 'KV cache data type for Keys (default: f16).',
controller_type: 'dropdown',
controller_props: {
value: 'f16',
options: [
{ value: 'f32', name: 'f32' },
{ value: 'f16', name: 'f16' },
{ value: 'bf16', name: 'bf16' },
{ value: 'q8_0', name: 'q8_0' },
{ value: 'q4_0', name: 'q4_0' },
{ value: 'q4_1', name: 'q4_1' },
{ value: 'iq4_nl', name: 'iq4_nl' },
{ value: 'q5_0', name: 'q5_0' },
{ value: 'q5_1', name: 'q5_1' },
],
},
},
cache_type_v: {
key: 'cache_type_v',
title: 'KV Cache V Type',
description: 'KV cache data type for Values (default: f16).',
controller_type: 'dropdown',
controller_props: {
value: 'f16',
options: [
{ value: 'f32', name: 'f32' },
{ value: 'f16', name: 'f16' },
{ value: 'bf16', name: 'bf16' },
{ value: 'q8_0', name: 'q8_0' },
{ value: 'q4_0', name: 'q4_0' },
{ value: 'q4_1', name: 'q4_1' },
{ value: 'iq4_nl', name: 'iq4_nl' },
{ value: 'q5_0', name: 'q5_0' },
{ value: 'q5_1', name: 'q5_1' },
],
},
},
defrag_thold: {
key: 'defrag_thold',
title: 'KV Cache Defragmentation Threshold',
description: 'Threshold for KV cache defragmentation (< 0 to disable).',
min_p: {
key: 'min_p',
title: 'Min P',
description: 'Min-P sampling (0.0 = disabled).',
controller_type: 'input',
controller_props: {
value: 0.1,
placeholder: '0.1',
type: 'number',
step: 0.01,
},
},
rope_scaling: {
key: 'rope_scaling',
title: 'RoPE Scaling Method',
description: 'RoPE frequency scaling method.',
controller_type: 'dropdown',
controller_props: {
value: 'none',
options: [
{ value: 'none', name: 'None' },
{ value: 'linear', name: 'Linear' },
{ value: 'yarn', name: 'YaRN' },
],
},
},
rope_scale: {
key: 'rope_scale',
title: 'RoPE Scale Factor',
description: 'RoPE context scaling factor.',
controller_type: 'input',
controller_props: {
value: 1.0,
placeholder: '1.0',
type: 'number',
min: 0,
step: 0.01,
},
},
rope_freq_base: {
key: 'rope_freq_base',
title: 'RoPE Frequency Base',
description: 'RoPE base frequency (0 = loaded from model).',
controller_type: 'input',
controller_props: {
value: 0,
placeholder: '0 (model default)',
type: 'number',
},
},
rope_freq_scale: {
key: 'rope_freq_scale',
title: 'RoPE Frequency Scale Factor',
description: 'RoPE frequency scaling factor.',
controller_type: 'input',
controller_props: {
value: 1.0,
placeholder: '1.0',
type: 'number',
min: 0,
step: 0.01,
},
},
mirostat: {
key: 'mirostat',
title: 'Mirostat Mode',
repeat_last_n: {
key: 'repeat_last_n',
title: 'Repeat Last N',
description:
'Use Mirostat sampling (0: disabled, 1: Mirostat V1, 2: Mirostat V2).',
controller_type: 'dropdown',
controller_props: {
value: 0,
options: [
{ value: 0, name: 'Disabled' },
{ value: 1, name: 'Mirostat V1' },
{ value: 2, name: 'Mirostat V2' },
],
},
},
mirostat_lr: {
key: 'mirostat_lr',
title: 'Mirostat Learning Rate',
description: 'Mirostat learning rate (eta).',
'Number of tokens to consider for repeat penalty (0 = disabled, -1 = ctx_size).',
controller_type: 'input',
controller_props: {
value: 0.1,
placeholder: '0.1',
value: 64,
placeholder: '64',
type: 'number',
min: 0,
step: 0.01,
},
},
mirostat_ent: {
key: 'mirostat_ent',
title: 'Mirostat Target Entropy',
description: 'Mirostat target entropy (tau).',
repeat_penalty: {
key: 'repeat_penalty',
title: 'Repeat Penalty',
description: 'Penalize repeating token sequences (1.0 = disabled).',
controller_type: 'input',
controller_props: {
value: 5.0,
placeholder: '5.0',
value: 1.0,
placeholder: '1.0',
type: 'number',
min: 0,
step: 0.01,
},
},
json_schema_file: {
key: 'json_schema_file',
title: 'JSON Schema File',
description: 'Path to a JSON schema file to constrain generations.',
presence_penalty: {
key: 'presence_penalty',
title: 'Presence Penalty',
description: 'Repeat alpha presence penalty (0.0 = disabled).',
controller_type: 'input',
controller_props: {
value: '',
placeholder: 'path/to/schema.json',
type: 'text',
value: 0.0,
placeholder: '0.0',
type: 'number',
},
},
frequency_penalty: {
key: 'frequency_penalty',
title: 'Frequency Penalty',
description: 'Repeat alpha frequency penalty (0.0 = disabled).',
controller_type: 'input',
controller_props: {
value: 0.0,
placeholder: '0.0',
type: 'number',
},
},
}