diff --git a/extensions/inference-openai-extension/resources/models.json b/extensions/inference-openai-extension/resources/models.json index 4d19654bc..fc6896882 100644 --- a/extensions/inference-openai-extension/resources/models.json +++ b/extensions/inference-openai-extension/resources/models.json @@ -99,10 +99,10 @@ "format": "api", "settings": {}, "parameters": { + "max_tokens": 32768, "temperature": 1, "top_p": 1, "stream": true, - "max_tokens": 32768, "frequency_penalty": 0, "presence_penalty": 0 }, @@ -126,9 +126,9 @@ "format": "api", "settings": {}, "parameters": { + "max_tokens": 65536, "temperature": 1, "top_p": 1, - "max_tokens": 65536, "stream": true, "frequency_penalty": 0, "presence_penalty": 0 diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx index dd6caa795..f6adf090b 100644 --- a/web/containers/ModelDropdown/index.tsx +++ b/web/containers/ModelDropdown/index.tsx @@ -192,8 +192,12 @@ const ModelDropdown = ({ model?.settings.ctx_len ?? 8192 ) const overriddenParameters = { - ctx_len: Math.min(8192, model?.settings.ctx_len ?? 8192), - max_tokens: defaultContextLength, + ctx_len: !isLocalEngine(model?.engine) + ? undefined + : defaultContextLength, + max_tokens: !isLocalEngine(model?.engine) + ? (model?.parameters.max_tokens ?? 8192) + : defaultContextLength, } const modelParams = { diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts index 999c887cb..63de2d3ab 100644 --- a/web/hooks/useCreateNewThread.ts +++ b/web/hooks/useCreateNewThread.ts @@ -17,6 +17,7 @@ import { fileUploadAtom } from '@/containers/Providers/Jotai' import { toaster } from '@/containers/Toast' +import { isLocalEngine } from '@/utils/modelEngine' import { generateThreadId } from '@/utils/thread' import { useActiveModel } from './useActiveModel' @@ -113,12 +114,14 @@ export const useCreateNewThread = () => { ) const overriddenSettings = { - ctx_len: defaultContextLength, + ctx_len: !isLocalEngine(model?.engine) ? 
undefined : defaultContextLength, } // Use ctx length by default const overriddenParameters = { - max_tokens: defaultContextLength, + max_tokens: !isLocalEngine(model?.engine) + ? (model?.parameters.max_tokens ?? 8192) + : defaultContextLength, } const createdAt = Date.now() diff --git a/web/utils/modelEngine.ts b/web/utils/modelEngine.ts index 2ac4a1acd..d87d8d382 100644 --- a/web/utils/modelEngine.ts +++ b/web/utils/modelEngine.ts @@ -38,7 +38,9 @@ export const getLogoEngine = (engine: InferenceEngine) => { * @param engine * @returns */ -export const isLocalEngine = (engine: string) => { +export const isLocalEngine = (engine?: string) => { + if (!engine) return false + const engineObj = EngineManager.instance().get(engine) if (!engineObj) return false return (