diff --git a/extensions/engine-management-extension/models/cohere.json b/extensions/engine-management-extension/models/cohere.json index f78498b93..3c03be04e 100644 --- a/extensions/engine-management-extension/models/cohere.json +++ b/extensions/engine-management-extension/models/cohere.json @@ -26,5 +26,19 @@ "stream": true }, "engine": "cohere" + }, + { + "model": "command-a-03-2025", + "object": "model", + "name": "Command A", + "version": "1.0", + "description": "Command A is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It is best suited for complex RAG workflows and multi-step tool use.", + "inference_params": { + "max_tokens": 4096, + "temperature": 0.7, + "max_temperature": 1.0, + "stream": true + }, + "engine": "cohere" } ] diff --git a/extensions/engine-management-extension/models/deepseek.json b/extensions/engine-management-extension/models/deepseek.json index 29d5406bf..0e9930445 100644 --- a/extensions/engine-management-extension/models/deepseek.json +++ b/extensions/engine-management-extension/models/deepseek.json @@ -2,7 +2,7 @@ { "model": "deepseek-chat", "object": "model", - "name": "DeepSeek Chat", + "name": "DeepSeek V3", "version": "1.0", "description": "The deepseek-chat model has been upgraded to DeepSeek-V3. 
deepseek-reasoner points to the new model DeepSeek-R1", "inference_params": { diff --git a/extensions/engine-management-extension/models/google_gemini.json b/extensions/engine-management-extension/models/google_gemini.json index 392754ee6..2c21df5ee 100644 --- a/extensions/engine-management-extension/models/google_gemini.json +++ b/extensions/engine-management-extension/models/google_gemini.json @@ -1,30 +1,4 @@ [ - { - "model": "gemini-2.0-flash", - "object": "model", - "name": "Gemini 2.0 Flash", - "version": "1.0", - "description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.6, - "stream": true - }, - "engine": "google_gemini" - }, - { - "model": "gemini-2.0-flash-lite-preview", - "object": "model", - "name": "Gemini 2.0 Flash-Lite Preview", - "version": "1.0", - "description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.6, - "stream": true - }, - "engine": "google_gemini" - }, { "model": "gemini-1.5-flash", "object": "model", @@ -63,5 +37,57 @@ "stream": true }, "engine": "google_gemini" + }, + { + "model": "gemini-2.5-pro-preview-05-06", + "object": "model", + "name": "Gemini 2.5 Pro Preview", + "version": "1.0", + "description": "Gemini 2.5 Pro is our state-of-the-art thinking model, capable of reasoning over complex problems in code, math, and STEM, as well as analyzing large datasets, codebases, and documents using long context. 
Gemini 2.5 Pro rate limits are more restricted since it is an experimental / preview model.", + "inference_params": { + "max_tokens": 65536, + "temperature": 0.6, + "stream": true + }, + "engine": "google_gemini" + }, + { + "model": "gemini-2.5-flash-preview-04-17", + "object": "model", + "name": "Gemini 2.5 Flash Preview", + "version": "1.0", + "description": "Our best model in terms of price-performance, offering well-rounded capabilities. Gemini 2.5 Flash rate limits are more restricted since it is an experimental / preview model.", + "inference_params": { + "max_tokens": 8192, + "temperature": 0.6, + "stream": true + }, + "engine": "google_gemini" + }, + { + "model": "gemini-2.0-flash", + "object": "model", + "name": "Gemini 2.0 Flash", + "version": "1.0", + "description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.", + "inference_params": { + "max_tokens": 8192, + "temperature": 0.6, + "stream": true + }, + "engine": "google_gemini" + }, + { + "model": "gemini-2.0-flash-lite", + "object": "model", + "name": "Gemini 2.0 Flash-Lite", + "version": "1.0", + "description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.", + "inference_params": { + "max_tokens": 8192, + "temperature": 0.6, + "stream": true + }, + "engine": "google_gemini" + } -] +] \ No newline at end of file diff --git a/extensions/engine-management-extension/models/groq.json b/extensions/engine-management-extension/models/groq.json index 38a0f3835..981bd563b 100644 --- a/extensions/engine-management-extension/models/groq.json +++ b/extensions/engine-management-extension/models/groq.json @@ -50,108 +50,6 @@ }, "engine": "groq" }, - { - "model": "llama-3.2-11b-text-preview", - "object": "model", - "name": "Groq Llama 3.2 11b Text Preview", - "version": "1.1", - "description": "Groq Llama 3.2 11b Text Preview with supercharged speed!", - 
"inference_params": { - "max_tokens": 8192, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - }, - { - "model": "llama-3.2-11b-vision-preview", - "object": "model", - "name": "Groq Llama 3.2 11b Vision Preview", - "version": "1.1", - "description": "Groq Llama 3.2 11b Vision Preview with supercharged speed!", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - }, - { - "model": "llama-3.2-1b-preview", - "object": "model", - "name": "Groq Llama 3.2 1b Preview", - "version": "1.1", - "description": "Groq Llama 3.2 1b Preview with supercharged speed!", - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - }, - { - "model": "llama-3.2-3b-preview", - "object": "model", - "name": "Groq Llama 3.2 3b Preview", - "version": "1.1", - "description": "Groq Llama 3.2 3b Preview with supercharged speed!", - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - }, - { - "model": "llama-3.2-90b-text-preview", - "object": "model", - "name": "Groq Llama 3.2 90b Text Preview", - "version": "1.1", - "description": "Groq Llama 3.2 90b Text Preview with supercharged speed!", - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - }, - { - "model": "llama-3.2-90b-vision-preview", - "object": "model", - "name": "Groq Llama 3.2 90b Vision Preview", - "version": "1.1", - "description": "Groq Llama 3.2 90b Vision Preview with supercharged speed!", - "parameters": { - 
"max_tokens": 8192, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - }, { "model": "gemma2-9b-it", "object": "model", @@ -170,11 +68,11 @@ "engine": "groq" }, { - "model": "mixtral-8x7b-32768", + "model": "llama-3.3-70b-versatile", "object": "model", - "name": "Groq Mixtral 8x7B Instruct", - "version": "1.2", - "description": "Groq Mixtral 8x7B Instruct is Mixtral with supercharged speed!", + "name": "Groq Llama 3.3 70b Versatile", + "version": "3.3", + "description": "Groq Llama 3.3 70b Versatile with supercharged speed!", "parameters": { "max_tokens": 32768, "temperature": 0.7,