diff --git a/extensions/engine-management-extension/engines.mjs b/extensions/engine-management-extension/engines.mjs
index e85035423..eafe8a09c 100644
--- a/extensions/engine-management-extension/engines.mjs
+++ b/extensions/engine-management-extension/engines.mjs
@@ -6,6 +6,8 @@ import groq from './resources/groq.json' with { type: 'json' }
import martian from './resources/martian.json' with { type: 'json' }
import mistral from './resources/mistral.json' with { type: 'json' }
import nvidia from './resources/nvidia.json' with { type: 'json' }
+import deepseek from './resources/deepseek.json' with { type: 'json' }
+import googleGemini from './resources/google_gemini.json' with { type: 'json' }
import anthropicModels from './models/anthropic.json' with { type: 'json' }
import cohereModels from './models/cohere.json' with { type: 'json' }
@@ -15,6 +17,8 @@ import groqModels from './models/groq.json' with { type: 'json' }
import martianModels from './models/martian.json' with { type: 'json' }
import mistralModels from './models/mistral.json' with { type: 'json' }
import nvidiaModels from './models/nvidia.json' with { type: 'json' }
+import deepseekModels from './models/deepseek.json' with { type: 'json' }
+import googleGeminiModels from './models/google_gemini.json' with { type: 'json' }
const engines = [
anthropic,
@@ -25,6 +29,8 @@ const engines = [
mistral,
martian,
nvidia,
+ deepseek,
+ googleGemini,
]
const models = [
...anthropicModels,
@@ -35,5 +41,7 @@ const models = [
...mistralModels,
...martianModels,
...nvidiaModels,
+ ...deepseekModels,
+ ...googleGeminiModels,
]
export { engines, models }
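
(A quick consistency check — a sketch, not part of the patch. Assuming each resource JSON exposes the "engine" field shown in the resources below, every entry in models should reference a registered engine:)

// check-engines.mjs — hypothetical helper; requires a Node version that
// supports JSON import attributes, as engines.mjs itself does
import { engines, models } from './engines.mjs'

const engineIds = new Set(engines.map((e) => e.engine))
for (const m of models) {
  if (!engineIds.has(m.engine)) {
    console.warn(`model ${m.model} references unknown engine ${m.engine}`)
  }
}
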
diff --git a/extensions/engine-management-extension/models/deepseek.json b/extensions/engine-management-extension/models/deepseek.json
new file mode 100644
index 000000000..29d5406bf
--- /dev/null
+++ b/extensions/engine-management-extension/models/deepseek.json
@@ -0,0 +1,28 @@
+[
+ {
+ "model": "deepseek-chat",
+ "object": "model",
+ "name": "DeepSeek Chat",
+ "version": "1.0",
+ "description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "deepseek"
+ },
+ {
+ "model": "deepseek-reasoner",
+ "object": "model",
+ "name": "DeepSeek R1",
+ "version": "1.0",
+ "description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. For details, please refer to Reasoning Model.",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "deepseek"
+ }
+]
diff --git a/extensions/engine-management-extension/models/google_gemini.json b/extensions/engine-management-extension/models/google_gemini.json
new file mode 100644
index 000000000..392754ee6
--- /dev/null
+++ b/extensions/engine-management-extension/models/google_gemini.json
@@ -0,0 +1,67 @@
+[
+ {
+ "model": "gemini-2.0-flash",
+ "object": "model",
+ "name": "Gemini 2.0 Flash",
+ "version": "1.0",
+ "description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "google_gemini"
+ },
+ {
+ "model": "gemini-2.0-flash-lite-preview",
+ "object": "model",
+ "name": "Gemini 2.0 Flash-Lite Preview",
+ "version": "1.0",
+ "description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "google_gemini"
+ },
+ {
+ "model": "gemini-1.5-flash",
+ "object": "model",
+ "name": "Gemini 1.5 Flash",
+ "version": "1.0",
+ "description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "google_gemini"
+ },
+ {
+ "model": "gemini-1.5-flash-8b",
+ "object": "model",
+ "name": "Gemini 1.5 Flash-8B",
+ "version": "1.0",
+ "description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "google_gemini"
+ },
+ {
+ "model": "gemini-1.5-pro",
+ "object": "model",
+ "name": "Gemini 1.5 Pro",
+ "version": "1.0",
+ "description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. ",
+ "inference_params": {
+ "max_tokens": 8192,
+ "temperature": 0.6,
+ "stream": true
+ },
+ "engine": "google_gemini"
+ }
+]
diff --git a/extensions/engine-management-extension/models/openai.json b/extensions/engine-management-extension/models/openai.json
index 8f59b42ea..7373118b3 100644
--- a/extensions/engine-management-extension/models/openai.json
+++ b/extensions/engine-management-extension/models/openai.json
@@ -82,7 +82,6 @@
"max_tokens": 100000,
"temperature": 1,
"top_p": 1,
- "stream": true,
"frequency_penalty": 0,
"presence_penalty": 0
},
diff --git a/extensions/engine-management-extension/models/openrouter.json b/extensions/engine-management-extension/models/openrouter.json
index b9714bb57..356ed2ddc 100644
--- a/extensions/engine-management-extension/models/openrouter.json
+++ b/extensions/engine-management-extension/models/openrouter.json
@@ -1,16 +1,46 @@
[
{
- "model": "open-router-auto",
+ "model": "deepseek/deepseek-r1:free",
"object": "model",
- "name": "OpenRouter",
+ "name": "DeepSeek: R1",
"version": "1.0",
- "description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+ "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
- "max_tokens": 128000,
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
- "presence_penalty": 0
+ "presence_penalty": 0,
+ "stream": true
+ },
+ "engine": "openrouter"
+ },
+ {
+ "model": "deepseek/deepseek-r1-distill-llama-70b:free",
+ "object": "model",
+ "name": "DeepSeek: R1 Distill Llama 70B",
+ "version": "1.0",
+ "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+ "inference_params": {
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "frequency_penalty": 0,
+ "presence_penalty": 0,
+ "stream": true
+ },
+ "engine": "openrouter"
+ },
+ {
+ "model": "meta-llama/llama-3.1-405b-instruct:free",
+ "object": "model",
+ "name": "Meta: Llama 3.1 405B Instruct",
+ "version": "1.0",
+ "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+ "inference_params": {
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "frequency_penalty": 0,
+ "presence_penalty": 0,
+ "stream": true
},
"engine": "openrouter"
}
diff --git a/extensions/engine-management-extension/resources/anthropic.json b/extensions/engine-management-extension/resources/anthropic.json
index 02315147d..2b73edcc1 100644
--- a/extensions/engine-management-extension/resources/anthropic.json
+++ b/extensions/engine-management-extension/resources/anthropic.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-anthropic-extension",
+ "id": "anthropic",
"type": "remote",
"engine": "anthropic",
"url": "https://console.anthropic.com/settings/keys",
diff --git a/extensions/engine-management-extension/resources/cohere.json b/extensions/engine-management-extension/resources/cohere.json
index 7f18c9558..3d7bee48f 100644
--- a/extensions/engine-management-extension/resources/cohere.json
+++ b/extensions/engine-management-extension/resources/cohere.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-cohere-extension",
+ "id": "cohere",
"type": "remote",
"engine": "cohere",
"url": "https://dashboard.cohere.com/api-keys",
diff --git a/extensions/engine-management-extension/resources/deepseek.json b/extensions/engine-management-extension/resources/deepseek.json
new file mode 100644
index 000000000..1df8bc638
--- /dev/null
+++ b/extensions/engine-management-extension/resources/deepseek.json
@@ -0,0 +1,23 @@
+{
+ "id": "deepseek",
+ "type": "remote",
+ "engine": "deepseek",
+ "url": "https://platform.deepseek.com/api_keys",
+ "api_key": "",
+ "metadata": {
+ "get_models_url": "https://api.deepseek.com/models",
+ "header_template": "Authorization: Bearer {{api_key}}",
+ "transform_req": {
+ "chat_completions": {
+ "url": "https://api.deepseek.com/chat/completions",
+ "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ }
+ },
+ "transform_resp": {
+ "chat_completions": {
+ "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ }
+ },
+ "explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing"
+ }
+}
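
(For reference, the transform_req template above whitelists standard chat-completion keys and forwards them as-is. A sketch of the request it ultimately produces — the endpoint and header shape come from the resource above; the API key and payload are placeholders:)

// sketch-deepseek-request.mjs — illustrative only
const res = await fetch('https://api.deepseek.com/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    Authorization: 'Bearer <DEEPSEEK_API_KEY>', // rendered from header_template
  },
  body: JSON.stringify({
    model: 'deepseek-chat', // from models/deepseek.json
    messages: [{ role: 'user', content: 'Hello' }],
    stream: false, // "stream" is among the whitelisted keys
  }),
})
console.log(await res.json())
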
diff --git a/extensions/engine-management-extension/resources/google_gemini.json b/extensions/engine-management-extension/resources/google_gemini.json
new file mode 100644
index 000000000..3b0ea6a2f
--- /dev/null
+++ b/extensions/engine-management-extension/resources/google_gemini.json
@@ -0,0 +1,23 @@
+{
+ "id": "google_gemini",
+ "type": "remote",
+ "engine": "google_gemini",
+ "url": "https://aistudio.google.com/apikey",
+ "api_key": "",
+ "metadata": {
+ "get_models_url": "https://generativelanguage.googleapis.com/v1beta/models",
+ "header_template": "Authorization: Bearer {{api_key}}",
+ "transform_req": {
+ "chat_completions": {
+ "url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
+ "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ }
+ },
+ "transform_resp": {
+ "chat_completions": {
+ "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ }
+ },
+ "explore_models_url": "https://ai.google.dev/gemini-api/docs/models/gemini"
+ }
+}
diff --git a/extensions/engine-management-extension/resources/groq.json b/extensions/engine-management-extension/resources/groq.json
index 67d6e0932..28f903980 100644
--- a/extensions/engine-management-extension/resources/groq.json
+++ b/extensions/engine-management-extension/resources/groq.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-groq-extension",
+ "id": "groq",
"type": "remote",
"engine": "groq",
"url": "https://console.groq.com/keys",
diff --git a/extensions/engine-management-extension/resources/martian.json b/extensions/engine-management-extension/resources/martian.json
index 6fb1cecf7..9caafcf88 100644
--- a/extensions/engine-management-extension/resources/martian.json
+++ b/extensions/engine-management-extension/resources/martian.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-martian-extension",
+ "id": "martian",
"type": "remote",
"engine": "martian",
"url": "https://withmartian.com/dashboard",
diff --git a/extensions/engine-management-extension/resources/mistral.json b/extensions/engine-management-extension/resources/mistral.json
index 3013d5f46..4a24471a2 100644
--- a/extensions/engine-management-extension/resources/mistral.json
+++ b/extensions/engine-management-extension/resources/mistral.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-mistral-extension",
+ "id": "mistral",
"type": "remote",
"engine": "mistral",
"url": "https://console.mistral.ai/api-keys/",
diff --git a/extensions/engine-management-extension/resources/nvidia.json b/extensions/engine-management-extension/resources/nvidia.json
index f703f8f18..98c303ed4 100644
--- a/extensions/engine-management-extension/resources/nvidia.json
+++ b/extensions/engine-management-extension/resources/nvidia.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-nvidia-extension",
+ "id": "nvidia",
"type": "remote",
"engine": "nvidia",
"url": "https://org.ngc.nvidia.com/setup/personal-keys",
diff --git a/extensions/engine-management-extension/resources/openai.json b/extensions/engine-management-extension/resources/openai.json
index 62260715c..16f2b1efa 100644
--- a/extensions/engine-management-extension/resources/openai.json
+++ b/extensions/engine-management-extension/resources/openai.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-openai-extension",
+ "id": "openai",
"type": "remote",
"engine": "openai",
"url": "https://platform.openai.com/account/api-keys",
@@ -10,7 +10,7 @@
"transform_req": {
"chat_completions": {
"url": "https://api.openai.com/v1/chat/completions",
- "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
+ "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
diff --git a/extensions/engine-management-extension/resources/openrouter.json b/extensions/engine-management-extension/resources/openrouter.json
index 39872838d..608f6f2fb 100644
--- a/extensions/engine-management-extension/resources/openrouter.json
+++ b/extensions/engine-management-extension/resources/openrouter.json
@@ -1,5 +1,5 @@
{
- "id": "@janhq/inference-openrouter-extension",
+ "id": "openrouter",
"type": "remote",
"engine": "openrouter",
"url": "https://openrouter.ai/keys",
@@ -10,7 +10,7 @@
"transform_req": {
"chat_completions": {
"url": "https://openrouter.ai/api/v1/chat/completions",
- "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
+ "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx
index 0c0d1c97c..da866714e 100644
--- a/web/containers/ErrorMessage/index.tsx
+++ b/web/containers/ErrorMessage/index.tsx
@@ -89,6 +89,9 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
{message?.content[0]?.text?.value && (