feat: Add Jan API server Swagger UI (#6502)
* feat: Add Jan API server Swagger UI

  - Serve OpenAPI spec (`static/openapi.json`) directly from the proxy server.
  - Implement Swagger UI assets (`swagger-ui.css`, `swagger-ui-bundle.js`, `favicon.ico`) and a simple HTML wrapper under `/docs`.
  - Extend the proxy whitelist to include Swagger UI routes.
  - Add routing logic for `/openapi.json`, `/docs`, and Swagger UI static files.
  - Update whitelisted paths and integrate CORS handling for the new endpoints.

* feat: serve Swagger UI at root path

  The Swagger UI endpoint previously lived under `/docs`. The route handling and exclusion list have been updated so the UI is now served directly at `/`. This simplifies access, aligns with the expected root URL in the Tauri frontend, and removes the now-unused `/docs` path handling.

* feat: add model loading state and translations for local API server

  Implemented a loading indicator for model startup, updated the start/stop button to reflect the model-loading and server-starting states, and disabled interactions while pending. Added new translation keys (`loadingModel`, `startingServer`) across all supported locales (en, de, id, pl, vn, zh-CN, zh-TW) and integrated them into the UI. Included a small delay after model start to ensure backend state consistency. This improves user feedback and prevents race conditions during server initialization.
parent 359dd8f41e
commit d1a8bdc4e3
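With this change, the proxy serves the embedded OpenAPI document at `/openapi.json` and the Swagger UI wrapper at the root path. A quick way to sanity-check the new routes from a script is sketched below (assuming the server is reachable at the default `localhost:1337` address used in the spec; adjust to the host and port configured in the Local API Server settings):

```typescript
// Minimal sketch: probe the documentation routes added by this commit.
// The base URL is an assumption; replace it with your configured host/port.
const base = 'http://localhost:1337'

async function checkDocsRoutes(): Promise<void> {
  // The OpenAPI document is embedded in the binary and served as JSON.
  const spec = await fetch(`${base}/openapi.json`)
  console.log('spec:', spec.status, spec.headers.get('content-type'))

  // The Swagger UI HTML wrapper is now served at the root path.
  const ui = await fetch(`${base}/`)
  const html = await ui.text()
  console.log('ui:', ui.status, html.includes('swagger-ui'))
}

checkDocsRoutes().catch(console.error)
```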
@@ -215,7 +215,14 @@ async fn proxy_request(
    let path = get_destination_path(original_path, &config.prefix);
    let method = parts.method.clone();

    let whitelisted_paths = ["/", "/openapi.json", "/favicon.ico"];
    let whitelisted_paths = [
        "/",
        "/openapi.json",
        "/favicon.ico",
        "/docs/swagger-ui.css",
        "/docs/swagger-ui-bundle.js",
        "/docs/swagger-ui-standalone-preset.js",
    ];
    let is_whitelisted_path = whitelisted_paths.contains(&path.as_str());

    if !is_whitelisted_path {
@@ -448,6 +455,82 @@ async fn proxy_request(
            return Ok(response_builder.body(Body::from(body_str)).unwrap());
        }

        (hyper::Method::GET, "/openapi.json") => {
            let body = include_str!("../../../static/openapi.json"); // relative to src-tauri/src/
            return Ok(Response::builder()
                .status(StatusCode::OK)
                .header(hyper::header::CONTENT_TYPE, "application/json")
                .body(Body::from(body))
                .unwrap());
        }

        // DOCS route
        (hyper::Method::GET, "/") => {
            let html = r#"
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>API Docs</title>
    <link rel="stylesheet" type="text/css" href="/docs/swagger-ui.css">
</head>
<body>
    <div id="swagger-ui"></div>
    <script src="/docs/swagger-ui-bundle.js"></script>
    <script>
        window.onload = () => {
            SwaggerUIBundle({
                url: '/openapi.json',
                dom_id: '#swagger-ui',
            });
        };
    </script>
</body>
</html>
"#;

            let mut response_builder = Response::builder()
                .status(StatusCode::OK)
                .header(hyper::header::CONTENT_TYPE, "text/html");

            response_builder = add_cors_headers_with_host_and_origin(
                response_builder,
                &host_header,
                &origin_header,
                &config.trusted_hosts,
            );

            return Ok(response_builder.body(Body::from(html)).unwrap());
        }

        (hyper::Method::GET, "/docs/swagger-ui.css") => {
            let css = include_str!("../../../static/swagger-ui/swagger-ui.css");
            return Ok(Response::builder()
                .status(StatusCode::OK)
                .header(hyper::header::CONTENT_TYPE, "text/css")
                .body(Body::from(css))
                .unwrap());
        }

        (hyper::Method::GET, "/docs/swagger-ui-bundle.js") => {
            let js = include_str!("../../../static/swagger-ui/swagger-ui-bundle.js");
            return Ok(Response::builder()
                .status(StatusCode::OK)
                .header(hyper::header::CONTENT_TYPE, "application/javascript")
                .body(Body::from(js))
                .unwrap());
        }

        (hyper::Method::GET, "/favicon.ico") => {
            let icon = include_bytes!("../../../static/swagger-ui/favicon.ico");
            return Ok(Response::builder()
                .status(StatusCode::OK)
                .header(hyper::header::CONTENT_TYPE, "image/x-icon")
                .body(Body::from(icon.as_ref()))
                .unwrap());
        }

        _ => {
            let is_explicitly_whitelisted_get = method == hyper::Method::GET
                && whitelisted_paths.contains(&destination_path.as_str());
src-tauri/static/openapi.json (new file, 690 lines)
@@ -0,0 +1,690 @@
{
  "openapi": "3.0.0",
  "info": {
    "title": "Jan API Server Endpoints",
    "description": "Jan API server provides a REST API interface for seamless interaction with large language models (LLMs) for third party apps. Compatible with the OpenAI API spec, it enables straightforward API access of models available in Jan.",
    "version": "1.0"
  },
  "servers": [
    {
      "url": "http://localhost:1337/v1",
      "description": "Jan API server"
    }
  ],
  "tags": [
    {
      "name": "Models",
      "description": "Endpoints for model discovery and management"
    },
    {
      "name": "Inference",
      "description": "Endpoint for generating completions (chat or text) from a model"
    }
  ],
  "paths": {
    "/models": {
      "get": {
        "summary": "List loaded models",
        "description": "Returns information about the loaded model(s). The list always contains a single element describing the current model. See the OpenAI *Models* API documentation for details.",
        "operationId": "listModels",
        "tags": ["Models"],
        "responses": {
          "200": {
            "description": "A list containing a single model object",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ListModelsResponseDto"
                }
              }
            }
          }
        }
      }
    },
    "/chat/completion": {
      "post": {
        "summary": "Create chat completion",
        "description": "Generates a completion for the supplied prompt. Streaming mode is supported. All extra options described in the documentation are optional and follow the OpenAI‑compatible naming.",
        "operationId": "createChatCompletion",
        "tags": ["Inference"],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/CreateChatCompletionDto"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Completion result",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ChatCompletionResponseDto"
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "bearerAuth": {
        "type": "http",
        "scheme": "bearer",
        "bearerFormat": "API-Key",
        "description": "API key required for all endpoints."
      }
    },
    "schemas": {
      "ModelDto": {
        "type": "object",
        "description": "Model metadata as returned by `/v1/models`.",
        "properties": {
          "source_url": {
            "type": "string",
            "description": "URL to the source of the model."
          },
          "id": {
            "type": "string",
            "description": "Unique identifier for the model (normally the file path)."
          },
          "object": {
            "type": "string",
            "enum": ["model"]
          },
          "name": {
            "type": "string",
            "description": "Human‑readable name of the model."
          },
          "version": {
            "type": "string",
            "default": "1.0",
            "description": "Version string of the model."
          },
          "description": {
            "type": "string",
            "description": "Long description of the model."
          },
          "format": {
            "type": "string",
            "description": "File format (e.g., gguf)."
          },
          "ctx_len": {
            "type": "integer",
            "description": "Context length the model was trained with."
          },
          "prompt_template": {
            "type": "string",
            "description": "Template used to build prompts."
          },
          "temperature": {
            "type": "number",
            "description": "Default temperature for generation."
          },
          "top_p": {
            "type": "number",
            "description": "Default nucleus‑sampling probability."
          },
          "stream": {
            "type": "boolean",
            "description": "Whether streaming is enabled by default."
          },
          "max_tokens": {
            "type": "integer",
            "description": "Maximum tokens the model can generate."
          },
          "stop": {
            "type": "array",
            "items": {
              "type": "string"
            },
            "description": "Default stop sequences."
          },
          "frequency_penalty": {
            "type": "number",
            "description": "Default frequency penalty."
          },
          "presence_penalty": {
            "type": "number",
            "description": "Default presence penalty."
          },
          "author": {
            "type": "string"
          },
          "tags": {
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          "size": {
            "type": "integer"
          },
          "cover": {
            "type": "string"
          },
          "engine": {
            "type": "string"
          }
        },
        "required": [
          "source_url",
          "id",
          "object",
          "name",
          "version",
          "description",
          "format",
          "ctx_len",
          "prompt_template",
          "temperature",
          "top_p",
          "stream",
          "max_tokens",
          "stop",
          "frequency_penalty",
          "presence_penalty",
          "author",
          "tags",
          "size",
          "cover",
          "engine"
        ]
      },
      "ListModelsResponseDto": {
        "type": "object",
        "description": "Response for `GET /v1/models` – a list that always contains a single model entry.",
        "properties": {
          "object": {
            "type": "string",
            "enum": ["list"]
          },
          "data": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/ModelDto"
            }
          }
        },
        "required": ["object", "data"]
      },
      "CreateChatCompletionDto": {
        "type": "object",
        "description": "Request body for `POST /v1/chat/completion`. All fields follow the OpenAI naming; fields marked *optional* are not required.",
        "properties": {
          "model": {
            "type": "string",
            "description": "The unique identifier of the model to use."
          },
          "messages": {
            "type": "array",
            "description": "Array of chat messages that form the prompt.",
            "items": {
              "$ref": "#/components/schemas/ChatCompletionMessage"
            }
          },
          "stream": {
            "type": "boolean",
            "description": "If true, the server streams tokens as they are generated.",
            "default": false
          },
          "max_tokens": {
            "type": "integer",
            "description": "Maximum number of tokens to generate."
          },
          "stop": {
            "type": "array",
            "description": "Stop sequences – generation stops before emitting any of these strings.",
            "items": {
              "type": "string"
            },
            "default": []
          },
          "temperature": {
            "type": "number",
            "description": "Adjusts randomness of the output.",
            "default": 0.8
          },
          "dynatemp_range": {
            "type": "number",
            "description": "Dynamic‑temperature range – final temperature is sampled from `[temperature‑range, temperature+range]`.",
            "default": 0
          },
          "dynatemp_exponent": {
            "type": "number",
            "description": "Exponent for dynamic‑temperature scaling.",
            "default": 1
          },
          "top_k": {
            "type": "integer",
            "description": "Restricts sampling to the K most probable tokens.",
            "default": 40
          },
          "top_p": {
            "type": "number",
            "description": "Nucleus sampling cutoff (cumulative probability).",
            "default": 0.95
          },
          "min_p": {
            "type": "number",
            "description": "Minimum probability for a token to be considered relative to the most likely token.",
            "default": 0.05
          },
          "typical_p": {
            "type": "number",
            "description": "Enable locally typical sampling with parameter p.",
            "default": 1.0
          },
          "n_predict": {
            "type": "integer",
            "description": "Maximum number of tokens to predict; -1 = unlimited, 0 = evaluate prompt only.",
            "default": -1
          },
          "n_indent": {
            "type": "integer",
            "description": "Minimum line indentation for generated code.",
            "default": 0
          },
          "n_keep": {
            "type": "integer",
            "description": "Tokens from the prompt to retain when context is exceeded. 0 = none, -1 = all.",
            "default": 0
          },
          "presence_penalty": {
            "type": "number",
            "description": "Presence penalty (0.0 = disabled).",
            "default": 0
          },
          "frequency_penalty": {
            "type": "number",
            "description": "Frequency penalty (0.0 = disabled).",
            "default": 0
          },
          "repeat_penalty": {
            "type": "number",
            "description": "Repetition penalty for token sequences.",
            "default": 1.1
          },
          "repeat_last_n": {
            "type": "integer",
            "description": "How many last tokens to consider for repeat penalty (0 = disabled, -1 = context size).",
            "default": 64
          },
          "dry_multiplier": {
            "type": "number",
            "description": "DRY (Don’t Repeat Yourself) multiplier (0.0 = disabled).",
            "default": 0
          },
          "dry_base": {
            "type": "number",
            "description": "DRY base value.",
            "default": 1.75
          },
          "dry_allowed_length": {
            "type": "integer",
            "description": "Length after which DRY penalty grows exponentially.",
            "default": 2
          },
          "dry_penalty_last_n": {
            "type": "integer",
            "description": "How many tokens to scan for DRY repetitions (0 = disabled, -1 = context size).",
            "default": -1
          },
          "dry_sequence_breakers": {
            "type": "array",
            "description": "Sequence breakers for DRY sampling.",
            "items": {
              "type": "string"
            },
            "default": ["\\n", ":", "\"", "*"]
          },
          "xtc_probability": {
            "type": "number",
            "description": "Probability for token removal via XTC sampler (0.0 = disabled).",
            "default": 0
          },
          "xtc_threshold": {
            "type": "number",
            "description": "Minimum probability threshold for XTC.",
            "default": 0.1
          },
          "mirostat": {
            "type": "integer",
            "description": "Enable Mirostat sampling (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0).",
            "default": 0
          },
          "mirostat_tau": {
            "type": "number",
            "description": "Target entropy for Mirostat.",
            "default": 5
          },
          "mirostat_eta": {
            "type": "number",
            "description": "Learning rate for Mirostat.",
            "default": 0.1
          },
          "grammar": {
            "type": "string",
            "description": "Grammar for grammar‑based sampling."
          },
          "json_schema": {
            "type": "object",
            "description": "JSON schema for grammar‑based sampling."
          },
          "seed": {
            "type": "integer",
            "description": "RNG seed; -1 = random.",
            "default": -1
          },
          "ignore_eos": {
            "type": "boolean",
            "description": "Continue generating after EOS token.",
            "default": false
          },
          "logit_bias": {
            "type": "object",
            "additionalProperties": {
              "oneOf": [
                {
                  "type": "number"
                },
                {
                  "type": "boolean"
                }
              ]
            },
            "description": "Modify the likelihood of a token appearing. Accepts a JSON object mapping token IDs to a bias value from -100 to 100, or `false` to ban a token.",
            "default": {}
          },
          "n_probs": {
            "type": "integer",
            "description": "If > 0, top-N token probabilities are returned.",
            "default": 0
          },
          "min_keep": {
            "type": "integer",
            "description": "Force samplers to return at least N tokens.",
            "default": 0
          },
          "t_max_predict_ms": {
            "type": "integer",
            "description": "Maximum generation time in milliseconds (0 = disabled).",
            "default": 0
          },
          "id_slot": {
            "type": "integer",
            "description": "Assign the request to a specific inference slot (-1 = auto).",
            "default": -1
          },
          "cache_prompt": {
            "type": "boolean",
            "description": "Reuse KV cache from previous requests when possible.",
            "default": true
          },
          "return_tokens": {
            "type": "boolean",
            "description": "Include raw token IDs in the response.",
            "default": false
          },
          "samplers": {
            "type": "array",
            "description": "Ordered list of samplers to apply.",
            "items": {
              "type": "string"
            },
            "default": [
              "dry",
              "top_k",
              "typ_p",
              "top_p",
              "min_p",
              "xtc",
              "temperature"
            ]
          },
          "timings_per_token": {
            "type": "boolean",
            "description": "Include timing information per token.",
            "default": false
          },
          "return_progress": {
            "type": "boolean",
            "description": "Include prompt‑processing progress when streaming.",
            "default": false
          },
          "post_sampling_probs": {
            "type": "boolean",
            "description": "Return top‑N probabilities *after* sampling.",
            "default": false
          },
          "response_fields": {
            "type": "array",
            "description": "Select which fields to include in the response.",
            "items": {
              "type": "string"
            }
          },
          "lora": {
            "type": "array",
            "description": "LoRA adapters to apply for this request.",
            "items": {
              "type": "object",
              "properties": {
                "id": {
                  "type": "integer"
                },
                "scale": {
                  "type": "number"
                }
              },
              "required": ["id", "scale"]
            }
          },
          "multimodal_data": {
            "type": "array",
            "description": "Base64‑encoded multimodal data (images, audio, …). Must match the number of `<__media__>` markers in the prompt.",
            "items": {
              "type": "string"
            }
          }
        },
        "required": ["model", "messages"]
      },
      "ChatCompletionResponseDto": {
        "type": "object",
        "description": "Response from `POST /v1/chat/completion`.",
        "properties": {
          "id": {
            "type": "string",
            "description": "Unique identifier for the completion."
          },
          "object": {
            "type": "string",
            "enum": ["chat.completion"]
          },
          "created": {
            "type": "integer",
            "description": "Unix timestamp of creation."
          },
          "model": {
            "type": "string",
            "description": "Model used for the completion."
          },
          "choices": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/ChoiceDto"
            },
            "description": "List of generated choices."
          },
          "usage": {
            "$ref": "#/components/schemas/UsageDto"
          },
          "system_fingerprint": {
            "type": "string",
            "description": "Fingerprint of the system configuration used."
          },
          "generation_settings": {
            "type": "object",
            "description": "The generation settings used for the completion."
          },
          "prompt": {
            "type": "string",
            "description": "The processed prompt."
          },
          "truncated": {
            "type": "boolean",
            "description": "True if the context size was exceeded."
          },
          "tokens_cached": {
            "type": "integer",
            "description": "Number of tokens from the prompt which were reused from a cache."
          },
          "tokens_evaluated": {
            "type": "integer",
            "description": "Number of tokens evaluated in total from the prompt."
          }
        },
        "required": ["id", "object", "created", "model", "choices", "usage"]
      },
      "ChatCompletionMessage": {
        "type": "object",
        "description": "A single turn in a chat conversation.",
        "properties": {
          "role": {
            "type": "string",
            "enum": ["system", "assistant", "user"],
            "description": "Who sent the message."
          },
          "content": {
            "type": "string",
            "description": "The textual content of the message."
          }
        },
        "required": ["role", "content"]
      },
      "ChoiceDto": {
        "type": "object",
        "properties": {
          "index": {
            "type": "integer"
          },
          "message": {
            "$ref": "#/components/schemas/ChatCompletionMessage"
          },
          "finish_reason": {
            "type": "string",
            "description": "Why the generation stopped (e.g., `stop`, `length`, `model`)."
          },
          "logprobs": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/ChatChoiceLogprobs"
            },
            "description": "Log probability information for the choice, if requested.",
            "nullable": true
          }
        },
        "required": ["index", "message", "finish_reason"]
      },
      "UsageDto": {
        "type": "object",
        "description": "Token usage statistics.",
        "properties": {
          "prompt_tokens": {
            "type": "integer"
          },
          "completion_tokens": {
            "type": "integer"
          },
          "total_tokens": {
            "type": "integer"
          }
        },
        "required": ["prompt_tokens", "completion_tokens", "total_tokens"]
      },
      "LogprobContent": {
        "type": "object",
        "properties": {
          "id": {
            "type": "integer",
            "description": "The token ID."
          },
          "token": {
            "type": "string",
            "description": "The token string."
          },
          "logprob": {
            "type": "number",
            "description": "The log probability of this token."
          },
          "prob": {
            "type": "number",
            "description": "The probability of this token (if post_sampling_probs is true)."
          },
          "bytes": {
            "type": "array",
            "items": {
              "type": "integer"
            },
            "description": "The token represented as a list of bytes."
          }
        }
      },
      "ChatChoiceLogprobs": {
        "type": "object",
        "properties": {
          "id": {
            "type": "integer",
            "description": "The token ID."
          },
          "token": {
            "type": "string",
            "description": "The most likely token."
          },
          "logprob": {
            "type": "number",
            "description": "The log probability of the most likely token."
          },
          "prob": {
            "type": "number",
            "description": "The probability of the most likely token (if post_sampling_probs is true)."
          },
          "bytes": {
            "type": "array",
            "items": {
              "type": "integer"
            },
            "description": "The most likely token represented as a list of bytes."
          },
          "top_logprobs": {
            "type": "array",
            "description": "List of the most likely tokens and their log probs.",
            "items": {
              "$ref": "#/components/schemas/LogprobContent"
            }
          },
          "top_probs": {
            "type": "array",
            "description": "List of the most likely tokens and their probs (if post_sampling_probs is true).",
            "items": {
              "$ref": "#/components/schemas/LogprobContent"
            }
          }
        }
      }
    }
  },
  "security": [
    {
      "bearerAuth": []
    }
  ]
}
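The spec above documents two routes, `GET /models` and `POST /chat/completion`. A rough usage sketch of the completion route follows (field names are taken from `CreateChatCompletionDto`; the model id, API key, and base URL are placeholders to replace with your local values):

```typescript
// Rough usage sketch for the chat completion route documented above.
// Model id, API key, and base URL are placeholders for your local setup.
const apiBase = 'http://localhost:1337/v1'

async function chat(prompt: string): Promise<string> {
  const res = await fetch(`${apiBase}/chat/completion`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: 'Bearer <your-api-key>', // bearerAuth scheme from the spec
    },
    body: JSON.stringify({
      model: '<model-id>', // required by CreateChatCompletionDto
      messages: [{ role: 'user', content: prompt }],
      stream: false,
      temperature: 0.8,
    }),
  })
  const data = await res.json() // ChatCompletionResponseDto
  return data.choices[0].message.content
}

chat('Hello from the Jan API server').then(console.log).catch(console.error)
```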
src-tauri/static/swagger-ui/favicon.ico (new binary file, 15 KiB; not shown)
src-tauri/static/swagger-ui/swagger-ui-bundle.js (new file, 2 lines; diff suppressed because one or more lines are too long)
src-tauri/static/swagger-ui/swagger-ui.css (new file, 3 lines; diff suppressed because one or more lines are too long)
@@ -160,7 +160,9 @@
  "localApiServer": {
    "title": "Lokaler API Server",
    "description": "Führe lokal einen OpenAI-kompatiblen Server aus.",
    "startServer": "Start Server",
    "startServer": "Server starten",
    "loadingModel": "Modell laden",
    "startingServer": "Server wird gestartet",
    "stopServer": "Stop Server",
    "serverLogs": "Server Logs",
    "serverLogsDesc": "Zeige detaillierte Logs des lokalen API-Servers an.",
@@ -161,6 +161,8 @@
    "title": "Local API Server",
    "description": "Run an OpenAI-compatible server locally.",
    "startServer": "Start Server",
    "loadingModel": "Loading Model",
    "startingServer": "Starting Server",
    "stopServer": "Stop Server",
    "serverLogs": "Server Logs",
    "serverLogsDesc": "View detailed logs of the local API server.",
@@ -161,6 +161,8 @@
    "title": "Server API Lokal",
    "description": "Jalankan server yang kompatibel dengan OpenAI secara lokal.",
    "startServer": "Mulai Server",
    "loadingModel": "Memuat Model",
    "startingServer": "Memulai Server",
    "stopServer": "Hentikan Server",
    "serverLogs": "Log Server",
    "serverLogsDesc": "Lihat log terperinci dari server API lokal.",
@@ -261,4 +263,4 @@
      "updateSuccess": "Llamacpp berhasil diperbarui",
      "updateError": "Gagal memperbarui Llamacpp"
    }
  }
}
@@ -161,6 +161,8 @@
    "title": "Lokalny Serwer API",
    "description": "Używaj lokalnego serwera kompatybilnego z API OpenAI.",
    "startServer": "Uruchom Serwer",
    "loadingModel": "Ładowanie modelu",
    "startingServer": "Uruchamianie serwera",
    "stopServer": "Zatrzymaj Serwer",
    "serverLogs": "Dzienniki Serwera",
    "serverLogsDesc": "Wyświetl szczegółowe dzienniki lokalnego serwera API.",
@@ -161,6 +161,8 @@
    "title": "Máy chủ API cục bộ",
    "description": "Chạy máy chủ tương thích với OpenAI cục bộ.",
    "startServer": "Khởi động máy chủ",
    "loadingModel": "Đang tải mô hình",
    "startingServer": "Đang khởi động máy chủ",
    "stopServer": "Dừng máy chủ",
    "serverLogs": "Nhật ký máy chủ",
    "serverLogsDesc": "Xem nhật ký chi tiết của máy chủ API cục bộ.",
@@ -261,4 +263,4 @@
      "updateSuccess": "Cập nhật Llamacpp thành công",
      "updateError": "Không thể cập nhật Llamacpp"
    }
  }
}
@@ -161,6 +161,8 @@
    "title": "本地 API 服务器",
    "description": "在本地运行与 OpenAI 兼容的服务器。",
    "startServer": "启动服务器",
    "loadingModel": "加载模型",
    "startingServer": "正在启动服务器",
    "stopServer": "停止服务器",
    "serverLogs": "服务器日志",
    "serverLogsDesc": "查看本地 API 服务器的详细日志。",
@@ -261,4 +263,5 @@
      "updateSuccess": "Llamacpp 更新成功",
      "updateError": "更新 Llamacpp 失败"
    }
  }
}
@@ -161,6 +161,8 @@
    "title": "本機 API 伺服器",
    "description": "在本機執行與 OpenAI 相容的伺服器。",
    "startServer": "啟動伺服器",
    "loadingModel": "載入模型",
    "startingServer": "正在啟動伺服器",
    "stopServer": "停止伺服器",
    "serverLogs": "伺服器日誌",
    "serverLogsDesc": "檢視本機 API 伺服器的詳細日誌。",
@@ -64,11 +64,14 @@ function LocalAPIServerContent() {

  useEffect(() => {
    const checkServerStatus = async () => {
      serviceHub.app().getServerStatus().then((running) => {
        if (running) {
          setServerStatus('running')
        }
      })
      serviceHub
        .app()
        .getServerStatus()
        .then((running) => {
          if (running) {
            setServerStatus('running')
          }
        })
    }
    checkServerStatus()
  }, [serviceHub, setServerStatus])
@@ -125,6 +128,8 @@ function LocalAPIServerContent() {
    return null
  }

  const [isModelLoading, setIsModelLoading] = useState(false)

  const toggleAPIServer = async () => {
    // Validate API key before starting server
    if (serverStatus === 'stopped') {
@@ -144,12 +149,20 @@ function LocalAPIServerContent() {
      }

      setServerStatus('pending')
      setIsModelLoading(true) // Start loading state

      // Start the model first
      serviceHub.models().startModel(modelToStart.provider, modelToStart.model)
      serviceHub
        .models()
        .startModel(modelToStart.provider, modelToStart.model)
        .then(() => {
          console.log(`Model ${modelToStart.model} started successfully`)
          setIsModelLoading(false) // Model loaded, stop loading state

          // Add a small delay for the backend to update state
          return new Promise((resolve) => setTimeout(resolve, 500))
        })
        .then(() => {
          // Then start the server
          return window.core?.api?.startServer({
            host: serverHost,
@@ -168,6 +181,7 @@ function LocalAPIServerContent() {
        .catch((error: unknown) => {
          console.error('Error starting server:', error)
          setServerStatus('stopped')
          setIsModelLoading(false) // Reset loading state on error
        })
    } else {
      setServerStatus('pending')
@@ -183,6 +197,18 @@ function LocalAPIServerContent() {
    }
  }

  const getButtonText = () => {
    if (isModelLoading) {
      return t('settings:localApiServer.loadingModel') // TODO: Update this translation
    }
    if (serverStatus === 'pending' && !isModelLoading) {
      return t('settings:localApiServer.startingServer') // TODO: Update this translation
    }
    return isServerRunning
      ? t('settings:localApiServer.stopServer')
      : t('settings:localApiServer.startServer')
  }

  const handleOpenLogs = async () => {
    try {
      await serviceHub.window().openLocalApiServerLogsWindow()
@@ -219,10 +245,9 @@ function LocalAPIServerContent() {
            onClick={toggleAPIServer}
            variant={isServerRunning ? 'destructive' : 'default'}
            size="sm"
            disabled={serverStatus === 'pending'} // Disable during any loading state
          >
            {isServerRunning
              ? t('settings:localApiServer.stopServer')
              : t('settings:localApiServer.startServer')}
            {getButtonText()}
          </Button>
        </div>
      </div>
@@ -317,7 +342,9 @@ function LocalAPIServerContent() {
        <CardItem
          title={t('settings:localApiServer.proxyTimeout')}
          description={t('settings:localApiServer.proxyTimeoutDesc')}
          actions={<ProxyTimeoutInput isServerRunning={isServerRunning} />}
          actions={
            <ProxyTimeoutInput isServerRunning={isServerRunning} />
          }
        />
      </Card>