{
  "openapi": "3.1.0",
  "info": {
    "title": "👋Jan API",
    "description": "OpenAI-compatible API for local inference with Jan. Run AI models locally with complete privacy using llama.cpp's high-performance inference engine. Supports GGUF models with CPU and GPU acceleration. No authentication required for local usage.",
    "version": "0.3.14",
    "contact": {
      "name": "Jan Support",
      "url": "https://jan.ai/support",
      "email": "support@jan.ai"
    },
    "license": {
      "name": "Apache 2.0",
      "url": "https://github.com/janhq/jan/blob/main/LICENSE"
    }
  },
  "servers": [
    {
      "url": "http://127.0.0.1:1337",
      "description": "Local Jan Server (Default IP)"
    },
    {
      "url": "http://localhost:1337",
      "description": "Local Jan Server (localhost)"
    },
    {
      "url": "http://localhost:8080",
      "description": "Local Jan Server (Alternative Port)"
    }
  ],
  "tags": [
    {
      "name": "Models",
      "description": "List and describe available models"
    },
    {
      "name": "Chat",
      "description": "Chat completion endpoints for conversational AI"
    },
    {
      "name": "Completions",
      "description": "Text completion endpoints for generating text"
    },
    {
      "name": "Extras",
      "description": "Additional utility endpoints for tokenization and text processing"
    }
  ],
  "paths": {
    "/v1/completions": {
      "post": {
        "tags": ["Completions"],
        "summary": "Create completion",
        "description": "Creates a completion for the provided prompt and parameters. This endpoint is compatible with OpenAI's completions API.",
        "operationId": "create_completion",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/CreateCompletionRequest"
              },
              "examples": {
                "basic": {
                  "summary": "Basic Completion",
                  "description": "Simple text completion example",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "prompt": "Once upon a time",
                    "max_tokens": 50,
                    "temperature": 0.7
                  }
                },
                "creative": {
                  "summary": "Creative Writing",
                  "description": "Generate creative content with higher temperature",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "prompt": "Write a short poem about coding:",
                    "max_tokens": 150,
                    "temperature": 1,
                    "top_p": 0.95
                  }
                },
                "code": {
                  "summary": "Code Generation",
                  "description": "Generate code with lower temperature for accuracy",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "prompt": "# Python function to calculate fibonacci\ndef fibonacci(n):",
                    "max_tokens": 200,
                    "temperature": 0.3,
                    "stop": ["\n\n", "def ", "class "]
                  }
                },
                "streaming": {
                  "summary": "Streaming Response",
                  "description": "Stream tokens as they are generated",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "prompt": "Explain quantum computing in simple terms:",
                    "max_tokens": 300,
                    "temperature": 0.7,
                    "stream": true
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateCompletionResponse"
                }
              }
            }
          },
          "202": {
            "description": "Accepted - Request is being processed",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateCompletionResponse"
                }
              },
              "text/event-stream": {
                "schema": {
                  "type": "string",
                  "format": "binary",
                  "description": "Server-sent events stream for streaming responses"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/v1/chat/completions": {
      "post": {
        "tags": ["Chat"],
        "summary": "Create chat completion",
        "description": "Creates a model response for the given chat conversation. This endpoint is compatible with OpenAI's chat completions API.",
        "operationId": "create_chat_completion",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/CreateChatCompletionRequest"
              },
              "examples": {
                "simple": {
                  "summary": "Simple Chat",
                  "description": "Basic question and answer",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "messages": [
                      {
                        "role": "user",
                        "content": "What is the capital of France?"
                      }
                    ],
                    "max_tokens": 100,
                    "temperature": 0.7
                  }
                },
                "system": {
                  "summary": "With System Message",
                  "description": "Chat with system instructions",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "messages": [
                      {
                        "role": "system",
                        "content": "You are a helpful assistant that speaks like a pirate."
                      },
                      {
                        "role": "user",
                        "content": "Tell me about the weather today."
                      }
                    ],
                    "max_tokens": 150,
                    "temperature": 0.8
                  }
                },
                "conversation": {
                  "summary": "Multi-turn Conversation",
                  "description": "Extended conversation with context",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "messages": [
                      {
                        "role": "system",
                        "content": "You are a knowledgeable AI assistant."
                      },
                      {
                        "role": "user",
                        "content": "What is machine learning?"
                      },
                      {
                        "role": "assistant",
                        "content": "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed."
                      },
                      {
                        "role": "user",
                        "content": "Can you give me a simple example?"
                      }
                    ],
                    "max_tokens": 200,
                    "temperature": 0.7
                  }
                },
                "streaming": {
                  "summary": "Streaming Chat",
                  "description": "Stream the response token by token",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "messages": [
                      {
                        "role": "user",
                        "content": "Write a haiku about programming"
                      }
                    ],
                    "stream": true,
                    "temperature": 0.9
                  }
                },
                "json_mode": {
                  "summary": "JSON Response",
                  "description": "Request structured JSON output",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "messages": [
                      {
                        "role": "user",
                        "content": "List 3 programming languages with their main use cases in JSON format"
                      }
                    ],
                    "max_tokens": 200,
                    "temperature": 0.5,
                    "response_format": {
                      "type": "json_object"
                    }
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateChatCompletionResponse"
                }
              },
              "text/event-stream": {
                "schema": {
                  "type": "string",
                  "format": "binary",
                  "description": "Server-sent events stream for streaming responses"
                }
              }
            }
          },
          "202": {
            "description": "Accepted - Request is being processed",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateChatCompletionResponse"
                }
              },
              "text/event-stream": {
                "schema": {
                  "type": "string",
                  "format": "binary",
                  "description": "Server-sent events stream for streaming responses"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/v1/models": {
      "get": {
        "tags": ["Models"],
        "summary": "List available models",
        "description": "Lists the currently available models and provides basic information about each one such as the owner and availability.",
        "operationId": "list_models",
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ModelList"
                },
                "example": {
                  "object": "list",
                  "data": [
                    {
                      "id": "gemma-2-2b-it-Q8_0",
                      "object": "model",
                      "created": 1686935002,
                      "owned_by": "jan"
                    },
                    {
                      "id": "llama-3.1-8b-instruct-Q4_K_M",
                      "object": "model",
                      "created": 1686935002,
                      "owned_by": "jan"
                    },
                    {
                      "id": "mistral-7b-instruct-v0.3-Q4_K_M",
                      "object": "model",
                      "created": 1686935002,
                      "owned_by": "jan"
                    },
                    {
                      "id": "phi-3-mini-4k-instruct-Q4_K_M",
                      "object": "model",
                      "created": 1686935002,
                      "owned_by": "jan"
                    }
                  ]
                }
              }
            }
          }
        }
      }
    },
    "/extras/tokenize": {
      "post": {
        "tags": ["Extras"],
        "summary": "Tokenize text",
        "description": "Convert text input into tokens using the model's tokenizer.",
        "operationId": "tokenize",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/TokenizeRequest"
              },
              "example": {
                "input": "Hello, world!",
                "model": "gemma-2-2b-it-Q8_0"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/TokenizeResponse"
                },
                "example": {
                  "tokens": [15339, 11, 1917, 0]
                }
              }
            }
          }
        }
      }
    },
    "/extras/tokenize/count": {
      "post": {
        "tags": ["Extras"],
        "summary": "Count tokens",
        "description": "Count the number of tokens in the provided text.",
        "operationId": "count_tokens",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/TokenizeRequest"
              },
              "example": {
                "input": "How many tokens does this text have?",
                "model": "gemma-2-2b-it-Q8_0"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/TokenCountResponse"
                },
                "example": {
                  "count": 8
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "TokenizeRequest": {
        "type": "object",
        "properties": {
          "input": {
            "type": "string",
            "description": "The text to tokenize"
          },
          "model": {
            "type": "string",
            "description": "The model to use for tokenization",
            "enum": [
              "gemma-2-2b-it-Q8_0",
              "llama-3.1-8b-instruct-Q4_K_M",
              "mistral-7b-instruct-v0.3-Q4_K_M",
              "phi-3-mini-4k-instruct-Q4_K_M"
            ]
          }
        },
        "required": ["input"]
      },
      "TokenizeResponse": {
        "type": "object",
        "properties": {
          "tokens": {
            "type": "array",
            "items": {
              "type": "integer"
            },
            "description": "Array of token IDs"
          }
        },
        "required": ["tokens"]
      },
      "TokenCountResponse": {
        "type": "object",
        "properties": {
          "count": {
            "type": "integer",
            "description": "Number of tokens"
          }
        },
        "required": ["count"]
      }
    },
    "securitySchemes": {
      "bearerAuth": {
        "type": "http",
        "scheme": "bearer",
        "bearerFormat": "JWT",
        "description": "Optional: Enter your API key if authentication is enabled. The Bearer prefix will be added automatically."
      }
    }
  },
  "x-jan-local-features": {
    "engine": "llama.cpp",
    "features": [
      "GGUF model support",
      "CPU and GPU acceleration",
      "Quantized model support (Q4, Q5, Q8)",
      "Metal acceleration on macOS",
      "CUDA support on NVIDIA GPUs",
      "ROCm support on AMD GPUs",
      "AVX/AVX2/AVX512 optimizations",
      "Memory-mapped model loading"
    ],
    "privacy": {
      "local_processing": true,
      "no_telemetry": true,
      "offline_capable": true
    },
    "model_formats": ["GGUF", "GGML"],
    "default_settings": {
      "context_length": 4096,
      "batch_size": 512,
      "threads": "auto"
    }
  }
}