{ "openapi": "3.1.0", "info": { "title": "👋Jan API", "description": "OpenAI-compatible API for local inference with Jan. Run AI models locally with complete privacy using llama.cpp's high-performance inference engine. Supports GGUF models with CPU and GPU acceleration. No authentication required for local usage.", "version": "0.3.14", "contact": { "name": "Jan Support", "url": "https://jan.ai/support", "email": "support@jan.ai" }, "license": { "name": "Apache 2.0", "url": "https://github.com/janhq/jan/blob/main/LICENSE" } }, "servers": [ { "url": "http://127.0.0.1:1337", "description": "Local Jan Server (Default IP)" }, { "url": "http://localhost:1337", "description": "Local Jan Server (localhost)" }, { "url": "http://localhost:8080", "description": "Local Jan Server (Alternative Port)" } ], "tags": [ { "name": "Models", "description": "List and describe available models" }, { "name": "Chat", "description": "Chat completion endpoints for conversational AI" }, { "name": "Completions", "description": "Text completion endpoints for generating text" }, { "name": "Extras", "description": "Additional utility endpoints for tokenization and text processing" } ], "paths": { "/v1/completions": { "post": { "tags": ["Completions"], "summary": "Create completion", "description": "Creates a completion for the provided prompt and parameters. This endpoint is compatible with OpenAI's completions API.", "operationId": "create_completion", "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateCompletionRequest" }, "examples": { "basic": { "summary": "Basic Completion", "description": "Simple text completion example", "value": { "model": "gemma-2-2b-it-Q8_0", "prompt": "Once upon a time", "max_tokens": 50, "temperature": 0.7 } }, "creative": { "summary": "Creative Writing", "description": "Generate creative content with higher temperature", "value": { "model": "gemma-2-2b-it-Q8_0", "prompt": "Write a short poem about coding:", "max_tokens": 150, "temperature": 1, "top_p": 0.95 } }, "code": { "summary": "Code Generation", "description": "Generate code with lower temperature for accuracy", "value": { "model": "gemma-2-2b-it-Q8_0", "prompt": "# Python function to calculate fibonacci\ndef fibonacci(n):", "max_tokens": 200, "temperature": 0.3, "stop": ["\n\n", "def ", "class "] } }, "streaming": { "summary": "Streaming Response", "description": "Stream tokens as they are generated", "value": { "model": "gemma-2-2b-it-Q8_0", "prompt": "Explain quantum computing in simple terms:", "max_tokens": 300, "temperature": 0.7, "stream": true } } } } } }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateCompletionResponse" } } } }, "202": { "description": "Accepted - Request is being processed", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateCompletionResponse" } }, "text/event-stream": { "schema": { "type": "string", "format": "binary", "description": "Server-sent events stream for streaming responses" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ValidationError" } } } } } } }, "/v1/chat/completions": { "post": { "tags": ["Chat"], "summary": "Create chat completion", "description": "Creates a model response for the given chat conversation. This endpoint is compatible with OpenAI's chat completions API.", "operationId": "create_chat_completion", "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateChatCompletionRequest" }, "examples": { "simple": { "summary": "Simple Chat", "description": "Basic question and answer", "value": { "model": "gemma-2-2b-it-Q8_0", "messages": [ { "role": "user", "content": "What is the capital of France?" } ], "max_tokens": 100, "temperature": 0.7 } }, "system": { "summary": "With System Message", "description": "Chat with system instructions", "value": { "model": "gemma-2-2b-it-Q8_0", "messages": [ { "role": "system", "content": "You are a helpful assistant that speaks like a pirate." }, { "role": "user", "content": "Tell me about the weather today." } ], "max_tokens": 150, "temperature": 0.8 } }, "conversation": { "summary": "Multi-turn Conversation", "description": "Extended conversation with context", "value": { "model": "gemma-2-2b-it-Q8_0", "messages": [ { "role": "system", "content": "You are a knowledgeable AI assistant." }, { "role": "user", "content": "What is machine learning?" }, { "role": "assistant", "content": "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed." }, { "role": "user", "content": "Can you give me a simple example?" } ], "max_tokens": 200, "temperature": 0.7 } }, "streaming": { "summary": "Streaming Chat", "description": "Stream the response token by token", "value": { "model": "gemma-2-2b-it-Q8_0", "messages": [ { "role": "user", "content": "Write a haiku about programming" } ], "stream": true, "temperature": 0.9 } }, "json_mode": { "summary": "JSON Response", "description": "Request structured JSON output", "value": { "model": "gemma-2-2b-it-Q8_0", "messages": [ { "role": "user", "content": "List 3 programming languages with their main use cases in JSON format" } ], "max_tokens": 200, "temperature": 0.5, "response_format": { "type": "json_object" } } } } } } }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateChatCompletionResponse" } }, "text/event-stream": { "schema": { "type": "string", "format": "binary", "description": "Server-sent events stream for streaming responses" } } } }, "202": { "description": "Accepted - Request is being processed", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateChatCompletionResponse" } }, "text/event-stream": { "schema": { "type": "string", "format": "binary", "description": "Server-sent events stream for streaming responses" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ValidationError" } } } } } } }, "/v1/models": { "get": { "tags": ["Models"], "summary": "List available models", "description": "Lists the currently available models and provides basic information about each one such as the owner and availability.", "operationId": "list_models", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ModelList" }, "example": { "object": "list", "data": [ { "id": "gemma-2-2b-it-Q8_0", "object": "model", "created": 1686935002, "owned_by": "jan" }, { "id": "llama-3.1-8b-instruct-Q4_K_M", "object": "model", "created": 1686935002, "owned_by": "jan" }, { "id": "mistral-7b-instruct-v0.3-Q4_K_M", "object": "model", "created": 1686935002, "owned_by": "jan" }, { "id": "phi-3-mini-4k-instruct-Q4_K_M", "object": "model", "created": 1686935002, "owned_by": "jan" } ] } } } } } } }, "/extras/tokenize": { "post": { "tags": ["Extras"], "summary": "Tokenize text", "description": "Convert text input into tokens using the model's tokenizer.", "operationId": "tokenize", "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TokenizeRequest" }, "example": { "input": "Hello, world!", "model": "gemma-2-2b-it-Q8_0" } } } }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TokenizeResponse" }, "example": { "tokens": [15339, 11, 1917, 0] } } } } } } }, "/extras/tokenize/count": { "post": { "tags": ["Extras"], "summary": "Count tokens", "description": "Count the number of tokens in the provided text.", "operationId": "count_tokens", "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TokenizeRequest" }, "example": { "input": "How many tokens does this text have?", "model": "gemma-2-2b-it-Q8_0" } } } }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TokenCountResponse" }, "example": { "count": 8 } } } } } } } }, "components": { "schemas": { "TokenizeRequest": { "type": "object", "properties": { "input": { "type": "string", "description": "The text to tokenize" }, "model": { "type": "string", "description": "The model to use for tokenization", "enum": [ "gemma-2-2b-it-Q8_0", "llama-3.1-8b-instruct-Q4_K_M", "mistral-7b-instruct-v0.3-Q4_K_M", "phi-3-mini-4k-instruct-Q4_K_M" ] } }, "required": ["input"] }, "TokenizeResponse": { "type": "object", "properties": { "tokens": { "type": "array", "items": { "type": "integer" }, "description": "Array of token IDs" } }, "required": ["tokens"] }, "TokenCountResponse": { "type": "object", "properties": { "count": { "type": "integer", "description": "Number of tokens" } }, "required": ["count"] } }, "securitySchemes": { "bearerAuth": { "type": "http", "scheme": "bearer", "bearerFormat": "JWT", "description": "Optional: Enter your API key if authentication is enabled. The Bearer prefix will be added automatically." } } }, "x-jan-local-features": { "engine": "llama.cpp", "features": [ "GGUF model support", "CPU and GPU acceleration", "Quantized model support (Q4, Q5, Q8)", "Metal acceleration on macOS", "CUDA support on NVIDIA GPUs", "ROCm support on AMD GPUs", "AVX/AVX2/AVX512 optimizations", "Memory-mapped model loading" ], "privacy": { "local_processing": true, "no_telemetry": true, "offline_capable": true }, "model_formats": ["GGUF", "GGML"], "default_settings": { "context_length": 4096, "batch_size": 512, "threads": "auto" } } }