{
  "openapi": "3.1.0",
  "info": {
    "title": "👋Jan API",
    "description": "OpenAI-compatible API for local inference with Jan. Run AI models locally with complete privacy using llama.cpp's high-performance inference engine. Supports GGUF models with CPU and GPU acceleration. No authentication required for local usage.",
    "version": "0.3.14",
    "contact": {
      "name": "Jan Support",
      "url": "https://jan.ai/support",
      "email": "support@jan.ai"
    },
    "license": {
      "name": "Apache 2.0",
      "url": "https://github.com/janhq/jan/blob/main/LICENSE"
    }
  },
  "servers": [
    {
      "url": "http://127.0.0.1:1337",
      "description": "Local Jan Server (Default IP)"
    },
    {
      "url": "http://localhost:1337",
      "description": "Local Jan Server (localhost)"
    },
    {
      "url": "http://localhost:8080",
      "description": "Local Jan Server (Alternative Port)"
    }
  ],
  "tags": [
    {
      "name": "Models",
      "description": "List and describe available models"
    },
    {
      "name": "Chat",
      "description": "Chat completion endpoints for conversational AI"
    },
    {
      "name": "Completions",
      "description": "Text completion endpoints for generating text"
    },
    {
      "name": "Extras",
      "description": "Additional utility endpoints for tokenization and text processing"
    }
  ],
  "paths": {
    "/v1/completions": {
      "post": {
        "tags": ["Completions"],
        "summary": "Create completion",
        "description": "Creates a completion for the provided prompt and parameters. This endpoint is compatible with OpenAI's completions API.",
        "operationId": "create_completion",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/CreateCompletionRequest"
              },
              "examples": {
                "basic": {
                  "summary": "Basic Completion",
                  "description": "Simple text completion example",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "prompt": "Once upon a time",
                    "max_tokens": 50,
                    "temperature": 0.7
                  }
                },
                "creative": {
                  "summary": "Creative Writing",
                  "description": "Generate creative content with higher temperature",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "prompt": "Write a short poem about coding:",
                    "max_tokens": 150,
                    "temperature": 1,
                    "top_p": 0.95
                  }
                },
                "code": {
                  "summary": "Code Generation",
                  "description": "Generate code with lower temperature for accuracy",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "prompt": "# Python function to calculate fibonacci\ndef fibonacci(n):",
                    "max_tokens": 200,
                    "temperature": 0.3,
                    "stop": ["\n\n", "def ", "class "]
                  }
                },
                "streaming": {
                  "summary": "Streaming Response",
                  "description": "Stream tokens as they are generated",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "prompt": "Explain quantum computing in simple terms:",
                    "max_tokens": 300,
                    "temperature": 0.7,
                    "stream": true
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateCompletionResponse"
                }
              }
            }
          },
          "202": {
            "description": "Accepted - Request is being processed",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateCompletionResponse"
                }
              },
              "text/event-stream": {
                "schema": {
                  "type": "string",
                  "format": "binary",
                  "description": "Server-sent events stream for streaming responses"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/v1/chat/completions": {
      "post": {
        "tags": ["Chat"],
        "summary": "Create chat completion",
        "description": "Creates a model response for the given chat conversation. This endpoint is compatible with OpenAI's chat completions API.",
        "operationId": "create_chat_completion",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/CreateChatCompletionRequest"
              },
              "examples": {
                "simple": {
                  "summary": "Simple Chat",
                  "description": "Basic question and answer",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "messages": [
                      {
                        "role": "user",
                        "content": "What is the capital of France?"
                      }
                    ],
                    "max_tokens": 100,
                    "temperature": 0.7
                  }
                },
                "system": {
                  "summary": "With System Message",
                  "description": "Chat with system instructions",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "messages": [
                      {
                        "role": "system",
                        "content": "You are a helpful assistant that speaks like a pirate."
                      },
                      {
                        "role": "user",
                        "content": "Tell me about the weather today."
                      }
                    ],
                    "max_tokens": 150,
                    "temperature": 0.8
                  }
                },
                "conversation": {
                  "summary": "Multi-turn Conversation",
                  "description": "Extended conversation with context",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "messages": [
                      {
                        "role": "system",
                        "content": "You are a knowledgeable AI assistant."
                      },
                      {
                        "role": "user",
                        "content": "What is machine learning?"
                      },
                      {
                        "role": "assistant",
                        "content": "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed."
                      },
                      {
                        "role": "user",
                        "content": "Can you give me a simple example?"
                      }
                    ],
                    "max_tokens": 200,
                    "temperature": 0.7
                  }
                },
                "streaming": {
                  "summary": "Streaming Chat",
                  "description": "Stream the response token by token",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "messages": [
                      {
                        "role": "user",
                        "content": "Write a haiku about programming"
                      }
                    ],
                    "stream": true,
                    "temperature": 0.9
                  }
                },
                "json_mode": {
                  "summary": "JSON Response",
                  "description": "Request structured JSON output",
                  "value": {
                    "model": "gemma-2-2b-it-Q8_0",
                    "messages": [
                      {
                        "role": "user",
                        "content": "List 3 programming languages with their main use cases in JSON format"
                      }
                    ],
                    "max_tokens": 200,
                    "temperature": 0.5,
                    "response_format": {
                      "type": "json_object"
                    }
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateChatCompletionResponse"
                }
              },
              "text/event-stream": {
                "schema": {
                  "type": "string",
                  "format": "binary",
                  "description": "Server-sent events stream for streaming responses"
                }
              }
            }
          },
          "202": {
            "description": "Accepted - Request is being processed",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateChatCompletionResponse"
                }
              },
              "text/event-stream": {
                "schema": {
                  "type": "string",
                  "format": "binary",
                  "description": "Server-sent events stream for streaming responses"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/v1/models": {
      "get": {
        "tags": ["Models"],
        "summary": "List available models",
        "description": "Lists the currently available models and provides basic information about each one such as the owner and availability.",
        "operationId": "list_models",
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ModelList"
                },
                "example": {
                  "object": "list",
                  "data": [
                    {
                      "id": "gemma-2-2b-it-Q8_0",
                      "object": "model",
                      "created": 1686935002,
                      "owned_by": "jan"
                    },
                    {
                      "id": "llama-3.1-8b-instruct-Q4_K_M",
                      "object": "model",
                      "created": 1686935002,
                      "owned_by": "jan"
                    },
                    {
                      "id": "mistral-7b-instruct-v0.3-Q4_K_M",
                      "object": "model",
                      "created": 1686935002,
                      "owned_by": "jan"
                    },
                    {
                      "id": "phi-3-mini-4k-instruct-Q4_K_M",
                      "object": "model",
                      "created": 1686935002,
                      "owned_by": "jan"
                    }
                  ]
                }
              }
            }
          }
        }
      }
    },
    "/extras/tokenize": {
      "post": {
        "tags": ["Extras"],
        "summary": "Tokenize text",
        "description": "Convert text input into tokens using the model's tokenizer.",
        "operationId": "tokenize",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/TokenizeRequest"
              },
              "example": {
                "input": "Hello, world!",
                "model": "gemma-2-2b-it-Q8_0"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/TokenizeResponse"
                },
                "example": {
                  "tokens": [15339, 11, 1917, 0]
                }
              }
            }
          }
        }
      }
    },
    "/extras/tokenize/count": {
      "post": {
        "tags": ["Extras"],
        "summary": "Count tokens",
        "description": "Count the number of tokens in the provided text.",
        "operationId": "count_tokens",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/TokenizeRequest"
              },
              "example": {
                "input": "How many tokens does this text have?",
                "model": "gemma-2-2b-it-Q8_0"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/TokenCountResponse"
                },
                "example": {
                  "count": 8
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "TokenizeRequest": {
        "type": "object",
        "properties": {
          "input": {
            "type": "string",
            "description": "The text to tokenize"
          },
          "model": {
            "type": "string",
            "description": "The model to use for tokenization",
            "enum": [
              "gemma-2-2b-it-Q8_0",
              "llama-3.1-8b-instruct-Q4_K_M",
              "mistral-7b-instruct-v0.3-Q4_K_M",
              "phi-3-mini-4k-instruct-Q4_K_M"
            ]
          }
        },
        "required": ["input"]
      },
      "TokenizeResponse": {
        "type": "object",
        "properties": {
          "tokens": {
            "type": "array",
            "items": {
              "type": "integer"
            },
            "description": "Array of token IDs"
          }
        },
        "required": ["tokens"]
      },
      "TokenCountResponse": {
        "type": "object",
        "properties": {
          "count": {
            "type": "integer",
            "description": "Number of tokens"
          }
        },
        "required": ["count"]
      }
    },
    "securitySchemes": {
      "bearerAuth": {
        "type": "http",
        "scheme": "bearer",
        "bearerFormat": "JWT",
        "description": "Optional: Enter your API key if authentication is enabled. The Bearer prefix will be added automatically."
      }
    }
  },
  "x-jan-local-features": {
    "engine": "llama.cpp",
    "features": [
      "GGUF model support",
      "CPU and GPU acceleration",
      "Quantized model support (Q4, Q5, Q8)",
      "Metal acceleration on macOS",
      "CUDA support on NVIDIA GPUs",
      "ROCm support on AMD GPUs",
      "AVX/AVX2/AVX512 optimizations",
      "Memory-mapped model loading"
    ],
    "privacy": {
      "local_processing": true,
      "no_telemetry": true,
      "offline_capable": true
    },
    "model_formats": ["GGUF", "GGML"],
    "default_settings": {
      "context_length": 4096,
      "batch_size": 512,
      "threads": "auto"
    }
  }
}