diff --git a/docs/src/pages/_meta.json b/docs/src/pages/_meta.json index d095500f4..72b460fde 100644 --- a/docs/src/pages/_meta.json +++ b/docs/src/pages/_meta.json @@ -1,58 +1,66 @@ { - "index": { - "type": "page", - "title": "Homepage", - "display": "hidden", - "theme": { - "layout": "raw" - } - }, - "docs": { - "type": "page", - "title": "Docs" - }, - "platforms": { - "type": "page", - "title": "Platforms", - "display": "hidden" - }, - "integrations": { - "type": "page", - "title": "Integrations", - "display": "hidden" - }, - "changelog": { - "type": "page", - "title": "Changelog", - "theme": { - "layout": "raw" - } - }, - "blog": { - "type": "page", - "title": "Blog", - "theme": { - "layout": "raw" - } - }, - "post": { - "type": "page", - "title": "Post Categories", - "display": "hidden" - }, - - "download": { - "type": "page", - "theme": { - "layout": "raw" - } - }, - "privacy": { - "title": "Privacy", - "display": "hidden" - }, - "support": { - "title": "Support", - "display": "hidden" - } + "index": { + "type": "page", + "title": "Homepage", + "display": "hidden", + "theme": { + "layout": "raw" + } + }, + "docs": { + "type": "page", + "title": "Docs" + }, + "server": { + "type": "page", + "title": "Docs", + "display": "hidden" + }, + "platforms": { + "type": "page", + "title": "Platforms", + "display": "hidden" + }, + "integrations": { + "type": "page", + "title": "Integrations", + "display": "hidden" + }, + "api-reference": { + "type": "page", + "title": "API reference" + }, + "changelog": { + "type": "page", + "title": "Changelog", + "theme": { + "layout": "raw" + } + }, + "blog": { + "type": "page", + "title": "Blog", + "theme": { + "layout": "raw" + } + }, + "post": { + "type": "page", + "title": "Post Categories", + "display": "hidden" + }, + "download": { + "type": "page", + "theme": { + "layout": "raw" + } + }, + "privacy": { + "title": "Privacy", + "display": "hidden" + }, + "support": { + "title": "Support", + "display": "hidden" + } } diff --git a/docs/src/pages/api-reference/_meta.json b/docs/src/pages/api-reference/_meta.json new file mode 100644 index 000000000..b1ea1c4c5 --- /dev/null +++ b/docs/src/pages/api-reference/_meta.json @@ -0,0 +1,20 @@ +{ + "get-started-separator": { + "title": "Get started", + "type": "separator" + }, + "index": "Overview", + "installation": "Installation", + "configuration": "Configuration", + "core-concepts-separator": { + "title": "Core concepts", + "type": "separator" + }, + "api-reference": "API Reference", + "resource-separator": { + "title": "Resources", + "type": "separator" + }, + "architecture": "Architecture", + "development": "Development" +} diff --git a/docs/src/pages/api-reference/api-reference.mdx b/docs/src/pages/api-reference/api-reference.mdx new file mode 100644 index 000000000..662127638 --- /dev/null +++ b/docs/src/pages/api-reference/api-reference.mdx @@ -0,0 +1,378 @@ +--- +title: API Reference +description: Complete API documentation for Jan Server endpoints and OpenAI compatibility. +--- + +## Base URL + +All API endpoints are available at the API gateway base URL: + +``` +http://localhost:8080/api/v1 +``` + +The API gateway automatically forwards port 8080 when using the standard deployment scripts. + +## Authentication + +Jan Server supports multiple authentication methods: + +### JWT Token Authentication + +Include JWT token in the Authorization header: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/protected-endpoint +``` + +### API Key Authentication + +Include API key in the Authorization header: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/protected-endpoint +``` + +## OpenAI-Compatible Endpoints + +Jan Server implements OpenAI-compatible endpoints for seamless integration with existing tools. + +### Chat Completions + +**Endpoint**: `POST /api/v1/chat/completions` + +Standard OpenAI chat completions API for conversational AI. + +```bash +curl -X POST http://localhost:8080/api/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "model": "jan-v1-4b", + "messages": [ + {"role": "user", "content": "Hello, how are you?"} + ], + "max_tokens": 100, + "temperature": 0.7 + }' +``` + +**Parameters:** +- `model` (string): Model identifier (`jan-v1-4b`) +- `messages` (array): Conversation history +- `max_tokens` (integer): Maximum response tokens +- `temperature` (float): Response randomness (0.0 to 2.0) +- `stream` (boolean): Enable streaming responses + +### Model Information + +**Endpoint**: `GET /api/v1/models` + +List available models: + +```bash +curl http://localhost:8080/api/v1/models +``` + +**Response:** +```json +{ + "object": "list", + "data": [ + { + "id": "jan-v1-4b", + "object": "model", + "created": 1234567890, + "owned_by": "jan" + } + ] +} +``` + +### Completions (Text Generation) + +**Endpoint**: `POST /api/v1/completions` + +Text completion endpoint: + +```bash +curl -X POST http://localhost:8080/api/v1/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "model": "jan-v1-4b", + "prompt": "The meaning of life is", + "max_tokens": 50 + }' +``` + +## Authentication Endpoints + +### OAuth2 Google Login + +**Endpoint**: `GET /auth/google` + +Redirects to Google OAuth2 authorization: + +```bash +curl http://localhost:8080/auth/google +``` + +### OAuth2 Callback + +**Endpoint**: `GET /auth/google/callback` + +Handles OAuth2 callback and issues JWT token: + +``` +http://localhost:8080/auth/google/callback?code=&state= +``` + +### Token Refresh + +**Endpoint**: `POST /api/v1/auth/refresh` + +Refresh expired JWT tokens: + +```bash +curl -X POST http://localhost:8080/api/v1/auth/refresh \ + -H "Authorization: Bearer " +``` + +## User Management + +### User Profile + +**Endpoint**: `GET /api/v1/user/profile` + +Get current user profile: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/user/profile +``` + +### API Keys + +**Endpoint**: `POST /api/v1/user/api-keys` + +Generate new API key: + +```bash +curl -X POST http://localhost:8080/api/v1/user/api-keys \ + -H "Authorization: Bearer " \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Development Key", + "permissions": ["read", "write"] + }' +``` + +## Conversation Management + +### Create Conversation + +**Endpoint**: `POST /api/v1/conversations` + +Create new conversation: + +```bash +curl -X POST http://localhost:8080/api/v1/conversations \ + -H "Authorization: Bearer " \ + -H "Content-Type: application/json" \ + -d '{ + "title": "My Conversation", + "model": "jan-v1-4b" + }' +``` + +### List Conversations + +**Endpoint**: `GET /api/v1/conversations` + +Get user's conversations: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/conversations +``` + +### Get Conversation + +**Endpoint**: `GET /api/v1/conversations/{id}` + +Get specific conversation with message history: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/conversations/123 +``` + +## Health and Status + +### Health Check + +**Endpoint**: `GET /health` + +Basic health check: + +```bash +curl http://localhost:8080/health +``` + +**Response:** +```json +{ + "status": "ok", + "timestamp": "2024-01-01T12:00:00Z" +} +``` + +### System Status + +**Endpoint**: `GET /api/v1/status` + +Detailed system status: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/status +``` + +**Response:** +```json +{ + "api_gateway": "healthy", + "inference_model": "healthy", + "database": "healthy", + "external_apis": { + "serper": "healthy" + } +} +``` + +## Error Responses + +Jan Server returns standard HTTP status codes and JSON error responses: + +```json +{ + "error": { + "message": "Invalid request format", + "type": "invalid_request_error", + "code": "invalid_json" + } +} +``` + +### Common Error Codes + +| Status Code | Description | +|-------------|-------------| +| `400` | Bad Request - Invalid request format | +| `401` | Unauthorized - Invalid or missing authentication | +| `403` | Forbidden - Insufficient permissions | +| `404` | Not Found - Resource not found | +| `429` | Too Many Requests - Rate limit exceeded | +| `500` | Internal Server Error - Server error | +| `503` | Service Unavailable - Service temporarily unavailable | + +## Interactive Documentation + +Jan Server provides interactive Swagger documentation at: + +``` +http://localhost:8080/api/swagger/index.html#/ +``` + +This interface allows you to: +- Browse all available endpoints +- Test API calls directly from the browser +- View request/response schemas +- Generate code samples + +The Swagger documentation is auto-generated from Go code annotations and provides the most up-to-date API reference. + +## Rate Limiting + +API endpoints implement rate limiting to prevent abuse: + +- **Authenticated requests**: 1000 requests per hour per user +- **Unauthenticated requests**: 100 requests per hour per IP +- **Model inference**: 60 requests per minute per user + +Rate limit headers are included in responses: +``` +X-RateLimit-Limit: 1000 +X-RateLimit-Remaining: 999 +X-RateLimit-Reset: 1609459200 +``` + +## SDK and Client Libraries + +### JavaScript/Node.js + +Use the OpenAI JavaScript SDK with Jan Server: + +```javascript +import OpenAI from 'openai'; + +const openai = new OpenAI({ + baseURL: 'http://localhost:8080/api/v1', + apiKey: 'your-jwt-token' +}); + +const completion = await openai.chat.completions.create({ + model: 'jan-v1-4b', + messages: [ + { role: 'user', content: 'Hello!' } + ] +}); +``` + +### Python + +Use the OpenAI Python SDK: + +```python +import openai + +openai.api_base = "http://localhost:8080/api/v1" +openai.api_key = "your-jwt-token" + +response = openai.ChatCompletion.create( + model="jan-v1-4b", + messages=[ + {"role": "user", "content": "Hello!"} + ] +) +``` + +### cURL Examples + +Complete cURL examples for common operations: + +```bash +# Get models +curl http://localhost:8080/api/v1/models + +# Chat completion +curl -X POST http://localhost:8080/api/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jan-v1-4b", + "messages": [{"role": "user", "content": "Hello"}] + }' + +# Streaming chat completion +curl -X POST http://localhost:8080/api/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jan-v1-4b", + "messages": [{"role": "user", "content": "Tell me a story"}], + "stream": true + }' \ + --no-buffer +``` diff --git a/docs/src/pages/api-reference/architecture.mdx b/docs/src/pages/api-reference/architecture.mdx new file mode 100644 index 000000000..03dc1b363 --- /dev/null +++ b/docs/src/pages/api-reference/architecture.mdx @@ -0,0 +1,191 @@ +--- +title: Architecture +description: Technical architecture and system design of Jan Server components. +--- + +## System Overview + +Jan Server implements a microservices architecture on Kubernetes with three core components communicating over HTTP and managed by Helm charts. + +```mermaid +graph TD + Client[Client/Browser] --> Gateway[jan-api-gateway:8080] + Gateway --> Model[jan-inference-model:8101] + Gateway --> DB[(PostgreSQL:5432)] + Gateway --> Serper[Serper API] + Gateway --> OAuth[Google OAuth2] +``` + +## Components + +### API Gateway (`jan-api-gateway`) + +**Technology Stack:** +- **Language**: Go 1.24.6 +- **Framework**: Gin web framework +- **ORM**: GORM with PostgreSQL driver +- **DI**: Google Wire for dependency injection +- **Documentation**: Swagger/OpenAPI auto-generated + +**Responsibilities:** +- HTTP request routing and middleware +- User authentication via JWT and OAuth2 +- Database operations and data persistence +- External API integration (Serper, Google OAuth) +- OpenAI-compatible API endpoints +- Request forwarding to inference service + +**Key Directories:** +``` +application/ +├── cmd/server/ # Main entry point and DI wiring +├── app/ # Core business logic +├── config/ # Environment variables and settings +└── docs/ # Auto-generated Swagger docs +``` + +### Inference Model (`jan-inference-model`) + +**Technology Stack:** +- **Base Image**: VLLM OpenAI v0.10.0 +- **Model**: Jan-v1-4B (downloaded from Hugging Face) +- **Protocol**: OpenAI-compatible HTTP API +- **Features**: Tool calling, reasoning parsing + +**Configuration:** +- **Model Path**: `/models/Jan-v1-4B` +- **Served Name**: `jan-v1-4b` +- **Port**: 8101 +- **Batch Tokens**: 1024 max +- **Tool Parser**: Hermes +- **Reasoning Parser**: Qwen3 + +**Capabilities:** +- Text generation and completion +- Tool calling and function execution +- Multi-turn conversations +- Reasoning and chain-of-thought + +### Database (PostgreSQL) + +**Configuration:** +- **Database**: `jan` +- **User**: `jan-user` +- **Password**: `jan-password` +- **Port**: 5432 + +**Schema:** +- User accounts and authentication +- Conversation history +- Project and organization management +- API keys and access control + +## Data Flow + +### Request Processing + +1. **Client Request**: HTTP request to API gateway on port 8080 +2. **Authentication**: JWT token validation or OAuth2 flow +3. **Request Routing**: Gateway routes to appropriate handler +4. **Database Operations**: GORM queries for user data/state +5. **Inference Call**: HTTP request to model service on port 8101 +6. **Response Assembly**: Gateway combines results and returns to client + +### Authentication Flow + +**JWT Authentication:** +1. User provides credentials +2. Gateway validates against database +3. JWT token issued with HMAC-SHA256 signing +4. Subsequent requests include JWT in Authorization header + +**OAuth2 Flow:** +1. Client redirected to Google OAuth2 +2. Authorization code returned to redirect URL +3. Gateway exchanges code for access token +4. User profile retrieved from Google +5. Local JWT token issued + +## Deployment Architecture + +### Kubernetes Resources + +**Deployments:** +- `jan-api-gateway`: Single replica Go application +- `jan-inference-model`: Single replica VLLM server +- `postgresql`: StatefulSet with persistent storage + +**Services:** +- `jan-api-gateway`: ClusterIP exposing port 8080 +- `jan-inference-model`: ClusterIP exposing port 8101 +- `postgresql`: ClusterIP exposing port 5432 + +**Configuration:** +- Environment variables via Helm values +- Secrets for sensitive data (JWT keys, OAuth credentials) +- ConfigMaps for application settings + +### Helm Chart Structure + +``` +charts/ +├── umbrella-chart/ # Main deployment chart +│ ├── Chart.yaml +│ ├── values.yaml # Configuration values +│ └── Chart.lock +└── apps-charts/ # Individual service charts + ├── jan-api-gateway/ + └── jan-inference-model/ +``` + +## Security Architecture + +### Authentication Methods +- **JWT Tokens**: HMAC-SHA256 signed tokens for API access +- **OAuth2**: Google OAuth2 integration for user login +- **API Keys**: HMAC-SHA256 signed keys for service access + +### Network Security +- **Internal Communication**: Services communicate over Kubernetes cluster network +- **External Access**: Only API gateway exposed via port forwarding or ingress +- **Database Access**: PostgreSQL accessible only within cluster + +### Data Security +- **Secrets Management**: Kubernetes secrets for sensitive configuration +- **Environment Variables**: Non-sensitive config via environment variables +- **Database Encryption**: Standard PostgreSQL encryption at rest + +Production deployments should implement additional security measures including TLS termination, network policies, and secret rotation. + +## Scalability Considerations + +**Current Limitations:** +- Single replica deployments +- No horizontal pod autoscaling +- Local storage for database + +**Future Enhancements:** +- Multi-replica API gateway with load balancing +- Horizontal pod autoscaling based on CPU/memory +- External database with clustering +- Redis caching layer +- Message queue for async processing + +## Development Architecture + +### Code Generation +- **Swagger**: API documentation generated from Go annotations +- **Wire**: Dependency injection code generated from providers +- **GORM Gen**: Database model generation from schema + +### Build Process +1. **API Gateway**: Multi-stage Docker build with Go compilation +2. **Inference Model**: Base VLLM image with model download +3. **Helm Charts**: Dependency management and templating +4. **Documentation**: Auto-generation during development + +### Local Development +- **Hot Reload**: Source code changes reflected without full rebuild +- **Database Migrations**: Automated schema updates +- **API Testing**: Swagger UI for interactive testing +- **Logging**: Structured logging with configurable levels diff --git a/docs/src/pages/api-reference/configuration.mdx b/docs/src/pages/api-reference/configuration.mdx new file mode 100644 index 000000000..cbcba57fd --- /dev/null +++ b/docs/src/pages/api-reference/configuration.mdx @@ -0,0 +1,263 @@ +--- +title: Configuration +description: Configure Jan Server environment variables, authentication, and external integrations. +--- + +## Environment Variables + +Jan Server configuration is managed through environment variables defined in the Helm values file at `charts/umbrella-chart/values.yaml`. + +### API Gateway Configuration + +#### Core Settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `JAN_INFERENCE_MODEL_URL` | `http://jan-server-jan-inference-model:8101` | Internal URL for inference service | + +#### Authentication + +| Variable | Purpose | Format | +|----------|---------|--------| +| `JWT_SECRET` | JWT token signing | Base64 encoded HMAC-SHA256 key | +| `APIKEY_SECRET` | API key signing | Base64 encoded HMAC-SHA256 key | + +The default JWT and API key secrets are for development only. Generate new secrets for production deployments. + +#### OAuth2 Integration + +| Variable | Description | +|----------|-------------| +| `OAUTH2_GOOGLE_CLIENT_ID` | Google OAuth2 application client ID | +| `OAUTH2_GOOGLE_CLIENT_SECRET` | Google OAuth2 application secret | +| `OAUTH2_GOOGLE_REDIRECT_URL` | Callback URL for OAuth2 flow | + +#### External APIs + +| Variable | Provider | Purpose | +|----------|----------|---------| +| `SERPER_API_KEY` | Serper | Web search integration | + +#### Database Connection + +| Variable | Default | Description | +|----------|---------|-------------| +| `DB_POSTGRESQL_WRITE_DSN` | `host=jan-server-postgresql user=jan-user password=jan-password dbname=jan port=5432 sslmode=disable` | Write database connection | +| `DB_POSTGRESQL_READ1_DSN` | `host=jan-server-postgresql user=jan-user password=jan-password dbname=jan port=5432 sslmode=disable` | Read database connection | + +## Helm Configuration + +### Updating Values + +Edit the configuration in `charts/umbrella-chart/values.yaml`: + +```yaml +jan-api-gateway: + env: + - name: SERPER_API_KEY + value: your_serper_api_key + - name: OAUTH2_GOOGLE_CLIENT_ID + value: your_google_client_id + - name: OAUTH2_GOOGLE_CLIENT_SECRET + value: your_google_client_secret +``` + +### Applying Changes + +After modifying values, redeploy the application: + +```bash +helm upgrade jan-server ./charts/umbrella-chart +``` + +## Authentication Setup + +### JWT Tokens + +Generate a secure JWT signing key: + +```bash +# Generate 256-bit key for HMAC-SHA256 +openssl rand -base64 32 +``` + +Update the `JWT_SECRET` value in your Helm configuration. + +### API Keys + +Generate a secure API key signing secret: + +```bash +# Generate 256-bit key for HMAC-SHA256 +openssl rand -base64 32 +``` + +Update the `APIKEY_SECRET` value in your Helm configuration. + +### Google OAuth2 + +1. **Create Google Cloud Project** + - Go to [Google Cloud Console](https://console.cloud.google.com) + - Create a new project or select existing + +2. **Enable OAuth2** + - Navigate to "APIs & Services" > "Credentials" + - Create OAuth2 client ID credentials + - Set application type to "Web application" + +3. **Configure Redirect URI** + ``` + http://localhost:8080/auth/google/callback + ``` + +4. **Update Configuration** + - Set `OAUTH2_GOOGLE_CLIENT_ID` to your client ID + - Set `OAUTH2_GOOGLE_CLIENT_SECRET` to your client secret + - Set `OAUTH2_GOOGLE_REDIRECT_URL` to your callback URL + +## External Integrations + +### Serper API + +Jan Server integrates with Serper for web search capabilities. + +1. **Get API Key** + - Register at [serper.dev](https://serper.dev) + - Generate API key from dashboard + +2. **Configure** + - Set `SERPER_API_KEY` in Helm values + - Redeploy the application + +### Adding New Integrations + +To add new external API integrations: + +1. **Update Helm Values** + ```yaml + jan-api-gateway: + env: + - name: YOUR_API_KEY + value: your_api_key_value + ``` + +2. **Update Go Configuration** + + Add to `config/environment_variables/env.go`: + ```go + YourAPIKey string `env:"YOUR_API_KEY"` + ``` + +3. **Redeploy** + ```bash + helm upgrade jan-server ./charts/umbrella-chart + ``` + +## Database Configuration + +### Connection Settings + +The default PostgreSQL configuration uses: +- **Host**: `jan-server-postgresql` (Kubernetes service name) +- **Database**: `jan` +- **User**: `jan-user` +- **Password**: `jan-password` +- **Port**: `5432` +- **SSL**: Disabled (development only) + +### Production Database + +For production deployments: + +1. **External Database** + - Use managed PostgreSQL service (AWS RDS, Google Cloud SQL) + - Update DSN variables with external connection details + +2. **SSL/TLS** + - Enable `sslmode=require` in connection strings + - Configure certificate validation + +3. **Connection Pooling** + - Consider using connection pooler (PgBouncer, pgpool-II) + - Configure appropriate pool sizes + +## Model Configuration + +The inference model service is configured via Docker CMD parameters: + +```dockerfile +CMD ["--model", "/models/Jan-v1-4B", \ + "--served-model-name", "jan-v1-4b", \ + "--host", "0.0.0.0", \ + "--port", "8101", \ + "--max-num-batched-tokens", "1024", \ + "--enable-auto-tool-choice", \ + "--tool-call-parser", "hermes", \ + "--reasoning-parser", "qwen3"] +``` + +### Model Parameters + +| Parameter | Value | Description | +|-----------|-------|-------------| +| `--model` | `/models/Jan-v1-4B` | Path to model files | +| `--served-model-name` | `jan-v1-4b` | API model identifier | +| `--max-num-batched-tokens` | `1024` | Maximum tokens per batch | +| `--tool-call-parser` | `hermes` | Tool calling format | +| `--reasoning-parser` | `qwen3` | Reasoning output format | + +Model configuration changes require rebuilding the inference Docker image. This will be configurable via environment variables in future releases. + +## Resource Configuration + +### Kubernetes Resources + +Current deployments use default resource limits. For production: + +```yaml +jan-api-gateway: + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + +jan-inference-model: + resources: + requests: + cpu: 1000m + memory: 4Gi + limits: + cpu: 4000m + memory: 8Gi +``` + +### Storage + +PostgreSQL uses default Kubernetes storage. For production: + +```yaml +postgresql: + persistence: + enabled: true + size: 20Gi + storageClass: fast-ssd +``` + +## Logging Configuration + +Configure logging levels via environment variables: + +```yaml +jan-api-gateway: + env: + - name: LOG_LEVEL + value: info + - name: LOG_FORMAT + value: json +``` + +Available log levels: `debug`, `info`, `warn`, `error` +Available formats: `text`, `json` diff --git a/docs/src/pages/api-reference/development.mdx b/docs/src/pages/api-reference/development.mdx new file mode 100644 index 000000000..c773a2891 --- /dev/null +++ b/docs/src/pages/api-reference/development.mdx @@ -0,0 +1,445 @@ +--- +title: Development +description: Development setup, workflow, and contribution guidelines for Jan Server. +--- + +## Development Setup + +### Prerequisites + +- **Go**: 1.24.6 or later +- **Docker**: For containerization +- **minikube**: Local Kubernetes development +- **Helm**: Package management +- **Make**: Build automation + +### Initial Setup + + +1. **Clone Repository** + ```bash + git clone https://github.com/menloresearch/jan-server + cd jan-server + ``` + +2. **Install Development Tools** + ```bash + cd apps/jan-api-gateway/application + make install + ``` + +3. **Generate Code** + ```bash + make setup + ``` + +4. **Start Development Environment** + ```bash + # From project root + ./scripts/run.sh + ``` + +## API Gateway Development + +### Project Structure + +``` +apps/jan-api-gateway/application/ +├── cmd/server/ # Entry point and dependency injection +│ ├── server.go # Main server setup +│ ├── wire.go # DI configuration +│ └── wire_gen.go # Generated DI code +├── app/ # Core application logic +│ ├── domain/ # Business entities +│ ├── repository/ # Data access layer +│ ├── service/ # Business logic +│ └── handler/ # HTTP handlers +├── config/ # Configuration management +└── docs/ # Generated API documentation +``` + +### Build Commands + +```bash +# Install development dependencies +make install + +# Generate API documentation +make doc + +# Generate dependency injection code +make wire + +# Complete setup (doc + wire) +make setup + +# Build application +go build -o jan-api-gateway ./cmd/server +``` + +### Code Generation + +Jan Server uses code generation for several components: + +**Swagger Documentation:** +```bash +# Generates docs/swagger.json and docs/swagger.yaml +swag init --parseDependency -g cmd/server/server.go -o docs +``` + +**Dependency Injection:** +```bash +# Generates wire_gen.go from wire.go providers +wire ./cmd/server +``` + +**Database Models:** +```bash +# Generate GORM models (when schema changes) +go run cmd/codegen/gorm/gorm.go +``` + +### Local Development + +#### Running API Gateway Locally + +```bash +cd apps/jan-api-gateway/application + +# Set environment variables +export JAN_INFERENCE_MODEL_URL=http://localhost:8101 +export JWT_SECRET=your-jwt-secret +export DB_POSTGRESQL_WRITE_DSN="host=localhost user=jan-user password=jan-password dbname=jan port=5432 sslmode=disable" + +# Run the server +go run ./cmd/server +``` + +#### Database Setup + +For local development, you can run PostgreSQL directly: + +```bash +# Using Docker +docker run -d \ + --name jan-postgres \ + -e POSTGRES_DB=jan \ + -e POSTGRES_USER=jan-user \ + -e POSTGRES_PASSWORD=jan-password \ + -p 5432:5432 \ + postgres:14 +``` + +## Testing + +### Running Tests + +```bash +# Run all tests +go test ./... + +# Run tests with coverage +go test -cover ./... + +# Run specific test package +go test ./app/service/... +``` + +### Test Structure + +``` +app/ +├── service/ +│ ├── auth_service.go +│ ├── auth_service_test.go +│ ├── conversation_service.go +│ └── conversation_service_test.go +└── handler/ + ├── auth_handler.go + ├── auth_handler_test.go + ├── chat_handler.go + └── chat_handler_test.go +``` + +### Writing Tests + +Example service test: + +```go +func TestAuthService_ValidateToken(t *testing.T) { + // Setup + service := NewAuthService(mockRepo, mockConfig) + + // Test cases + tests := []struct { + name string + token string + expectValid bool + expectError bool + }{ + {"valid token", "valid.jwt.token", true, false}, + {"invalid token", "invalid.token", false, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + valid, err := service.ValidateToken(tt.token) + assert.Equal(t, tt.expectValid, valid) + assert.Equal(t, tt.expectError, err != nil) + }) + } +} +``` + +## Docker Development + +### Building Images + +```bash +# Build API gateway +docker build -t jan-api-gateway:dev ./apps/jan-api-gateway + +# Build inference model +docker build -t jan-inference-model:dev ./apps/jan-inference-model +``` + +### Development Compose + +For local development without Kubernetes: + +```yaml +# docker-compose.dev.yml +version: '3.8' +services: + postgres: + image: postgres:14 + environment: + POSTGRES_DB: jan + POSTGRES_USER: jan-user + POSTGRES_PASSWORD: jan-password + ports: + - "5432:5432" + + api-gateway: + build: ./apps/jan-api-gateway + ports: + - "8080:8080" + environment: + - JAN_INFERENCE_MODEL_URL=http://inference-model:8101 + - DB_POSTGRESQL_WRITE_DSN=host=postgres user=jan-user password=jan-password dbname=jan port=5432 sslmode=disable + depends_on: + - postgres + + inference-model: + build: ./apps/jan-inference-model + ports: + - "8101:8101" +``` + +## Debugging + +### Go Debugging + +For VS Code debugging, add to `.vscode/launch.json`: + +```json +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Launch Jan API Gateway", + "type": "go", + "request": "launch", + "mode": "auto", + "program": "${workspaceFolder}/apps/jan-api-gateway/application/cmd/server", + "env": { + "JAN_INFERENCE_MODEL_URL": "http://localhost:8101", + "JWT_SECRET": "development-secret" + } + } + ] +} +``` + +### Application Logs + +```bash +# View API gateway logs +kubectl logs deployment/jan-server-jan-api-gateway -f + +# View inference model logs +kubectl logs deployment/jan-server-jan-inference-model -f + +# View PostgreSQL logs +kubectl logs statefulset/jan-server-postgresql -f +``` + +### Log Levels + +Set log level via environment variable: + +```bash +export LOG_LEVEL=debug # debug, info, warn, error +``` + +## Code Style and Standards + +### Go Standards + +- Follow [Go Code Review Comments](https://go.dev/wiki/CodeReviewComments) +- Use `gofmt` for formatting +- Run `go vet` for static analysis +- Use meaningful variable and function names + +### API Standards + +- RESTful endpoint design +- OpenAPI/Swagger annotations for all endpoints +- Consistent error response format +- Proper HTTP status codes + +### Git Workflow + +```bash +# Create feature branch +git checkout -b feature/your-feature-name + +# Make changes and commit +git add . +git commit -m "feat: add new authentication endpoint" + +# Push and create PR +git push origin feature/your-feature-name +``` + +### Commit Message Format + +Follow conventional commits: + +``` +feat: add new feature +fix: resolve bug in authentication +docs: update API documentation +test: add unit tests for service layer +refactor: improve error handling +``` + +## Performance Testing + +### Load Testing + +Use [k6](https://k6.io) for API load testing: + +```javascript +// load-test.js +import http from 'k6/http'; + +export default function () { + const response = http.post('http://localhost:8080/api/v1/chat/completions', { + model: 'jan-v1-4b', + messages: [ + { role: 'user', content: 'Hello!' } + ] + }, { + headers: { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer your-token' + } + }); + + check(response, { + 'status is 200': (r) => r.status === 200, + 'response time < 5000ms': (r) => r.timings.duration < 5000, + }); +} +``` + +Run load test: +```bash +k6 run --vus 10 --duration 30s load-test.js +``` + +### Memory Profiling + +Enable Go profiling endpoints: + +```go +import _ "net/http/pprof" + +// In main.go +go func() { + log.Println(http.ListenAndServe("localhost:6060", nil)) +}() +``` + +Profile memory usage: +```bash +go tool pprof http://localhost:6060/debug/pprof/heap +``` + +## Contributing + +### Pull Request Process + +1. **Fork the repository** +2. **Create feature branch** from `main` +3. **Make changes** following code standards +4. **Add tests** for new functionality +5. **Update documentation** if needed +6. **Submit pull request** with clear description + +### Code Review Checklist + +- [ ] Code follows Go standards +- [ ] Tests added for new features +- [ ] Documentation updated +- [ ] API endpoints have Swagger annotations +- [ ] No breaking changes without version bump +- [ ] Security considerations addressed + +### Issues and Bug Reports + +When reporting bugs, include: + +- **Environment**: OS, Go version, minikube version +- **Steps to reproduce**: Clear, minimal reproduction steps +- **Expected behavior**: What should happen +- **Actual behavior**: What actually happens +- **Logs**: Relevant error messages or logs + +For security issues, please report privately to the maintainers instead of creating public issues. + +## Release Process + +### Version Management + +Jan Server uses semantic versioning (semver): + +- **Major**: Breaking changes +- **Minor**: New features, backward compatible +- **Patch**: Bug fixes, backward compatible + +### Building Releases + +```bash +# Tag release +git tag -a v1.2.3 -m "Release v1.2.3" + +# Build release images +docker build -t jan-api-gateway:v1.2.3 ./apps/jan-api-gateway +docker build -t jan-inference-model:v1.2.3 ./apps/jan-inference-model + +# Push tags +git push origin v1.2.3 +``` + +### Deployment + +Production deployments follow the same Helm chart structure: + +```bash +# Deploy specific version +helm install jan-server ./charts/umbrella-chart \ + --set jan-api-gateway.image.tag=v1.2.3 \ + --set jan-inference-model.image.tag=v1.2.3 +``` diff --git a/docs/src/pages/api-reference/index.mdx b/docs/src/pages/api-reference/index.mdx new file mode 100644 index 000000000..de595e5dc --- /dev/null +++ b/docs/src/pages/api-reference/index.mdx @@ -0,0 +1,39 @@ +--- +title: Jan Server +description: Self-hosted AI infrastructure running the Jan platform on Kubernetes. +keywords: + [ + Jan Server, + self-hosted AI, + Kubernetes deployment, + Docker containers, + AI inference, + local LLM server, + VLLM, + Go API gateway, + Jan-v1 model + ] +--- + +## Self-Hosted Jan Platform + +Jan Server deploys the Jan AI platform on your own infrastructure using Kubernetes. It provides a complete AI inference stack with API gateway, model serving, and data persistence. + +Jan Server is in early development. APIs and deployment methods may change. + +## Architecture Overview + +Jan Server consists of two main components: + +- **API Gateway**: Go application handling authentication, web requests, and external integrations +- **Inference Model**: VLLM server running the Jan-v1-4B model for AI inference +- **PostgreSQL**: Database for user data, conversations, and system state + +## Key Features + +- **Kubernetes Native**: Deploys via Helm charts with minikube support +- **Jan-v1 Model**: 4B parameter model optimized for reasoning and tool use +- **OpenAI Compatible API**: Standard endpoints for integration +- **Authentication**: JWT tokens and OAuth2 Google integration +- **External Integrations**: Serper API for web search capabilities +- **Development Ready**: Local development environment with hot reload diff --git a/docs/src/pages/api-reference/installation.mdx b/docs/src/pages/api-reference/installation.mdx new file mode 100644 index 000000000..de0609a08 --- /dev/null +++ b/docs/src/pages/api-reference/installation.mdx @@ -0,0 +1,151 @@ +--- +title: Installation +description: Install and deploy Jan Server on Kubernetes using minikube and Helm. +--- + +## Prerequisites + +Jan Server requires the following tools installed on your system: + +- **Docker**: For building container images +- **minikube**: Local Kubernetes cluster for development +- **Helm**: Package manager for Kubernetes applications +- **kubectl**: Kubernetes command-line tool (installed with minikube) + +Jan Server currently supports minikube for local development. Production Kubernetes deployments are planned for future releases. + +## Quick Start + + +1. **Clone the repository** + ```bash + git clone https://github.com/menloresearch/jan-server + cd jan-server + ``` + +2. **Start minikube** + ```bash + minikube start + ``` + +3. **Configure Docker environment** + ```bash + eval $(minikube docker-env) + alias kubectl="minikube kubectl --" + ``` + +4. **Deploy Jan Server** + ```bash + ./scripts/run.sh + ``` + +5. **Access the API** + + The script automatically forwards port 8080. Access the Swagger UI at: + ``` + http://localhost:8080/api/swagger/index.html#/ + ``` + + +## Manual Installation + +### Build Docker Images + +Build both required Docker images: + +```bash +# Build API Gateway +docker build -t jan-api-gateway:latest ./apps/jan-api-gateway + +# Build Inference Model +docker build -t jan-inference-model:latest ./apps/jan-inference-model +``` + +The inference model image downloads the Jan-v1-4B model from Hugging Face during build. This requires an internet connection and several GB of download. + +### Deploy with Helm + +Install the Helm chart: + +```bash +# Update Helm dependencies +helm dependency update ./charts/umbrella-chart + +# Install Jan Server +helm install jan-server ./charts/umbrella-chart +``` + +### Port Forwarding + +Forward the API gateway port to access from your local machine: + +```bash +kubectl port-forward svc/jan-server-jan-api-gateway 8080:8080 +``` + +## Verify Installation + +Check that all pods are running: + +```bash +kubectl get pods +``` + +Expected output: +``` +NAME READY STATUS RESTARTS +jan-server-jan-api-gateway-xxx 1/1 Running 0 +jan-server-jan-inference-model-xxx 1/1 Running 0 +jan-server-postgresql-0 1/1 Running 0 +``` + +Test the API gateway: +```bash +curl http://localhost:8080/health +``` + +## Uninstalling + +To remove Jan Server: + +```bash +helm uninstall jan-server +``` + +To stop minikube: + +```bash +minikube stop +``` + +## Troubleshooting + +### Common Issues + +**Pods in `ImagePullBackOff` state** +- Ensure Docker images were built in the minikube environment +- Run `eval $(minikube docker-env)` before building images + +**Port forwarding connection refused** +- Verify the service is running: `kubectl get svc` +- Check pod status: `kubectl get pods` +- Review logs: `kubectl logs deployment/jan-server-jan-api-gateway` + +**Inference model download fails** +- Ensure internet connectivity during Docker build +- The Jan-v1-4B model is approximately 2.4GB + +### Resource Requirements + +**Minimum System Requirements:** +- 8GB RAM +- 20GB free disk space +- 4 CPU cores + +**Recommended System Requirements:** +- 16GB RAM +- 50GB free disk space +- 8 CPU cores +- GPU support (for faster inference) + +The inference model requires significant memory. Ensure your minikube cluster has adequate resources allocated. diff --git a/docs/src/pages/docs/_meta.json b/docs/src/pages/docs/_meta.json index 614c01e30..0a66403a4 100644 --- a/docs/src/pages/docs/_meta.json +++ b/docs/src/pages/docs/_meta.json @@ -1,55 +1,51 @@ { - "-- Switcher": { - "type": "separator", - "title": "Switcher" - }, - "index": "Overview", - "getting-started-separator": { - "title": "GETTING STARTED", - "type": "separator" - }, - "quickstart": "QuickStart", - "desktop": "Install 👋 Jan", - "jan-models": "Models", - "assistants": "Create Assistants", - "remote-models": "Cloud Providers", - "mcp-examples": "Tutorials", - - "explanation-separator": { - "title": "EXPLANATION", - "type": "separator" - }, - "llama-cpp": "Local AI Engine", - "model-parameters": "Model Parameters", - - "privacy-policy": { - "type": "page", - "display": "hidden", - "title": "Privacy Policy" - }, - - "advanced-separator": { - "title": "ADVANCED", - "type": "separator" - }, - "manage-models": "Manage Models", - "mcp": "Model Context Protocol", - - "localserver": { - "title": "LOCAL SERVER", - "type": "separator" - }, - "api-server": "Server Setup", - "llama-cpp-server": "LlamaCpp Server", - "server-settings": "Server Settings", - "server-troubleshooting": "Server Troubleshooting", - "server-examples": "Integrations", - "reference-separator": { - "title": "REFERENCE", - "type": "separator" - }, - "settings": "Settings", - "data-folder": "Jan Data Folder", - "troubleshooting": "Troubleshooting", - "privacy": "Privacy" + "-- Switcher": { + "type": "separator", + "title": "Switcher" + }, + "get-started-separator": { + "title": "Get started", + "type": "separator" + }, + "index": "Overview", + "quickstart": "Quickstart", + "desktop": "Install 👋 Jan", + "jan-models": "Models", + "remote-models": "Cloud Providers", + "mcp-examples": "Tutorials", + "coreconcepts-separator": { + "title": "Core concepts", + "type": "separator" + }, + "assistants": "Assistants", + "llama-cpp": "Local AI Engine", + "model-parameters": "Model Parameters", + "privacy-policy": { + "type": "page", + "display": "hidden", + "title": "Privacy Policy" + }, + "advanced-separator": { + "title": "ADVANCED", + "type": "separator" + }, + "manage-models": "Manage Models", + "mcp": "Model Context Protocol", + "localserver": { + "title": "LOCAL SERVER", + "type": "separator" + }, + "api-server": "Server Setup", + "llama-cpp-server": "LlamaCpp Server", + "server-settings": "Server Settings", + "server-troubleshooting": "Server Troubleshooting", + "server-examples": "Integrations", + "reference-separator": { + "title": "REFERENCE", + "type": "separator" + }, + "settings": "Settings", + "data-folder": "Jan Data Folder", + "troubleshooting": "Troubleshooting", + "privacy": "Privacy" } diff --git a/docs/src/pages/platforms/_meta.json b/docs/src/pages/platforms/_meta.json deleted file mode 100644 index bfee4c12e..000000000 --- a/docs/src/pages/platforms/_meta.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "-- Switcher": { - "type": "separator", - "title": "Switcher" - }, - "index": { - "display": "hidden" - } -} diff --git a/docs/src/pages/platforms/index.mdx b/docs/src/pages/platforms/index.mdx deleted file mode 100644 index 8ebaabe42..000000000 --- a/docs/src/pages/platforms/index.mdx +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Coming Soon -description: Exciting new features and platforms are on the way. Stay tuned for Jan Web, Jan Mobile, and our API Platform. -keywords: - [ - Jan, - Customizable Intelligence, LLM, - local AI, - privacy focus, - free and open source, - private and offline, - conversational AI, - no-subscription fee, - large language models, - coming soon, - Jan Web, - Jan Mobile, - API Platform, - ] ---- - -import { Callout } from 'nextra/components' - -
-
-

- 🚀 Coming Soon -

-

- We're working on the next stage of Jan - making our local assistant more powerful and available in more platforms. -

-
- -
-
-
🌐
-

Jan Web

-

- Access Jan directly from your browser with our powerful web interface -

-
- -
-
📱
-

Jan Mobile

-

- Take Jan on the go with our native mobile applications -

-
- -
-
-

API Platform

-

- Integrate Jan's capabilities into your applications with our API -

-
-
- - - **Stay Updated**: Follow our [GitHub repository](https://github.com/menloresearch/jan) and join our [Discord community](https://discord.com/invite/FTk2MvZwJH) for the latest updates on these exciting releases! - - -
-

What to Expect

-
-
- -
- Seamless Experience: Unified interface across all platforms -
-
-
- -
- Privacy First: Same privacy-focused approach you trust -
-
-
- -
- Developer Friendly: Robust APIs and comprehensive documentation -
-
-
-
-
diff --git a/docs/src/pages/server/_meta.json b/docs/src/pages/server/_meta.json new file mode 100644 index 000000000..151e650ea --- /dev/null +++ b/docs/src/pages/server/_meta.json @@ -0,0 +1,24 @@ +{ + "-- Switcher": { + "type": "separator", + "title": "Switcher" + }, + "get-started-separator": { + "title": "Get started", + "type": "separator" + }, + "index": "Overview", + "installation": "Installation", + "configuration": "Configuration", + "core-concepts-separator": { + "title": "Core concepts", + "type": "separator" + }, + "api-reference": "API Reference", + "resource-separator": { + "title": "Resources", + "type": "separator" + }, + "architecture": "Architecture", + "development": "Development" +} diff --git a/docs/src/pages/server/api-reference.mdx b/docs/src/pages/server/api-reference.mdx new file mode 100644 index 000000000..662127638 --- /dev/null +++ b/docs/src/pages/server/api-reference.mdx @@ -0,0 +1,378 @@ +--- +title: API Reference +description: Complete API documentation for Jan Server endpoints and OpenAI compatibility. +--- + +## Base URL + +All API endpoints are available at the API gateway base URL: + +``` +http://localhost:8080/api/v1 +``` + +The API gateway automatically forwards port 8080 when using the standard deployment scripts. + +## Authentication + +Jan Server supports multiple authentication methods: + +### JWT Token Authentication + +Include JWT token in the Authorization header: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/protected-endpoint +``` + +### API Key Authentication + +Include API key in the Authorization header: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/protected-endpoint +``` + +## OpenAI-Compatible Endpoints + +Jan Server implements OpenAI-compatible endpoints for seamless integration with existing tools. + +### Chat Completions + +**Endpoint**: `POST /api/v1/chat/completions` + +Standard OpenAI chat completions API for conversational AI. + +```bash +curl -X POST http://localhost:8080/api/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "model": "jan-v1-4b", + "messages": [ + {"role": "user", "content": "Hello, how are you?"} + ], + "max_tokens": 100, + "temperature": 0.7 + }' +``` + +**Parameters:** +- `model` (string): Model identifier (`jan-v1-4b`) +- `messages` (array): Conversation history +- `max_tokens` (integer): Maximum response tokens +- `temperature` (float): Response randomness (0.0 to 2.0) +- `stream` (boolean): Enable streaming responses + +### Model Information + +**Endpoint**: `GET /api/v1/models` + +List available models: + +```bash +curl http://localhost:8080/api/v1/models +``` + +**Response:** +```json +{ + "object": "list", + "data": [ + { + "id": "jan-v1-4b", + "object": "model", + "created": 1234567890, + "owned_by": "jan" + } + ] +} +``` + +### Completions (Text Generation) + +**Endpoint**: `POST /api/v1/completions` + +Text completion endpoint: + +```bash +curl -X POST http://localhost:8080/api/v1/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "model": "jan-v1-4b", + "prompt": "The meaning of life is", + "max_tokens": 50 + }' +``` + +## Authentication Endpoints + +### OAuth2 Google Login + +**Endpoint**: `GET /auth/google` + +Redirects to Google OAuth2 authorization: + +```bash +curl http://localhost:8080/auth/google +``` + +### OAuth2 Callback + +**Endpoint**: `GET /auth/google/callback` + +Handles OAuth2 callback and issues JWT token: + +``` +http://localhost:8080/auth/google/callback?code=&state= +``` + +### Token Refresh + +**Endpoint**: `POST /api/v1/auth/refresh` + +Refresh expired JWT tokens: + +```bash +curl -X POST http://localhost:8080/api/v1/auth/refresh \ + -H "Authorization: Bearer " +``` + +## User Management + +### User Profile + +**Endpoint**: `GET /api/v1/user/profile` + +Get current user profile: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/user/profile +``` + +### API Keys + +**Endpoint**: `POST /api/v1/user/api-keys` + +Generate new API key: + +```bash +curl -X POST http://localhost:8080/api/v1/user/api-keys \ + -H "Authorization: Bearer " \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Development Key", + "permissions": ["read", "write"] + }' +``` + +## Conversation Management + +### Create Conversation + +**Endpoint**: `POST /api/v1/conversations` + +Create new conversation: + +```bash +curl -X POST http://localhost:8080/api/v1/conversations \ + -H "Authorization: Bearer " \ + -H "Content-Type: application/json" \ + -d '{ + "title": "My Conversation", + "model": "jan-v1-4b" + }' +``` + +### List Conversations + +**Endpoint**: `GET /api/v1/conversations` + +Get user's conversations: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/conversations +``` + +### Get Conversation + +**Endpoint**: `GET /api/v1/conversations/{id}` + +Get specific conversation with message history: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/conversations/123 +``` + +## Health and Status + +### Health Check + +**Endpoint**: `GET /health` + +Basic health check: + +```bash +curl http://localhost:8080/health +``` + +**Response:** +```json +{ + "status": "ok", + "timestamp": "2024-01-01T12:00:00Z" +} +``` + +### System Status + +**Endpoint**: `GET /api/v1/status` + +Detailed system status: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/status +``` + +**Response:** +```json +{ + "api_gateway": "healthy", + "inference_model": "healthy", + "database": "healthy", + "external_apis": { + "serper": "healthy" + } +} +``` + +## Error Responses + +Jan Server returns standard HTTP status codes and JSON error responses: + +```json +{ + "error": { + "message": "Invalid request format", + "type": "invalid_request_error", + "code": "invalid_json" + } +} +``` + +### Common Error Codes + +| Status Code | Description | +|-------------|-------------| +| `400` | Bad Request - Invalid request format | +| `401` | Unauthorized - Invalid or missing authentication | +| `403` | Forbidden - Insufficient permissions | +| `404` | Not Found - Resource not found | +| `429` | Too Many Requests - Rate limit exceeded | +| `500` | Internal Server Error - Server error | +| `503` | Service Unavailable - Service temporarily unavailable | + +## Interactive Documentation + +Jan Server provides interactive Swagger documentation at: + +``` +http://localhost:8080/api/swagger/index.html#/ +``` + +This interface allows you to: +- Browse all available endpoints +- Test API calls directly from the browser +- View request/response schemas +- Generate code samples + +The Swagger documentation is auto-generated from Go code annotations and provides the most up-to-date API reference. + +## Rate Limiting + +API endpoints implement rate limiting to prevent abuse: + +- **Authenticated requests**: 1000 requests per hour per user +- **Unauthenticated requests**: 100 requests per hour per IP +- **Model inference**: 60 requests per minute per user + +Rate limit headers are included in responses: +``` +X-RateLimit-Limit: 1000 +X-RateLimit-Remaining: 999 +X-RateLimit-Reset: 1609459200 +``` + +## SDK and Client Libraries + +### JavaScript/Node.js + +Use the OpenAI JavaScript SDK with Jan Server: + +```javascript +import OpenAI from 'openai'; + +const openai = new OpenAI({ + baseURL: 'http://localhost:8080/api/v1', + apiKey: 'your-jwt-token' +}); + +const completion = await openai.chat.completions.create({ + model: 'jan-v1-4b', + messages: [ + { role: 'user', content: 'Hello!' } + ] +}); +``` + +### Python + +Use the OpenAI Python SDK: + +```python +import openai + +openai.api_base = "http://localhost:8080/api/v1" +openai.api_key = "your-jwt-token" + +response = openai.ChatCompletion.create( + model="jan-v1-4b", + messages=[ + {"role": "user", "content": "Hello!"} + ] +) +``` + +### cURL Examples + +Complete cURL examples for common operations: + +```bash +# Get models +curl http://localhost:8080/api/v1/models + +# Chat completion +curl -X POST http://localhost:8080/api/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jan-v1-4b", + "messages": [{"role": "user", "content": "Hello"}] + }' + +# Streaming chat completion +curl -X POST http://localhost:8080/api/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "jan-v1-4b", + "messages": [{"role": "user", "content": "Tell me a story"}], + "stream": true + }' \ + --no-buffer +``` diff --git a/docs/src/pages/server/architecture.mdx b/docs/src/pages/server/architecture.mdx new file mode 100644 index 000000000..03dc1b363 --- /dev/null +++ b/docs/src/pages/server/architecture.mdx @@ -0,0 +1,191 @@ +--- +title: Architecture +description: Technical architecture and system design of Jan Server components. +--- + +## System Overview + +Jan Server implements a microservices architecture on Kubernetes with three core components communicating over HTTP and managed by Helm charts. + +```mermaid +graph TD + Client[Client/Browser] --> Gateway[jan-api-gateway:8080] + Gateway --> Model[jan-inference-model:8101] + Gateway --> DB[(PostgreSQL:5432)] + Gateway --> Serper[Serper API] + Gateway --> OAuth[Google OAuth2] +``` + +## Components + +### API Gateway (`jan-api-gateway`) + +**Technology Stack:** +- **Language**: Go 1.24.6 +- **Framework**: Gin web framework +- **ORM**: GORM with PostgreSQL driver +- **DI**: Google Wire for dependency injection +- **Documentation**: Swagger/OpenAPI auto-generated + +**Responsibilities:** +- HTTP request routing and middleware +- User authentication via JWT and OAuth2 +- Database operations and data persistence +- External API integration (Serper, Google OAuth) +- OpenAI-compatible API endpoints +- Request forwarding to inference service + +**Key Directories:** +``` +application/ +├── cmd/server/ # Main entry point and DI wiring +├── app/ # Core business logic +├── config/ # Environment variables and settings +└── docs/ # Auto-generated Swagger docs +``` + +### Inference Model (`jan-inference-model`) + +**Technology Stack:** +- **Base Image**: VLLM OpenAI v0.10.0 +- **Model**: Jan-v1-4B (downloaded from Hugging Face) +- **Protocol**: OpenAI-compatible HTTP API +- **Features**: Tool calling, reasoning parsing + +**Configuration:** +- **Model Path**: `/models/Jan-v1-4B` +- **Served Name**: `jan-v1-4b` +- **Port**: 8101 +- **Batch Tokens**: 1024 max +- **Tool Parser**: Hermes +- **Reasoning Parser**: Qwen3 + +**Capabilities:** +- Text generation and completion +- Tool calling and function execution +- Multi-turn conversations +- Reasoning and chain-of-thought + +### Database (PostgreSQL) + +**Configuration:** +- **Database**: `jan` +- **User**: `jan-user` +- **Password**: `jan-password` +- **Port**: 5432 + +**Schema:** +- User accounts and authentication +- Conversation history +- Project and organization management +- API keys and access control + +## Data Flow + +### Request Processing + +1. **Client Request**: HTTP request to API gateway on port 8080 +2. **Authentication**: JWT token validation or OAuth2 flow +3. **Request Routing**: Gateway routes to appropriate handler +4. **Database Operations**: GORM queries for user data/state +5. **Inference Call**: HTTP request to model service on port 8101 +6. **Response Assembly**: Gateway combines results and returns to client + +### Authentication Flow + +**JWT Authentication:** +1. User provides credentials +2. Gateway validates against database +3. JWT token issued with HMAC-SHA256 signing +4. Subsequent requests include JWT in Authorization header + +**OAuth2 Flow:** +1. Client redirected to Google OAuth2 +2. Authorization code returned to redirect URL +3. Gateway exchanges code for access token +4. User profile retrieved from Google +5. Local JWT token issued + +## Deployment Architecture + +### Kubernetes Resources + +**Deployments:** +- `jan-api-gateway`: Single replica Go application +- `jan-inference-model`: Single replica VLLM server +- `postgresql`: StatefulSet with persistent storage + +**Services:** +- `jan-api-gateway`: ClusterIP exposing port 8080 +- `jan-inference-model`: ClusterIP exposing port 8101 +- `postgresql`: ClusterIP exposing port 5432 + +**Configuration:** +- Environment variables via Helm values +- Secrets for sensitive data (JWT keys, OAuth credentials) +- ConfigMaps for application settings + +### Helm Chart Structure + +``` +charts/ +├── umbrella-chart/ # Main deployment chart +│ ├── Chart.yaml +│ ├── values.yaml # Configuration values +│ └── Chart.lock +└── apps-charts/ # Individual service charts + ├── jan-api-gateway/ + └── jan-inference-model/ +``` + +## Security Architecture + +### Authentication Methods +- **JWT Tokens**: HMAC-SHA256 signed tokens for API access +- **OAuth2**: Google OAuth2 integration for user login +- **API Keys**: HMAC-SHA256 signed keys for service access + +### Network Security +- **Internal Communication**: Services communicate over Kubernetes cluster network +- **External Access**: Only API gateway exposed via port forwarding or ingress +- **Database Access**: PostgreSQL accessible only within cluster + +### Data Security +- **Secrets Management**: Kubernetes secrets for sensitive configuration +- **Environment Variables**: Non-sensitive config via environment variables +- **Database Encryption**: Standard PostgreSQL encryption at rest + +Production deployments should implement additional security measures including TLS termination, network policies, and secret rotation. + +## Scalability Considerations + +**Current Limitations:** +- Single replica deployments +- No horizontal pod autoscaling +- Local storage for database + +**Future Enhancements:** +- Multi-replica API gateway with load balancing +- Horizontal pod autoscaling based on CPU/memory +- External database with clustering +- Redis caching layer +- Message queue for async processing + +## Development Architecture + +### Code Generation +- **Swagger**: API documentation generated from Go annotations +- **Wire**: Dependency injection code generated from providers +- **GORM Gen**: Database model generation from schema + +### Build Process +1. **API Gateway**: Multi-stage Docker build with Go compilation +2. **Inference Model**: Base VLLM image with model download +3. **Helm Charts**: Dependency management and templating +4. **Documentation**: Auto-generation during development + +### Local Development +- **Hot Reload**: Source code changes reflected without full rebuild +- **Database Migrations**: Automated schema updates +- **API Testing**: Swagger UI for interactive testing +- **Logging**: Structured logging with configurable levels diff --git a/docs/src/pages/server/configuration.mdx b/docs/src/pages/server/configuration.mdx new file mode 100644 index 000000000..cbcba57fd --- /dev/null +++ b/docs/src/pages/server/configuration.mdx @@ -0,0 +1,263 @@ +--- +title: Configuration +description: Configure Jan Server environment variables, authentication, and external integrations. +--- + +## Environment Variables + +Jan Server configuration is managed through environment variables defined in the Helm values file at `charts/umbrella-chart/values.yaml`. + +### API Gateway Configuration + +#### Core Settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `JAN_INFERENCE_MODEL_URL` | `http://jan-server-jan-inference-model:8101` | Internal URL for inference service | + +#### Authentication + +| Variable | Purpose | Format | +|----------|---------|--------| +| `JWT_SECRET` | JWT token signing | Base64 encoded HMAC-SHA256 key | +| `APIKEY_SECRET` | API key signing | Base64 encoded HMAC-SHA256 key | + +The default JWT and API key secrets are for development only. Generate new secrets for production deployments. + +#### OAuth2 Integration + +| Variable | Description | +|----------|-------------| +| `OAUTH2_GOOGLE_CLIENT_ID` | Google OAuth2 application client ID | +| `OAUTH2_GOOGLE_CLIENT_SECRET` | Google OAuth2 application secret | +| `OAUTH2_GOOGLE_REDIRECT_URL` | Callback URL for OAuth2 flow | + +#### External APIs + +| Variable | Provider | Purpose | +|----------|----------|---------| +| `SERPER_API_KEY` | Serper | Web search integration | + +#### Database Connection + +| Variable | Default | Description | +|----------|---------|-------------| +| `DB_POSTGRESQL_WRITE_DSN` | `host=jan-server-postgresql user=jan-user password=jan-password dbname=jan port=5432 sslmode=disable` | Write database connection | +| `DB_POSTGRESQL_READ1_DSN` | `host=jan-server-postgresql user=jan-user password=jan-password dbname=jan port=5432 sslmode=disable` | Read database connection | + +## Helm Configuration + +### Updating Values + +Edit the configuration in `charts/umbrella-chart/values.yaml`: + +```yaml +jan-api-gateway: + env: + - name: SERPER_API_KEY + value: your_serper_api_key + - name: OAUTH2_GOOGLE_CLIENT_ID + value: your_google_client_id + - name: OAUTH2_GOOGLE_CLIENT_SECRET + value: your_google_client_secret +``` + +### Applying Changes + +After modifying values, redeploy the application: + +```bash +helm upgrade jan-server ./charts/umbrella-chart +``` + +## Authentication Setup + +### JWT Tokens + +Generate a secure JWT signing key: + +```bash +# Generate 256-bit key for HMAC-SHA256 +openssl rand -base64 32 +``` + +Update the `JWT_SECRET` value in your Helm configuration. + +### API Keys + +Generate a secure API key signing secret: + +```bash +# Generate 256-bit key for HMAC-SHA256 +openssl rand -base64 32 +``` + +Update the `APIKEY_SECRET` value in your Helm configuration. + +### Google OAuth2 + +1. **Create Google Cloud Project** + - Go to [Google Cloud Console](https://console.cloud.google.com) + - Create a new project or select existing + +2. **Enable OAuth2** + - Navigate to "APIs & Services" > "Credentials" + - Create OAuth2 client ID credentials + - Set application type to "Web application" + +3. **Configure Redirect URI** + ``` + http://localhost:8080/auth/google/callback + ``` + +4. **Update Configuration** + - Set `OAUTH2_GOOGLE_CLIENT_ID` to your client ID + - Set `OAUTH2_GOOGLE_CLIENT_SECRET` to your client secret + - Set `OAUTH2_GOOGLE_REDIRECT_URL` to your callback URL + +## External Integrations + +### Serper API + +Jan Server integrates with Serper for web search capabilities. + +1. **Get API Key** + - Register at [serper.dev](https://serper.dev) + - Generate API key from dashboard + +2. **Configure** + - Set `SERPER_API_KEY` in Helm values + - Redeploy the application + +### Adding New Integrations + +To add new external API integrations: + +1. **Update Helm Values** + ```yaml + jan-api-gateway: + env: + - name: YOUR_API_KEY + value: your_api_key_value + ``` + +2. **Update Go Configuration** + + Add to `config/environment_variables/env.go`: + ```go + YourAPIKey string `env:"YOUR_API_KEY"` + ``` + +3. **Redeploy** + ```bash + helm upgrade jan-server ./charts/umbrella-chart + ``` + +## Database Configuration + +### Connection Settings + +The default PostgreSQL configuration uses: +- **Host**: `jan-server-postgresql` (Kubernetes service name) +- **Database**: `jan` +- **User**: `jan-user` +- **Password**: `jan-password` +- **Port**: `5432` +- **SSL**: Disabled (development only) + +### Production Database + +For production deployments: + +1. **External Database** + - Use managed PostgreSQL service (AWS RDS, Google Cloud SQL) + - Update DSN variables with external connection details + +2. **SSL/TLS** + - Enable `sslmode=require` in connection strings + - Configure certificate validation + +3. **Connection Pooling** + - Consider using connection pooler (PgBouncer, pgpool-II) + - Configure appropriate pool sizes + +## Model Configuration + +The inference model service is configured via Docker CMD parameters: + +```dockerfile +CMD ["--model", "/models/Jan-v1-4B", \ + "--served-model-name", "jan-v1-4b", \ + "--host", "0.0.0.0", \ + "--port", "8101", \ + "--max-num-batched-tokens", "1024", \ + "--enable-auto-tool-choice", \ + "--tool-call-parser", "hermes", \ + "--reasoning-parser", "qwen3"] +``` + +### Model Parameters + +| Parameter | Value | Description | +|-----------|-------|-------------| +| `--model` | `/models/Jan-v1-4B` | Path to model files | +| `--served-model-name` | `jan-v1-4b` | API model identifier | +| `--max-num-batched-tokens` | `1024` | Maximum tokens per batch | +| `--tool-call-parser` | `hermes` | Tool calling format | +| `--reasoning-parser` | `qwen3` | Reasoning output format | + +Model configuration changes require rebuilding the inference Docker image. This will be configurable via environment variables in future releases. + +## Resource Configuration + +### Kubernetes Resources + +Current deployments use default resource limits. For production: + +```yaml +jan-api-gateway: + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + +jan-inference-model: + resources: + requests: + cpu: 1000m + memory: 4Gi + limits: + cpu: 4000m + memory: 8Gi +``` + +### Storage + +PostgreSQL uses default Kubernetes storage. For production: + +```yaml +postgresql: + persistence: + enabled: true + size: 20Gi + storageClass: fast-ssd +``` + +## Logging Configuration + +Configure logging levels via environment variables: + +```yaml +jan-api-gateway: + env: + - name: LOG_LEVEL + value: info + - name: LOG_FORMAT + value: json +``` + +Available log levels: `debug`, `info`, `warn`, `error` +Available formats: `text`, `json` diff --git a/docs/src/pages/server/development.mdx b/docs/src/pages/server/development.mdx new file mode 100644 index 000000000..c773a2891 --- /dev/null +++ b/docs/src/pages/server/development.mdx @@ -0,0 +1,445 @@ +--- +title: Development +description: Development setup, workflow, and contribution guidelines for Jan Server. +--- + +## Development Setup + +### Prerequisites + +- **Go**: 1.24.6 or later +- **Docker**: For containerization +- **minikube**: Local Kubernetes development +- **Helm**: Package management +- **Make**: Build automation + +### Initial Setup + + +1. **Clone Repository** + ```bash + git clone https://github.com/menloresearch/jan-server + cd jan-server + ``` + +2. **Install Development Tools** + ```bash + cd apps/jan-api-gateway/application + make install + ``` + +3. **Generate Code** + ```bash + make setup + ``` + +4. **Start Development Environment** + ```bash + # From project root + ./scripts/run.sh + ``` + +## API Gateway Development + +### Project Structure + +``` +apps/jan-api-gateway/application/ +├── cmd/server/ # Entry point and dependency injection +│ ├── server.go # Main server setup +│ ├── wire.go # DI configuration +│ └── wire_gen.go # Generated DI code +├── app/ # Core application logic +│ ├── domain/ # Business entities +│ ├── repository/ # Data access layer +│ ├── service/ # Business logic +│ └── handler/ # HTTP handlers +├── config/ # Configuration management +└── docs/ # Generated API documentation +``` + +### Build Commands + +```bash +# Install development dependencies +make install + +# Generate API documentation +make doc + +# Generate dependency injection code +make wire + +# Complete setup (doc + wire) +make setup + +# Build application +go build -o jan-api-gateway ./cmd/server +``` + +### Code Generation + +Jan Server uses code generation for several components: + +**Swagger Documentation:** +```bash +# Generates docs/swagger.json and docs/swagger.yaml +swag init --parseDependency -g cmd/server/server.go -o docs +``` + +**Dependency Injection:** +```bash +# Generates wire_gen.go from wire.go providers +wire ./cmd/server +``` + +**Database Models:** +```bash +# Generate GORM models (when schema changes) +go run cmd/codegen/gorm/gorm.go +``` + +### Local Development + +#### Running API Gateway Locally + +```bash +cd apps/jan-api-gateway/application + +# Set environment variables +export JAN_INFERENCE_MODEL_URL=http://localhost:8101 +export JWT_SECRET=your-jwt-secret +export DB_POSTGRESQL_WRITE_DSN="host=localhost user=jan-user password=jan-password dbname=jan port=5432 sslmode=disable" + +# Run the server +go run ./cmd/server +``` + +#### Database Setup + +For local development, you can run PostgreSQL directly: + +```bash +# Using Docker +docker run -d \ + --name jan-postgres \ + -e POSTGRES_DB=jan \ + -e POSTGRES_USER=jan-user \ + -e POSTGRES_PASSWORD=jan-password \ + -p 5432:5432 \ + postgres:14 +``` + +## Testing + +### Running Tests + +```bash +# Run all tests +go test ./... + +# Run tests with coverage +go test -cover ./... + +# Run specific test package +go test ./app/service/... +``` + +### Test Structure + +``` +app/ +├── service/ +│ ├── auth_service.go +│ ├── auth_service_test.go +│ ├── conversation_service.go +│ └── conversation_service_test.go +└── handler/ + ├── auth_handler.go + ├── auth_handler_test.go + ├── chat_handler.go + └── chat_handler_test.go +``` + +### Writing Tests + +Example service test: + +```go +func TestAuthService_ValidateToken(t *testing.T) { + // Setup + service := NewAuthService(mockRepo, mockConfig) + + // Test cases + tests := []struct { + name string + token string + expectValid bool + expectError bool + }{ + {"valid token", "valid.jwt.token", true, false}, + {"invalid token", "invalid.token", false, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + valid, err := service.ValidateToken(tt.token) + assert.Equal(t, tt.expectValid, valid) + assert.Equal(t, tt.expectError, err != nil) + }) + } +} +``` + +## Docker Development + +### Building Images + +```bash +# Build API gateway +docker build -t jan-api-gateway:dev ./apps/jan-api-gateway + +# Build inference model +docker build -t jan-inference-model:dev ./apps/jan-inference-model +``` + +### Development Compose + +For local development without Kubernetes: + +```yaml +# docker-compose.dev.yml +version: '3.8' +services: + postgres: + image: postgres:14 + environment: + POSTGRES_DB: jan + POSTGRES_USER: jan-user + POSTGRES_PASSWORD: jan-password + ports: + - "5432:5432" + + api-gateway: + build: ./apps/jan-api-gateway + ports: + - "8080:8080" + environment: + - JAN_INFERENCE_MODEL_URL=http://inference-model:8101 + - DB_POSTGRESQL_WRITE_DSN=host=postgres user=jan-user password=jan-password dbname=jan port=5432 sslmode=disable + depends_on: + - postgres + + inference-model: + build: ./apps/jan-inference-model + ports: + - "8101:8101" +``` + +## Debugging + +### Go Debugging + +For VS Code debugging, add to `.vscode/launch.json`: + +```json +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Launch Jan API Gateway", + "type": "go", + "request": "launch", + "mode": "auto", + "program": "${workspaceFolder}/apps/jan-api-gateway/application/cmd/server", + "env": { + "JAN_INFERENCE_MODEL_URL": "http://localhost:8101", + "JWT_SECRET": "development-secret" + } + } + ] +} +``` + +### Application Logs + +```bash +# View API gateway logs +kubectl logs deployment/jan-server-jan-api-gateway -f + +# View inference model logs +kubectl logs deployment/jan-server-jan-inference-model -f + +# View PostgreSQL logs +kubectl logs statefulset/jan-server-postgresql -f +``` + +### Log Levels + +Set log level via environment variable: + +```bash +export LOG_LEVEL=debug # debug, info, warn, error +``` + +## Code Style and Standards + +### Go Standards + +- Follow [Go Code Review Comments](https://go.dev/wiki/CodeReviewComments) +- Use `gofmt` for formatting +- Run `go vet` for static analysis +- Use meaningful variable and function names + +### API Standards + +- RESTful endpoint design +- OpenAPI/Swagger annotations for all endpoints +- Consistent error response format +- Proper HTTP status codes + +### Git Workflow + +```bash +# Create feature branch +git checkout -b feature/your-feature-name + +# Make changes and commit +git add . +git commit -m "feat: add new authentication endpoint" + +# Push and create PR +git push origin feature/your-feature-name +``` + +### Commit Message Format + +Follow conventional commits: + +``` +feat: add new feature +fix: resolve bug in authentication +docs: update API documentation +test: add unit tests for service layer +refactor: improve error handling +``` + +## Performance Testing + +### Load Testing + +Use [k6](https://k6.io) for API load testing: + +```javascript +// load-test.js +import http from 'k6/http'; + +export default function () { + const response = http.post('http://localhost:8080/api/v1/chat/completions', { + model: 'jan-v1-4b', + messages: [ + { role: 'user', content: 'Hello!' } + ] + }, { + headers: { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer your-token' + } + }); + + check(response, { + 'status is 200': (r) => r.status === 200, + 'response time < 5000ms': (r) => r.timings.duration < 5000, + }); +} +``` + +Run load test: +```bash +k6 run --vus 10 --duration 30s load-test.js +``` + +### Memory Profiling + +Enable Go profiling endpoints: + +```go +import _ "net/http/pprof" + +// In main.go +go func() { + log.Println(http.ListenAndServe("localhost:6060", nil)) +}() +``` + +Profile memory usage: +```bash +go tool pprof http://localhost:6060/debug/pprof/heap +``` + +## Contributing + +### Pull Request Process + +1. **Fork the repository** +2. **Create feature branch** from `main` +3. **Make changes** following code standards +4. **Add tests** for new functionality +5. **Update documentation** if needed +6. **Submit pull request** with clear description + +### Code Review Checklist + +- [ ] Code follows Go standards +- [ ] Tests added for new features +- [ ] Documentation updated +- [ ] API endpoints have Swagger annotations +- [ ] No breaking changes without version bump +- [ ] Security considerations addressed + +### Issues and Bug Reports + +When reporting bugs, include: + +- **Environment**: OS, Go version, minikube version +- **Steps to reproduce**: Clear, minimal reproduction steps +- **Expected behavior**: What should happen +- **Actual behavior**: What actually happens +- **Logs**: Relevant error messages or logs + +For security issues, please report privately to the maintainers instead of creating public issues. + +## Release Process + +### Version Management + +Jan Server uses semantic versioning (semver): + +- **Major**: Breaking changes +- **Minor**: New features, backward compatible +- **Patch**: Bug fixes, backward compatible + +### Building Releases + +```bash +# Tag release +git tag -a v1.2.3 -m "Release v1.2.3" + +# Build release images +docker build -t jan-api-gateway:v1.2.3 ./apps/jan-api-gateway +docker build -t jan-inference-model:v1.2.3 ./apps/jan-inference-model + +# Push tags +git push origin v1.2.3 +``` + +### Deployment + +Production deployments follow the same Helm chart structure: + +```bash +# Deploy specific version +helm install jan-server ./charts/umbrella-chart \ + --set jan-api-gateway.image.tag=v1.2.3 \ + --set jan-inference-model.image.tag=v1.2.3 +``` diff --git a/docs/src/pages/server/index.mdx b/docs/src/pages/server/index.mdx new file mode 100644 index 000000000..de595e5dc --- /dev/null +++ b/docs/src/pages/server/index.mdx @@ -0,0 +1,39 @@ +--- +title: Jan Server +description: Self-hosted AI infrastructure running the Jan platform on Kubernetes. +keywords: + [ + Jan Server, + self-hosted AI, + Kubernetes deployment, + Docker containers, + AI inference, + local LLM server, + VLLM, + Go API gateway, + Jan-v1 model + ] +--- + +## Self-Hosted Jan Platform + +Jan Server deploys the Jan AI platform on your own infrastructure using Kubernetes. It provides a complete AI inference stack with API gateway, model serving, and data persistence. + +Jan Server is in early development. APIs and deployment methods may change. + +## Architecture Overview + +Jan Server consists of two main components: + +- **API Gateway**: Go application handling authentication, web requests, and external integrations +- **Inference Model**: VLLM server running the Jan-v1-4B model for AI inference +- **PostgreSQL**: Database for user data, conversations, and system state + +## Key Features + +- **Kubernetes Native**: Deploys via Helm charts with minikube support +- **Jan-v1 Model**: 4B parameter model optimized for reasoning and tool use +- **OpenAI Compatible API**: Standard endpoints for integration +- **Authentication**: JWT tokens and OAuth2 Google integration +- **External Integrations**: Serper API for web search capabilities +- **Development Ready**: Local development environment with hot reload diff --git a/docs/src/pages/server/installation.mdx b/docs/src/pages/server/installation.mdx new file mode 100644 index 000000000..de0609a08 --- /dev/null +++ b/docs/src/pages/server/installation.mdx @@ -0,0 +1,151 @@ +--- +title: Installation +description: Install and deploy Jan Server on Kubernetes using minikube and Helm. +--- + +## Prerequisites + +Jan Server requires the following tools installed on your system: + +- **Docker**: For building container images +- **minikube**: Local Kubernetes cluster for development +- **Helm**: Package manager for Kubernetes applications +- **kubectl**: Kubernetes command-line tool (installed with minikube) + +Jan Server currently supports minikube for local development. Production Kubernetes deployments are planned for future releases. + +## Quick Start + + +1. **Clone the repository** + ```bash + git clone https://github.com/menloresearch/jan-server + cd jan-server + ``` + +2. **Start minikube** + ```bash + minikube start + ``` + +3. **Configure Docker environment** + ```bash + eval $(minikube docker-env) + alias kubectl="minikube kubectl --" + ``` + +4. **Deploy Jan Server** + ```bash + ./scripts/run.sh + ``` + +5. **Access the API** + + The script automatically forwards port 8080. Access the Swagger UI at: + ``` + http://localhost:8080/api/swagger/index.html#/ + ``` + + +## Manual Installation + +### Build Docker Images + +Build both required Docker images: + +```bash +# Build API Gateway +docker build -t jan-api-gateway:latest ./apps/jan-api-gateway + +# Build Inference Model +docker build -t jan-inference-model:latest ./apps/jan-inference-model +``` + +The inference model image downloads the Jan-v1-4B model from Hugging Face during build. This requires an internet connection and several GB of download. + +### Deploy with Helm + +Install the Helm chart: + +```bash +# Update Helm dependencies +helm dependency update ./charts/umbrella-chart + +# Install Jan Server +helm install jan-server ./charts/umbrella-chart +``` + +### Port Forwarding + +Forward the API gateway port to access from your local machine: + +```bash +kubectl port-forward svc/jan-server-jan-api-gateway 8080:8080 +``` + +## Verify Installation + +Check that all pods are running: + +```bash +kubectl get pods +``` + +Expected output: +``` +NAME READY STATUS RESTARTS +jan-server-jan-api-gateway-xxx 1/1 Running 0 +jan-server-jan-inference-model-xxx 1/1 Running 0 +jan-server-postgresql-0 1/1 Running 0 +``` + +Test the API gateway: +```bash +curl http://localhost:8080/health +``` + +## Uninstalling + +To remove Jan Server: + +```bash +helm uninstall jan-server +``` + +To stop minikube: + +```bash +minikube stop +``` + +## Troubleshooting + +### Common Issues + +**Pods in `ImagePullBackOff` state** +- Ensure Docker images were built in the minikube environment +- Run `eval $(minikube docker-env)` before building images + +**Port forwarding connection refused** +- Verify the service is running: `kubectl get svc` +- Check pod status: `kubectl get pods` +- Review logs: `kubectl logs deployment/jan-server-jan-api-gateway` + +**Inference model download fails** +- Ensure internet connectivity during Docker build +- The Jan-v1-4B model is approximately 2.4GB + +### Resource Requirements + +**Minimum System Requirements:** +- 8GB RAM +- 20GB free disk space +- 4 CPU cores + +**Recommended System Requirements:** +- 16GB RAM +- 50GB free disk space +- 8 CPU cores +- GPU support (for faster inference) + +The inference model requires significant memory. Ensure your minikube cluster has adequate resources allocated. diff --git a/docs/theme.config.tsx b/docs/theme.config.tsx index 148079484..3b046733a 100644 --- a/docs/theme.config.tsx +++ b/docs/theme.config.tsx @@ -1,25 +1,25 @@ -import React, { Fragment } from 'react' -import { useConfig, DocsThemeConfig } from 'nextra-theme-docs' -import LogoMark from '@/components/LogoMark' -import FooterMenu from '@/components/FooterMenu' -import JSONLD from '@/components/JSONLD' -import { useRouter } from 'next/router' -import Link from 'next/link' -import { LibraryBig, Blocks, BrainCircuit, Computer } from 'lucide-react' -import { AiOutlineGithub } from 'react-icons/ai' -import { BiLogoDiscordAlt } from 'react-icons/bi' -import { RiTwitterXFill } from 'react-icons/ri' +import React, { Fragment } from "react"; +import { useConfig, DocsThemeConfig } from "nextra-theme-docs"; +import LogoMark from "@/components/LogoMark"; +import FooterMenu from "@/components/FooterMenu"; +import JSONLD from "@/components/JSONLD"; +import { useRouter } from "next/router"; +import Link from "next/link"; +import { LibraryBig, Blocks, BrainCircuit, Computer } from "lucide-react"; +import { AiOutlineGithub } from "react-icons/ai"; +import { BiLogoDiscordAlt } from "react-icons/bi"; +import { RiTwitterXFill } from "react-icons/ri"; -const defaultUrl = 'https://jan.ai' -const defaultImage = 'https://jan.ai/assets/images/general/og-image.png' +const defaultUrl = "https://jan.ai"; +const defaultImage = "https://jan.ai/assets/images/general/og-image.png"; const structuredData = { - '@context': 'https://schema.org', - '@type': 'Organization', - 'name': 'Jan', - 'url': `${defaultUrl}`, - 'logo': `${defaultImage}`, -} + "@context": "https://schema.org", + "@type": "Organization", + name: "Jan", + url: `${defaultUrl}`, + logo: `${defaultImage}`, +}; const config: DocsThemeConfig = { logo: ( @@ -30,25 +30,25 @@ const config: DocsThemeConfig = { ), - docsRepositoryBase: 'https://github.com/menloresearch/jan/tree/dev/docs', + docsRepositoryBase: "https://github.com/menloresearch/jan/tree/dev/docs", feedback: { - content: 'Question? Give us feedback →', - labels: 'feedback', + content: "Question? Give us feedback →", + labels: "feedback", }, editLink: { - text: 'Edit this page on GitHub →', + text: "Edit this page on GitHub →", }, useNextSeoProps() { return { - titleTemplate: '%s - Jan', + titleTemplate: "%s - Jan", twitter: { - cardType: 'summary_large_image', - site: '@jandotai', + cardType: "summary_large_image", + site: "@jandotai", }, openGraph: { - type: 'website', + type: "website", }, - } + }; }, navbar: { extraContent: ( @@ -68,23 +68,21 @@ const config: DocsThemeConfig = { sidebar: { titleComponent: ({ type, title }) => { // eslint-disable-next-line react-hooks/rules-of-hooks - const { asPath } = useRouter() - if (type === 'separator' && title === 'Switcher') { + const { asPath } = useRouter(); + if (type === "separator" && title === "Switcher") { return (
{[ - { title: 'Jan Desktop', path: '/docs', Icon: LibraryBig }, { - title: 'Jan Mobile', - path: '/platforms', - Icon: BrainCircuit, - }, - // { title: 'Jan Mobile', path: '/platforms', Icon: Blocks }, - { - title: 'Jan Server', - path: '/platforms', + title: "Jan Server", + path: "/server", Icon: Computer, }, + { + title: "Jan Desktop & Mobile", + path: "/docs", + Icon: LibraryBig, + }, ].map((item) => asPath.startsWith(item.path) ? (
{item.title} - ) + ), )}
- ) + ); } - return title + return title; }, defaultMenuCollapseLevel: 1, toggleButton: true, @@ -117,9 +115,9 @@ const config: DocsThemeConfig = { backToTop: true, }, head: function useHead() { - const { title, frontMatter } = useConfig() - const titleTemplate = (frontMatter?.title || title) + ' - ' + 'Jan' - const { asPath } = useRouter() + const { title, frontMatter } = useConfig(); + const titleTemplate = (frontMatter?.title || title) + " - " + "Jan"; + const { asPath } = useRouter(); return ( @@ -143,20 +141,20 @@ const config: DocsThemeConfig = { /> @@ -164,31 +162,31 @@ const config: DocsThemeConfig = { name="keywords" content={ frontMatter?.keywords?.map((keyword: string) => keyword) || [ - 'Jan', - 'Customizable Intelligence, LLM', - 'local AI', - 'privacy focus', - 'free and open source', - 'private and offline', - 'conversational AI', - 'no-subscription fee', - 'large language models', - 'build in public', - 'remote team', - 'how we work', + "Jan", + "Customizable Intelligence, LLM", + "local AI", + "privacy focus", + "free and open source", + "private and offline", + "conversational AI", + "no-subscription fee", + "large language models", + "build in public", + "remote team", + "how we work", ] } /> - ) + ); }, footer: { text: , }, nextThemes: { - defaultTheme: 'light', + defaultTheme: "light", }, -} +}; -export default config +export default config;