diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..626303c91 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +# Jan inference +# jan-inference/llm/models/** +jan-inference/llm/.env \ No newline at end of file diff --git a/jan-inference/llm/.env.example b/jan-inference/llm/.env.example new file mode 100644 index 000000000..40a331b36 --- /dev/null +++ b/jan-inference/llm/.env.example @@ -0,0 +1,2 @@ +MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin +MODEL_FILE=$(basename $MODEL_URL) \ No newline at end of file diff --git a/jan-inference/llm/README.md b/jan-inference/llm/README.md new file mode 100644 index 000000000..3183a8fb6 --- /dev/null +++ b/jan-inference/llm/README.md @@ -0,0 +1,8 @@ +# Inference - LLM + +```bash +docker network create traefik_public +cp .env.example .env +# -> Update MODEL_URL in `.env` file +docker compose up -d --scale llm=2 +`````` diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml new file mode 100644 index 000000000..50441dee3 --- /dev/null +++ b/jan-inference/llm/docker-compose.yml @@ -0,0 +1,94 @@ +version: '3' + +services: + + # Service to download a model file. + downloader: + image: busybox + # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist. + command: /bin/sh -c "MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$MODEL_FILE ]; then wget -O /models/$MODEL_FILE ${MODEL_URL}; fi" + # Mount a local directory to store the downloaded model. + volumes: + - ./models:/models + + # Service to wait for the downloader service to finish downloading the model. + wait-for-downloader: + image: busybox + # The command waits until the model file (specified in MODEL_URL) exists. + command: /bin/sh -c "MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! 
-f /models/$MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'" + # Specifies that this service should start after the downloader service has started. + depends_on: + downloader: + condition: service_started + # Mount the same local directory to check for the downloaded model. + volumes: + - ./models:/models + + # Service to run the Llama web application. + llm: + image: ghcr.io/abetlen/llama-cpp-python:latest + # Mount the directory that contains the downloaded model. + volumes: + - ./models:/models + environment: + # Specify the path to the model for the web application. + MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin + labels: + # Instead of using the Host rule, set a PathPrefix rule + - "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)" + # This tells Traefik where to forward the traffic for this route. + - "traefik.http.routers.web.service=llm-service" + # Define a service for the llm and specify its load balancer configuration + - "traefik.http.services.llm-service.loadbalancer.server.port=8000" + + - "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm" + - "traefik.http.routers.web.middlewares=strip-llm-prefix" + # Health check configuration + healthcheck: + test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + # Restart policy configuration + restart: on-failure + # Specifies that this service should start only after wait-for-downloader has completed successfully. + depends_on: + wait-for-downloader: + condition: service_completed_successfully + # Connect this service to two networks: inference_net and traefik_public. + networks: + - inference_net + - traefik_public + + # Service for Traefik, a modern HTTP reverse proxy and load balancer. + traefik: + image: traefik:v2.5 + command: + # Enable the Traefik API dashboard without TLS (not recommended for production). 
+ - "--api.insecure=true" + # Enable Traefik to use Docker as a provider. + - "--providers.docker=true" + # Expose Docker services by default: the llm service sets routing labels but no traefik.enable label, so it would be ignored if this were false. + - "--providers.docker.exposedbydefault=true" + # Specify the default entry point on port 80. + - "--entrypoints.web.address=:80" + ports: + # Map port 80 in the container to port 80 on the host. + - "80:80" + # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host. + - "8080:8080" + # Mount the Docker socket to allow Traefik to listen to Docker's API. + volumes: + - /var/run/docker.sock:/var/run/docker.sock + # Connect this service to the traefik_public network. + networks: + - traefik_public + +# Define networks used in this docker-compose file. +networks: + # Network for the llm service (used for inference). + inference_net: + # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file. + traefik_public: + external: true diff --git a/jan-inference/llm/models/.gitkeep b/jan-inference/llm/models/.gitkeep new file mode 100644 index 000000000..e69de29bb