---
# Docker Compose stack: download an LLM model file, serve it with
# llama-cpp-python, and expose the API behind a Traefik reverse proxy.
version: "3"

services:
  # One-shot helper that downloads the model named by $MODEL_URL into
  # the shared ./models directory (skipped if the file already exists).
  downloader:
    image: busybox
    # "$$" escapes keep shell variables and $(...) substitutions out of
    # Compose's own interpolation; only ${MODEL_URL} is expanded by
    # Compose. Download to a .tmp file and rename on success so other
    # services never observe a partially written model file.
    command: >-
      /bin/sh -c "LLM_MODEL_FILE=$$(basename ${MODEL_URL});
      if [ ! -f /models/$$LLM_MODEL_FILE ]; then
      wget -O /models/$$LLM_MODEL_FILE.tmp ${MODEL_URL} &&
      mv /models/$$LLM_MODEL_FILE.tmp /models/$$LLM_MODEL_FILE;
      fi"
    # Local directory that stores the downloaded model.
    volumes:
      - ./models:/models

  # Readiness gate: blocks until the model file exists, so the llm
  # service only starts once the download has completed.
  wait-for-downloader:
    image: busybox
    command: >-
      /bin/sh -c "LLM_MODEL_FILE=$$(basename ${MODEL_URL});
      echo 'Waiting for downloader to finish';
      while [ ! -f /models/$$LLM_MODEL_FILE ]; do sleep 1; done;
      echo 'Model downloaded!'"
    # Start only after the downloader has been started.
    depends_on:
      downloader:
        condition: service_started
    # Same local directory, used to check for the downloaded model.
    volumes:
      - ./models:/models

  # llama-cpp-python web server that serves the downloaded model.
  llm:
    image: ghcr.io/abetlen/llama-cpp-python:latest
    volumes:
      - ./models:/models
    environment:
      # Path to the model inside the container.
      # NOTE(review): hard-coded filename — must match the basename of
      # $MODEL_URL used by the downloader; verify before deploying.
      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
    labels:
      # Required: Traefik runs with exposedbydefault=false, so each
      # routed container must opt in explicitly.
      - "traefik.enable=true"
      # Route requests under /inference/llm to this service.
      - "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)"
      # Point the router at the load-balancer service defined below
      # (it is named "llm-service", not "llm").
      - "traefik.http.routers.web.service=llm-service"
      # Backend container listens on port 8000.
      - "traefik.http.services.llm-service.loadbalancer.server.port=8000"
      # Strip the /inference/llm prefix before forwarding upstream.
      - "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm"
      - "traefik.http.routers.web.middlewares=strip-llm-prefix"
      # This container sits on two networks; pin Traefik to the one it
      # shares with this service so it routes to a reachable address.
      - "traefik.docker.network=traefik_public"
    # NOTE(review): assumes wget is present in the llama-cpp-python
    # image — confirm, or switch to a python-based probe.
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    restart: on-failure
    # Start only after the model download has finished successfully.
    depends_on:
      wait-for-downloader:
        condition: service_completed_successfully
    networks:
      - inference_net
      - traefik_public

  # Traefik: HTTP reverse proxy and load balancer for the stack.
  traefik:
    image: traefik:v2.5
    command:
      # Dashboard without TLS — not recommended for production.
      - "--api.insecure=true"
      # Discover routed containers via the Docker provider.
      - "--providers.docker=true"
      # Only route to containers that opt in via traefik.enable=true.
      - "--providers.docker.exposedbydefault=false"
      # Default HTTP entry point on port 80.
      - "--entrypoints.web.address=:80"
    ports:
      # HTTP traffic.
      - "80:80"
      # Traefik dashboard.
      - "8080:8080"
    # Docker socket access lets Traefik watch container events.
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - traefik_public

# Networks used by this stack.
networks:
  # Internal network for inference traffic.
  inference_net:
  # Public-facing network used by Traefik; defined outside this file.
  traefik_public:
    external: true