jan/jan-inference/llm/docker-compose.yml

version: '3'
services:
  # Service to download a model file.
  downloader:
    image: busybox
    # Extract the model filename from MODEL_URL and download it only if it is not already present.
    # A literal $ is written as $$ so the command substitution and the shell variable are evaluated
    # by the shell rather than interpolated by Compose; ${MODEL_URL} is interpolated by Compose.
    command: /bin/sh -c "LLM_MODEL_FILE=$$(basename ${MODEL_URL}); if [ ! -f /models/$$LLM_MODEL_FILE ]; then wget -O /models/$$LLM_MODEL_FILE ${MODEL_URL}; fi"
    # Mount a local directory to store the downloaded model.
    volumes:
      - ./models:/models
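  # Example of how the download resolves (the URL is a hypothetical placeholder): with
  # MODEL_URL=https://example.com/models/llama-2-7b-chat.ggmlv3.q4_1.bin the file is saved as
  # ./models/llama-2-7b-chat.ggmlv3.q4_1.bin, matching the MODEL path used by the llm service below.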
  # Service that waits for the downloader service to finish downloading the model.
  wait-for-downloader:
    image: busybox
    # Poll until the model file derived from MODEL_URL exists, then exit successfully.
    command: /bin/sh -c "LLM_MODEL_FILE=$$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
    # Start after the downloader service has started.
    depends_on:
      downloader:
        condition: service_started
    # Mount the same local directory to check for the downloaded model.
    volumes:
      - ./models:/models
  # Service to run the llama.cpp web server (llama-cpp-python).
  llm:
    image: ghcr.io/abetlen/llama-cpp-python:latest
    # Mount the directory that contains the downloaded model.
    volumes:
      - ./models:/models
    environment:
      # Path to the model inside the container; it must match the filename downloaded from MODEL_URL.
      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
    # Health check configuration
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    # Restart policy configuration
    restart: on-failure
    # Start only after wait-for-downloader has completed successfully.
    depends_on:
      wait-for-downloader:
        condition: service_completed_successfully
    # Connect this service to two networks: inference_net and traefik_public.
    networks:
      - inference_net
      - traefik_public
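  # Example request (a sketch only: it assumes Traefik routes a hostname such as llm.localhost to this
  # service on port 8000, which depends on the rules defined in ./traefik/config.yml; port 8000 is not
  # published directly on the host):
  #   curl http://llm.localhost/v1/completions \
  #     -H "Content-Type: application/json" \
  #     -d '{"prompt": "Hello", "max_tokens": 16}'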
  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
  traefik:
    image: traefik:v2.10
    ports:
      # Map port 80 in the container to port 80 on the host.
      - "80:80"
      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
      - "8080:8080"
    volumes:
      # Mount the Docker socket read-only so Traefik can watch Docker's API.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      # Static and dynamic Traefik configuration files.
      - ./traefik/traefik.yml:/traefik.yml:ro
      - ./traefik/config.yml:/config.yml:ro
    # Connect this service to the traefik_public network.
    networks:
      - traefik_public
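  # A minimal sketch of what the mounted static config (./traefik/traefik.yml) might contain; this is an
  # assumption for illustration, not the repository's actual file:
  #   api:
  #     dashboard: true
  #     insecure: true           # serves the dashboard on :8080
  #   entryPoints:
  #     web:
  #       address: ":80"
  #   providers:
  #     docker:
  #       exposedByDefault: false
  #     file:
  #       filename: /config.yml  # dynamic configuration, e.g. the router for the llm service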
# Define the networks used in this docker-compose file.
networks:
  # Internal network for the llm service (used for inference).
  inference_net:
  # Public-facing network that Traefik uses. Marked as external because it must be created outside this file.
  traefik_public:
    external: true
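# Example usage (a sketch; the model URL is a placeholder and should point to a GGML file whose basename
# matches the MODEL path above):
#   docker network create traefik_public
#   MODEL_URL=https://example.com/models/llama-2-7b-chat.ggmlv3.q4_1.bin docker compose up -d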