chore: Remove docker compose for inference
This commit is contained in:
parent
729ef8c687
commit
b89722e439
@ -1,8 +0,0 @@
|
|||||||
# Inference - LLM
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker network create traefik_public
|
|
||||||
cp .env.example .env
|
|
||||||
# -> Update MODEL_URL in `.env` file
|
|
||||||
docker compose up -d --scale llm=2
|
|
||||||
```
|
|
||||||
@ -1,60 +0,0 @@
|
|||||||
version: '3'
|
|
||||||
|
|
||||||
services:
|
|
||||||
|
|
||||||
# Service to download a model file.
|
|
||||||
downloader:
|
|
||||||
image: busybox
|
|
||||||
# The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
|
|
||||||
command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$LLM_MODEL_FILE ]; then wget -O /models/$LLM_MODEL_FILE ${MODEL_URL}; fi"
|
|
||||||
# Mount a local directory to store the downloaded model.
|
|
||||||
volumes:
|
|
||||||
- ./models:/models
|
|
||||||
|
|
||||||
# Service to wait for the downloader service to finish downloading the model.
|
|
||||||
wait-for-downloader:
|
|
||||||
image: busybox
|
|
||||||
# The command waits until the model file (specified in MODEL_URL) exists.
|
|
||||||
command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
|
|
||||||
# Specifies that this service should start after the downloader service has started.
|
|
||||||
depends_on:
|
|
||||||
downloader:
|
|
||||||
condition: service_started
|
|
||||||
# Mount the same local directory to check for the downloaded model.
|
|
||||||
volumes:
|
|
||||||
- ./models:/models
|
|
||||||
|
|
||||||
# Service to run the Llama web application.
|
|
||||||
llm:
|
|
||||||
image: ghcr.io/abetlen/llama-cpp-python:latest
|
|
||||||
# Mount the directory that contains the downloaded model.
|
|
||||||
volumes:
|
|
||||||
- ./models:/models
|
|
||||||
ports:
|
|
||||||
- 8000:8000
|
|
||||||
environment:
|
|
||||||
# Specify the path to the model for the web application.
|
|
||||||
MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
|
|
||||||
PYTHONUNBUFFERED: 1
|
|
||||||
# Health check configuration
|
|
||||||
# healthcheck:
|
|
||||||
# test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
|
|
||||||
# interval: 30s
|
|
||||||
# timeout: 10s
|
|
||||||
# retries: 3
|
|
||||||
# start_period: 30s
|
|
||||||
# Restart policy configuration
|
|
||||||
restart: on-failure
|
|
||||||
# Specifies that this service should start only after wait-for-downloader has completed successfully.
|
|
||||||
depends_on:
|
|
||||||
wait-for-downloader:
|
|
||||||
condition: service_completed_successfully
|
|
||||||
# Connect this service to the traefik_public network.
|
|
||||||
networks:
|
|
||||||
- traefik_public
|
|
||||||
|
|
||||||
# Define networks used in this docker-compose file.
|
|
||||||
networks:
|
|
||||||
# Public-facing network that Traefik uses. Marked as external because it must already exist outside this file (e.g. created with `docker network create traefik_public`).
|
|
||||||
traefik_public:
|
|
||||||
external: true
|
|
||||||
@ -1,71 +0,0 @@
|
|||||||
version: '3'
|
|
||||||
|
|
||||||
services:
|
|
||||||
|
|
||||||
# Service to download a model file.
|
|
||||||
downloader:
|
|
||||||
build:
|
|
||||||
context: ./
|
|
||||||
dockerfile: compile.Dockerfile
|
|
||||||
# platform: "linux/amd64"
|
|
||||||
# The command extracts the model filename from MODEL_URL and, if no .bin file exists in /converted_models, downloads the model and runs the conversion script with --out_type q4_0.
|
|
||||||
command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /converted_models/*.bin ]; then wget -O /converted_models/$SD_MODEL_FILE ${MODEL_URL}; python /sd.cpp/models/convert.py --out_type q4_0 --out_file /converted_models/$SD_MODEL_FILE; fi"
|
|
||||||
# Mount a local directory to store the downloaded model.
|
|
||||||
volumes:
|
|
||||||
- ./models:/converted_models
|
|
||||||
|
|
||||||
# Service to wait for the downloader service to finish downloading the model.
|
|
||||||
wait-for-downloader:
|
|
||||||
image: busybox
|
|
||||||
# The command waits until a .bin model file exists in /models.
|
|
||||||
command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/*.bin ]; do sleep 1; done; echo 'Model downloaded and converted!'"
|
|
||||||
# Specifies that this service should start after the downloader service has started.
|
|
||||||
depends_on:
|
|
||||||
downloader:
|
|
||||||
condition: service_started
|
|
||||||
# Mount the same local directory to check for the downloaded model.
|
|
||||||
volumes:
|
|
||||||
- ./models:/models
|
|
||||||
|
|
||||||
# Service to run the SD web application.
|
|
||||||
sd:
|
|
||||||
build:
|
|
||||||
context: ./
|
|
||||||
dockerfile: inference.Dockerfile
|
|
||||||
# Mount the directory that contains the downloaded model.
|
|
||||||
volumes:
|
|
||||||
- ./models:/models
|
|
||||||
- ./output/:/serving/output
|
|
||||||
command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
|
|
||||||
# platform: "linux/amd64"
|
|
||||||
environment:
|
|
||||||
# Settings for the web application: base URL, model name and directory, and SD path.
|
|
||||||
BASE_URL: http://0.0.0.0:8000
|
|
||||||
MODEL_NAME: "v1-5-pruned-emaonly-ggml-model-q5_0.bin"
|
|
||||||
MODEL_DIR: "/models"
|
|
||||||
SD_PATH: "/sd"
|
|
||||||
PYTHONUNBUFFERED: 1
|
|
||||||
ports:
|
|
||||||
- 8000:8000
|
|
||||||
# Health check configuration
|
|
||||||
# healthcheck:
|
|
||||||
# test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
|
|
||||||
# interval: 30s
|
|
||||||
# timeout: 10s
|
|
||||||
# retries: 3
|
|
||||||
# start_period: 30s
|
|
||||||
# Restart policy configuration
|
|
||||||
restart: on-failure
|
|
||||||
# Specifies that this service should start only after wait-for-downloader has completed successfully.
|
|
||||||
depends_on:
|
|
||||||
wait-for-downloader:
|
|
||||||
condition: service_completed_successfully
|
|
||||||
# Connect this service to the traefik_public network.
|
|
||||||
networks:
|
|
||||||
- traefik_public
|
|
||||||
|
|
||||||
# Define networks used in this docker-compose file.
|
|
||||||
networks:
|
|
||||||
# Public-facing network that Traefik uses. Marked as external because it must already exist outside this file (e.g. created with `docker network create traefik_public`).
|
|
||||||
traefik_public:
|
|
||||||
external: true
|
|
||||||
Loading…
x
Reference in New Issue
Block a user