# Patch summary: removes three files outright (every hunk is "-1,N +0,0"):
#   jan-inference/llm/README.md            (8 lines)
#   jan-inference/llm/docker-compose.yml   (60 lines)
#   jan-inference/sd/docker-compose.yml    (71 lines)
# The text of each removed line is reproduced unchanged: in a deletion patch
# those lines must keep matching the blobs being removed, so nothing inside
# the old files is corrected here.
# NOTE(review): leading indentation inside the hunks was rebuilt from the
# YAML nesting -- confirm against blobs 3183a8fb6, 0c15d3659 and fd1a73a8e.
diff --git a/jan-inference/llm/README.md b/jan-inference/llm/README.md
deleted file mode 100644
index 3183a8fb6..000000000
--- a/jan-inference/llm/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Inference - LLM
-
-```bash
-docker network create traefik_public
-cp .env.example .env
-# -> Update MODEL_URL in `.env` file
-docker compose up -d --scale llm=2
-``````
diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml
deleted file mode 100644
index 0c15d3659..000000000
--- a/jan-inference/llm/docker-compose.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-version: '3'
-
-services:
-
-  # Service to download a model file.
-  downloader:
-    image: busybox
-    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
-    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$LLM_MODEL_FILE ]; then wget -O /models/$LLM_MODEL_FILE ${MODEL_URL}; fi"
-    # Mount a local directory to store the downloaded model.
-    volumes:
-      - ./models:/models
-
-  # Service to wait for the downloader service to finish downloading the model.
-  wait-for-downloader:
-    image: busybox
-    # The command waits until the model file (specified in MODEL_URL) exists.
-    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
-    # Specifies that this service should start after the downloader service has started.
-    depends_on:
-      downloader:
-        condition: service_started
-    # Mount the same local directory to check for the downloaded model.
-    volumes:
-      - ./models:/models
-
-  # Service to run the Llama web application.
-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python:latest
-    # Mount the directory that contains the downloaded model.
-    volumes:
-      - ./models:/models
-    ports:
-      - 8000:8000
-    environment:
-      # Specify the path to the model for the web application.
-      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
-      PYTHONUNBUFFERED: 1
-    # Health check configuration
-    # healthcheck:
-    #   test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
-    #   interval: 30s
-    #   timeout: 10s
-    #   retries: 3
-    #   start_period: 30s
-    # Restart policy configuration
-    restart: on-failure
-    # Specifies that this service should start only after wait-for-downloader has completed successfully.
-    depends_on:
-      wait-for-downloader:
-        condition: service_completed_successfully
-    # Connect this service to two networks: inference_net and traefik_public.
-    networks:
-      - traefik_public
-
-# Define networks used in this docker-compose file.
-networks:
-  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
-  traefik_public:
-    external: true
diff --git a/jan-inference/sd/docker-compose.yml b/jan-inference/sd/docker-compose.yml
deleted file mode 100644
index fd1a73a8e..000000000
--- a/jan-inference/sd/docker-compose.yml
+++ /dev/null
@@ -1,71 +0,0 @@
-version: '3'
-
-services:
-
-  # Service to download a model file.
-  downloader:
-    build:
-      context: ./
-      dockerfile: compile.Dockerfile
-    # platform: "linux/amd64"
-    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
-    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /converted_models/*.bin ]; then wget -O /converted_models/$SD_MODEL_FILE ${MODEL_URL}; python /sd.cpp/models/convert.py --out_type q4_0 --out_file /converted_models/$SD_MODEL_FILE; fi"
-    # Mount a local directory to store the downloaded model.
-    volumes:
-      - ./models:/converted_models
-
-  # Service to wait for the downloader service to finish downloading the model.
-  wait-for-downloader:
-    image: busybox
-    # The command waits until the model file (specified in MODEL_URL) exists.
-    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/*.bin ]; do sleep 1; done; echo 'Model downloaded and converted!'"
-    # Specifies that this service should start after the downloader service has started.
-    depends_on:
-      downloader:
-        condition: service_started
-    # Mount the same local directory to check for the downloaded model.
-    volumes:
-      - ./models:/models
-
-  # Service to run the SD web application.
-  sd:
-    build:
-      context: ./
-      dockerfile: inference.Dockerfile
-    # Mount the directory that contains the downloaded model.
-    volumes:
-      - ./models:/models
-      - ./output/:/serving/output
-    command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
-    # platform: "linux/amd64"
-    environment:
-      # Specify the path to the model for the web application.
-      BASE_URL: http://0.0.0.0:8000
-      MODEL_NAME: "v1-5-pruned-emaonly-ggml-model-q5_0.bin"
-      MODEL_DIR: "/models"
-      SD_PATH: "/sd"
-      PYTHONUNBUFFERED: 1
-    ports:
-      - 8000:8000
-    # Health check configuration
-    # healthcheck:
-    #   test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
-    #   interval: 30s
-    #   timeout: 10s
-    #   retries: 3
-    #   start_period: 30s
-    # Restart policy configuration
-    restart: on-failure
-    # Specifies that this service should start only after wait-for-downloader has completed successfully.
-    depends_on:
-      wait-for-downloader:
-        condition: service_completed_successfully
-    # Connect this service to two networks: inference_net and traefik_public.
-    networks:
-      - traefik_public
-
-# Define networks used in this docker-compose file.
-networks:
-  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
-  traefik_public:
-    external: true