diff --git a/README.md b/README.md
index 61af029c3..f1d7858fd 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,15 @@ Jan is a free, source-available and [fair code licensed](https://faircode.io/) A
 
 Jan offers an [Docker Compose](https://docs.docker.com/compose/) deployment that automates the setup process.
 
+```bash
+# Download models
+# Runway SD 1.5
+wget https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors -P jan-inference/sd/models
+
+# Download LLM
+wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models
+```
+
 Run the following command to start all the services defined in the `docker-compose.yml`
 
 ```shell
@@ -102,14 +111,4 @@ Jan is a monorepo that pulls in the following submodules
 
 ## Live Demo
 
-You can access the live demo at https://cloud.jan.ai.
-
-## Common Issues and Troubleshooting
-
-**Error in `jan-inference` service** ![](images/download-model-error.png)
-
-- Error: download model incomplete
-- Solution:
-  - Manually download the LLM model using the URL specified in the environment variable `MODEL_URL` within the `.env` file. The URL is typically https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
-  - Copy the downloaded file `llama-2-7b-chat.ggmlv3.q4_1.bin` to the folder `jan-inference/llm/models`
-  - Run `docker compose down` followed by `docker compose up -d` again to restart the services.
+You can access the live demo at https://cloud.jan.ai.
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 44db975fa..5b456f33e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -125,7 +125,6 @@ services:
       timeout: 10s
       retries: 5
       start_period: 5s
-
     networks:
       jan_community:
        ipv4_address: 172.20.0.14
@@ -152,39 +151,9 @@ services:
      jan_community:
        ipv4_address: 172.20.0.15
 
-  # Service to download a model file.
-  downloader:
-    image: busybox
-    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
-    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$LLM_MODEL_FILE ]; then wget -O /models/$LLM_MODEL_FILE ${MODEL_URL}; fi"
-    # Mount a local directory to store the downloaded model.
-    volumes:
-      - ./jan-inference/llm/models:/models
-
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.16
-
-  # Service to wait for the downloader service to finish downloading the model.
-  wait-for-downloader:
-    image: busybox
-    # The command waits until the model file (specified in MODEL_URL) exists.
-    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
-    # Specifies that this service should start after the downloader service has started.
-    depends_on:
-      downloader:
-        condition: service_started
-    # Mount the same local directory to check for the downloaded model.
-    volumes:
-      - ./jan-inference/llm/models:/models
-
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.17
-
   # Service to run the Llama web application.
   llm:
-    image: ghcr.io/abetlen/llama-cpp-python:latest
+    image: ghcr.io/abetlen/llama-cpp-python@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4
     # Mount the directory that contains the downloaded model.
     volumes:
       - ./jan-inference/llm/models:/models
@@ -192,20 +161,74 @@ services:
       - 8000:8000
     environment:
       # Specify the path to the model for the web application.
-      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
+      MODEL: /models/${LLM_MODEL_FILE}
       PYTHONUNBUFFERED: 1
     # Restart policy configuration
     restart: on-failure
     # Specifies that this service should start only after wait-for-downloader has completed successfully.
-    depends_on:
-      wait-for-downloader:
-        condition: service_completed_successfully
-    # Connect this service to two networks: inference_net and traefik_public.
-
     networks:
       jan_community:
         ipv4_address: 172.20.0.18
 
+  sd-downloader:
+    build:
+      context: ./jan-inference/sd/
+      dockerfile: compile.Dockerfile
+    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
+    command: /bin/sh -c "if [ ! -f /models/*.bin ]; then python /sd.cpp/sd_cpp/models/convert.py --out_type q4_0 --out_file /models/${SD_MODEL_FILE}.q4_0.bin /models/${SD_MODEL_FILE}; fi"
+    # Mount a local directory to store the downloaded model.
+    volumes:
+      - ./jan-inference/sd/models:/models
+    networks:
+      jan_community:
+        ipv4_address: 172.20.0.19
+
+  # Service to run the SD web application.
+  sd:
+    build:
+      context: ./jan-inference/sd/
+      dockerfile: inference.Dockerfile
+    # Mount the directory that contains the downloaded model.
+    volumes:
+      - ./jan-inference/sd/models:/models
+      - ./jan-inference/sd/output/:/serving/output
+    command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
+    environment:
+      # Specify the path to the model for the web application.
+      BASE_URL: http://0.0.0.0:8000
+      MODEL_NAME: ${SD_MODEL_FILE}.q4_0.bin
+      MODEL_DIR: "/models"
+      SD_PATH: "/sd"
+      PYTHONUNBUFFERED: 1
+    ports:
+      - 8001:8000
+    # Restart policy configuration
+    restart: on-failure
+    # Specifies that this service should start only after wait-for-downloader has completed successfully.
+    depends_on:
+      sd-downloader:
+        condition: service_completed_successfully
+    networks:
+      jan_community:
+        ipv4_address: 172.20.0.21
+
+  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
+  # traefik:
+  #   image: traefik:v2.10
+  #   ports:
+  #     # Map port 80 in the container to port 80 on the host.
+  #     - "80:80"
+  #     # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
+  #     - "8080:8080"
+  #   # Mount the Docker socket to allow Traefik to listen to Docker's API.
+  #   volumes:
+  #     - /var/run/docker.sock:/var/run/docker.sock:ro
+  #     - ./traefik/traefik.yml:/traefik.yml:ro
+  #     - ./traefik/config.yml:/config.yml:ro
+  #   networks:
+  #     jan_community:
+  #       ipv4_address: 172.20.0.22
+
 networks:
   jan_community:
     driver: bridge
diff --git a/jan-inference/docker-compose.yml b/jan-inference/docker-compose.yml
deleted file mode 100644
index 7672d2a76..000000000
--- a/jan-inference/docker-compose.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-version: '3'
-
-services:
-  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
-  traefik:
-    image: traefik:v2.10
-    ports:
-      # Map port 80 in the container to port 80 on the host.
-      - "80:80"
-      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
-      - "8080:8080"
-    # Mount the Docker socket to allow Traefik to listen to Docker's API.
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock:ro
-      - ./traefik/traefik.yml:/traefik.yml:ro
-      - ./traefik/config.yml:/config.yml:ro
-    # Connect this service to the traefik_public network.
-    networks:
-      - traefik_public
-
-# Define networks used in this docker-compose file.
-networks:
-  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
-  traefik_public:
-    external: true
diff --git a/jan-inference/llm/README.md b/jan-inference/llm/README.md
deleted file mode 100644
index 3183a8fb6..000000000
--- a/jan-inference/llm/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Inference - LLM
-
-```bash
-docker network create traefik_public
-cp .env.example .env
-# -> Update MODEL_URL in `.env` file
-docker compose up -d --scale llm=2
-```
diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml
deleted file mode 100644
index 0c15d3659..000000000
--- a/jan-inference/llm/docker-compose.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-version: '3'
-
-services:
-
-  # Service to download a model file.
-  downloader:
-    image: busybox
-    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
-    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$LLM_MODEL_FILE ]; then wget -O /models/$LLM_MODEL_FILE ${MODEL_URL}; fi"
-    # Mount a local directory to store the downloaded model.
-    volumes:
-      - ./models:/models
-
-  # Service to wait for the downloader service to finish downloading the model.
-  wait-for-downloader:
-    image: busybox
-    # The command waits until the model file (specified in MODEL_URL) exists.
-    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
-    # Specifies that this service should start after the downloader service has started.
-    depends_on:
-      downloader:
-        condition: service_started
-    # Mount the same local directory to check for the downloaded model.
-    volumes:
-      - ./models:/models
-
-  # Service to run the Llama web application.
-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python:latest
-    # Mount the directory that contains the downloaded model.
-    volumes:
-      - ./models:/models
-    ports:
-      - 8000:8000
-    environment:
-      # Specify the path to the model for the web application.
-      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
-      PYTHONUNBUFFERED: 1
-    # Health check configuration
-    # healthcheck:
-    #   test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
-    #   interval: 30s
-    #   timeout: 10s
-    #   retries: 3
-    #   start_period: 30s
-    # Restart policy configuration
-    restart: on-failure
-    # Specifies that this service should start only after wait-for-downloader has completed successfully.
-    depends_on:
-      wait-for-downloader:
-        condition: service_completed_successfully
-    # Connect this service to two networks: inference_net and traefik_public.
-    networks:
-      - traefik_public
-
-# Define networks used in this docker-compose file.
-networks:
-  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
-  traefik_public:
-    external: true
diff --git a/jan-inference/sd/docker-compose.yml b/jan-inference/sd/docker-compose.yml
deleted file mode 100644
index fd1a73a8e..000000000
--- a/jan-inference/sd/docker-compose.yml
+++ /dev/null
@@ -1,71 +0,0 @@
-version: '3'
-
-services:
-
-  # Service to download a model file.
-  downloader:
-    build:
-      context: ./
-      dockerfile: compile.Dockerfile
-    # platform: "linux/amd64"
-    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
-    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /converted_models/*.bin ]; then wget -O /converted_models/$SD_MODEL_FILE ${MODEL_URL}; python /sd.cpp/models/convert.py --out_type q4_0 --out_file /converted_models/$SD_MODEL_FILE; fi"
-    # Mount a local directory to store the downloaded model.
-    volumes:
-      - ./models:/converted_models
-
-  # Service to wait for the downloader service to finish downloading the model.
-  wait-for-downloader:
-    image: busybox
-    # The command waits until the model file (specified in MODEL_URL) exists.
-    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/*.bin ]; do sleep 1; done; echo 'Model downloaded and converted!'"
-    # Specifies that this service should start after the downloader service has started.
-    depends_on:
-      downloader:
-        condition: service_started
-    # Mount the same local directory to check for the downloaded model.
-    volumes:
-      - ./models:/models
-
-  # Service to run the SD web application.
-  sd:
-    build:
-      context: ./
-      dockerfile: inference.Dockerfile
-    # Mount the directory that contains the downloaded model.
-    volumes:
-      - ./models:/models
-      - ./output/:/serving/output
-    command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
-    # platform: "linux/amd64"
-    environment:
-      # Specify the path to the model for the web application.
-      BASE_URL: http://0.0.0.0:8000
-      MODEL_NAME: "v1-5-pruned-emaonly-ggml-model-q5_0.bin"
-      MODEL_DIR: "/models"
-      SD_PATH: "/sd"
-      PYTHONUNBUFFERED: 1
-    ports:
-      - 8000:8000
-    # Health check configuration
-    # healthcheck:
-    #   test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
-    #   interval: 30s
-    #   timeout: 10s
-    #   retries: 3
-    #   start_period: 30s
-    # Restart policy configuration
-    restart: on-failure
-    # Specifies that this service should start only after wait-for-downloader has completed successfully.
-    depends_on:
-      wait-for-downloader:
-        condition: service_completed_successfully
-    # Connect this service to two networks: inference_net and traefik_public.
-    networks:
-      - traefik_public
-
-# Define networks used in this docker-compose file.
-networks:
-  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
-  traefik_public:
-    external: true
diff --git a/jan-inference/sd/main.py b/jan-inference/sd/main.py
index f31380dd3..bf77767f2 100644
--- a/jan-inference/sd/main.py
+++ b/jan-inference/sd/main.py
@@ -4,16 +4,26 @@ from fastapi.staticfiles import StaticFiles
 import subprocess
 import os
 from uuid import uuid4
+from pydantic import BaseModel
 
 app = FastAPI()
 
 OUTPUT_DIR = "output"
 SD_PATH = os.environ.get("SD_PATH", "./sd")
 MODEL_DIR = os.environ.get("MODEL_DIR", "./models")
-BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
 MODEL_NAME = os.environ.get(
     "MODEL_NAME", "v1-5-pruned-emaonly-ggml-model-q5_0.bin")
 
+
+class Payload(BaseModel):
+    prompt: str
+    neg_prompt: str
+    seed: int
+    steps: int
+    width: int
+    height: int
+
+
 # Create the OUTPUT_DIR directory if it does not exist
 if not os.path.exists(OUTPUT_DIR):
     os.makedirs(OUTPUT_DIR)
@@ -26,33 +36,37 @@ if not os.path.exists(MODEL_DIR):
 
 app.mount("/output", StaticFiles(directory=OUTPUT_DIR), name="output")
 
 
-def run_command(prompt: str, filename: str):
+def run_command(payload: Payload, filename: str):
     # Construct the command based on your provided example
     command = [SD_PATH,
-               "-m", os.path.join(MODEL_DIR, MODEL_NAME),
-               "-p", prompt,
-               "-o", os.path.join(OUTPUT_DIR, filename)
+               "--model", f'{os.path.join(MODEL_DIR, MODEL_NAME)}',
+               "--prompt", f'"{payload.prompt}"',
+               "--negative-prompt", f'"{payload.neg_prompt}"',
+               "--height", str(payload.height),
+               "--width", str(payload.width),
+               "--steps", str(payload.steps),
+               "--seed", str(payload.seed),
+               "--mode", 'txt2img',
+               "-o", f'{os.path.join(OUTPUT_DIR, filename)}',
                ]
     try:
-        sub_output = subprocess.run(command, timeout=5*60, capture_output=True,
-                                    check=True, encoding="utf-8")
-        print(sub_output.stdout)
+        subprocess.run(command, timeout=5*60)
     except subprocess.CalledProcessError:
         raise HTTPException(
             status_code=500, detail="Failed to execute the command.")
 
 
-@app.post("/inference/")
-async def run_inference(background_tasks: BackgroundTasks, prompt: str = Form()):
+@app.post("/inferences/txt2img")
+async def run_inference(background_tasks: BackgroundTasks, payload: Payload):
     # Generate a unique filename using uuid4()
     filename = f"{uuid4()}.png"
 
     # We will use background task to run the command so it won't block
-    background_tasks.add_task(run_command, prompt, filename)
+    background_tasks.add_task(run_command, payload, filename)
 
     # Return the expected path of the output file
-    return {"url": f'{BASE_URL}/serve/{filename}'}
+    return {"url": f'/serve/{filename}'}
 
 
 @app.get("/serve/{filename}")
diff --git a/sample.env b/sample.env
index 2f2911923..507247efe 100644
--- a/sample.env
+++ b/sample.env
@@ -9,9 +9,9 @@ KEYCLOAK_ADMIN_PASSWORD=admin
 
 # Inference
 ## LLM
-MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
-LLM_MODEL_FILE=$(basename $MODEL_URL)
+LLM_MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
+LLM_MODEL_FILE=llama-2-7b-chat.ggmlv3.q4_1.bin
 
 ## SD
-MODEL_URL=https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
-SD_MODEL_FILE=$(basename $MODEL_URL)
\ No newline at end of file
+SD_MODEL_URL=https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
+SD_MODEL_FILE=v1-5-pruned-emaonly.safetensors
diff --git a/jan-inference/traefik/config.yml b/traefik/config.yml
similarity index 100%
rename from jan-inference/traefik/config.yml
rename to traefik/config.yml
diff --git a/jan-inference/traefik/traefik.yml b/traefik/traefik.yml
similarity index 100%
rename from jan-inference/traefik/traefik.yml
rename to traefik/traefik.yml
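A quick way to exercise the new SD route added in `jan-inference/sd/main.py`: the sketch below assumes the stack has been started with `docker compose up -d` and that the `sd` service is reachable on host port 8001, per the `8001:8000` mapping in the updated `docker-compose.yml`. The JSON fields mirror the new `Payload` model; the prompt, seed, and size values are placeholders.

```bash
# Submit a txt2img request; generation runs as a background task in the sd service.
curl -X POST http://localhost:8001/inferences/txt2img \
  -H "Content-Type: application/json" \
  -d '{
        "prompt": "a photo of an astronaut riding a horse",
        "neg_prompt": "blurry, low quality",
        "seed": 42,
        "steps": 20,
        "width": 512,
        "height": 512
      }'

# The response contains a relative URL such as {"url": "/serve/<uuid>.png"};
# once the background task finishes, the image is available at
# http://localhost:8001/serve/<uuid>.png
```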