feat(inference): Add SD CPP module
parent b891aa413e · commit 1146c29a31
@@ -6,3 +6,12 @@ POSTGRES_PORT=your_db_port
 KC_DB_SCHEMA=public
 KEYCLOAK_ADMIN=your_keycloak_admin_username
 KEYCLOAK_ADMIN_PASSWORD=your_keycloak_admin_password
+
+# Inference
+## LLM
+MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
+LLM_MODEL_FILE=$(basename $MODEL_URL)
+
+## SD
+MODEL_URL=https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
+SD_MODEL_FILE=$(basename $MODEL_URL)
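Note that `$(basename $MODEL_URL)` only expands when this file is sourced by a shell; Docker Compose reads `.env` values as literal strings, which is why both compose files below re-derive the filename inside the container command instead. A minimal sanity check, assuming a POSIX shell:

```bash
# Source the env file so the command substitutions actually expand;
# docker compose itself would treat the value as a literal string.
set -a; . ./.env; set +a
echo "$LLM_MODEL_FILE"   # -> llama-2-7b-chat.ggmlv3.q4_1.bin
```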
.gitignore (vendored, +9)
@@ -1,2 +1,11 @@
 .vscode
 .env
+
+# Jan inference
+jan-inference/llm/models/**
+jan-inference/llm/.env
+
+jan-inference/sd/models/**
+jan-inference/sd/output/**
+jan-inference/sd/.env
+jan-inference/sd/sd
.gitmodules (vendored, new file, +3)
@@ -0,0 +1,3 @@
[submodule "jan-inference/sd/sd_cpp"]
	path = jan-inference/sd/sd_cpp
	url = https://github.com/leejet/stable-diffusion.cpp
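With `sd_cpp` tracked as a submodule, a fresh checkout needs the usual extra step (standard git; the repository URL placeholder stands for whatever remote hosts this project):

```bash
# Clone with the pinned stable-diffusion.cpp sources in one go...
git clone --recurse-submodules <repo-url>
# ...or initialize them inside an existing checkout.
git submodule update --init --recursive
```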
jan-inference/llm/README.md (new file, +8)
@@ -0,0 +1,8 @@
# Inference - LLM

```bash
docker network create traefik_public
cp .env.example .env
# -> Update MODEL_URL in `.env` file
docker compose up -d --scale llm=2
```
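Once the stack is up, the llm service can be smoke-tested through Traefik. A sketch, assuming the llama-cpp-python image's OpenAI-compatible completions endpoint and the `/inference/llm` routing configured in the compose file below:

```bash
# Traefik strips the /inference/llm prefix before forwarding to the
# llm service, so this reaches the server's /v1/completions route.
curl -s http://localhost/inference/llm/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello, world.", "max_tokens": 16}'
```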
jan-inference/llm/docker-compose.yml (new file, +94)
@@ -0,0 +1,94 @@
version: '3'

services:

  # Service to download a model file.
  downloader:
    image: busybox
    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$LLM_MODEL_FILE ]; then wget -O /models/$LLM_MODEL_FILE ${MODEL_URL}; fi"
    # Mount a local directory to store the downloaded model.
    volumes:
      - ./models:/models

  # Service to wait for the downloader service to finish downloading the model.
  wait-for-downloader:
    image: busybox
    # The command waits until the model file (specified in MODEL_URL) exists.
    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
    # Start only after the downloader service has started.
    depends_on:
      downloader:
        condition: service_started
    # Mount the same local directory to check for the downloaded model.
    volumes:
      - ./models:/models

  # Service to run the Llama web application.
  llm:
    image: ghcr.io/abetlen/llama-cpp-python:latest
    # Mount the directory that contains the downloaded model.
    volumes:
      - ./models:/models
    environment:
      # Specify the path to the model for the web application.
      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
    labels:
      # Opt this container in to Traefik, since exposedbydefault is disabled below.
      - "traefik.enable=true"
      # Instead of using a Host rule, set a PathPrefix rule.
      - "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)"
      # Forward traffic for this router to the llm-service defined just below.
      - "traefik.http.routers.web.service=llm-service"
      # Define a service for the llm and specify its load balancer configuration.
      - "traefik.http.services.llm-service.loadbalancer.server.port=8000"
      # Strip the routing prefix before forwarding to the container.
      - "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm"
      - "traefik.http.routers.web.middlewares=strip-llm-prefix"
    # Health check configuration
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    # Restart policy configuration
    restart: on-failure
    # Start only after wait-for-downloader has completed successfully.
    depends_on:
      wait-for-downloader:
        condition: service_completed_successfully
    # Connect this service to two networks: inference_net and traefik_public.
    networks:
      - inference_net
      - traefik_public

  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
  traefik:
    image: traefik:v2.5
    command:
      # Enable the Traefik API dashboard without TLS (not recommended for production).
      - "--api.insecure=true"
      # Enable Traefik to use Docker as a provider.
      - "--providers.docker=true"
      # Do not expose services by default; each service opts in via labels.
      - "--providers.docker.exposedbydefault=false"
      # Specify the default entry point on port 80.
      - "--entrypoints.web.address=:80"
    ports:
      # Map port 80 in the container to port 80 on the host.
      - "80:80"
      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
      - "8080:8080"
    # Mount the Docker socket to allow Traefik to listen to Docker's API.
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    # Connect this service to the traefik_public network.
    networks:
      - traefik_public

# Define networks used in this docker-compose file.
networks:
  # Network for the llm service (used for inference).
  inference_net:
  # Public-facing network that Traefik uses; external, so it may be defined outside this file.
  traefik_public:
    external: true
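The README's `--scale llm=2` works because the router points at a single load-balancer definition that every replica registers with. Since `--api.insecure=true` exposes the Traefik v2 API on the dashboard port, the registered backends can be confirmed like this:

```bash
# Start two llm replicas behind Traefik's round-robin load balancer.
docker compose up -d --scale llm=2

# List the backend server URLs Traefik discovered for each service.
curl -s http://localhost:8080/api/http/services | grep -o '"url":"[^"]*"'
```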
jan-inference/llm/models/.gitkeep (new file, empty)
jan-inference/sd/.dockerignore (new file, +7)
@@ -0,0 +1,7 @@
output/
models/

sd_cpp/.git
sd_cpp/.github

sd
jan-inference/sd/compile.Dockerfile (new file, +10)
@@ -0,0 +1,10 @@
FROM python:3.9.17

RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH=/root/.cargo/bin:$PATH

WORKDIR /sd.cpp

COPY . .

RUN pip install -r compile.requirements.txt
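Compose builds this image automatically for the downloader service below, but it can also be built by hand; the tag name here is arbitrary:

```bash
# Build the model-download/conversion image from the sd directory.
cd jan-inference/sd
docker build -f compile.Dockerfile -t sd-downloader .
```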
jan-inference/sd/compile.requirements.txt (new file, +17)
@@ -0,0 +1,17 @@
# Compiling
-r sd_cpp/models/requirements.txt

# diffusers
# torch
# ftfy
# scipy
# transformers
# accelerate
# huggingface-hub
# xformers
# omegaconf
# safetensors
# cog
# tomesd
# compel
jan-inference/sd/docker-compose.yml (new file, +98)
@@ -0,0 +1,98 @@
version: '3'

services:

  # Service to download and convert a model file.
  downloader:
    build:
      context: ./
      dockerfile: compile.Dockerfile
    # platform: "linux/amd64"
    # Extracts the model filename from MODEL_URL; if no converted model exists yet,
    # downloads the weights and converts them to a GGML .bin with sd_cpp's convert.py.
    # NOTE: passing the downloaded file as the input path and quantizing to q5_0 (so the
    # output matches MODEL_NAME in the sd service below) are assumptions about the
    # convert.py CLI; adjust if the script's interface differs.
    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /converted_models/*.bin ]; then wget -O /converted_models/$SD_MODEL_FILE ${MODEL_URL}; python /sd.cpp/models/convert.py /converted_models/$SD_MODEL_FILE --out_type q5_0 --out_file /converted_models/v1-5-pruned-emaonly-ggml-model-q5_0.bin; fi"
    # Mount a local directory to store the downloaded model.
    volumes:
      - ./models:/converted_models

  # Service to wait for the downloader service to finish downloading the model.
  wait-for-downloader:
    image: busybox
    # The command waits until a converted .bin model appears in the shared volume.
    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/*.bin ]; do sleep 1; done; echo 'Model downloaded and converted!'"
    # Start only after the downloader service has started.
    depends_on:
      downloader:
        condition: service_started
    # Mount the same local directory to check for the downloaded model.
    volumes:
      - ./models:/models

  # Service to run the SD web application.
  sd:
    build:
      context: ./
      dockerfile: inference.Dockerfile
    # Mount the directory that contains the downloaded model.
    volumes:
      - ./models:/models
      - ./output/:/serving/output
    command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
    # platform: "linux/amd64"
    environment:
      # Configuration for the web application: model location and URL construction.
      BASE_URL: http://0.0.0.0:8000
      MODEL_NAME: "v1-5-pruned-emaonly-ggml-model-q5_0.bin"
      MODEL_DIR: "/models"
      SD_PATH: "/sd"
      PYTHONUNBUFFERED: 1
    ports:
      - 8000:8000
    # Health check configuration. Probes /docs, which FastAPI serves by default;
    # the app defines no "/" route, so probing the root would always fail.
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000/docs"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    # Restart policy configuration
    restart: on-failure
    # Start only after wait-for-downloader has completed successfully.
    depends_on:
      wait-for-downloader:
        condition: service_completed_successfully
    # Connect this service to two networks: inference_net and traefik_public.
    networks:
      - inference_net
      - traefik_public

  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
  traefik:
    image: traefik:v2.5
    command:
      # Enable the Traefik API dashboard without TLS (not recommended for production).
      - "--api.insecure=true"
      # Enable Traefik to use Docker as a provider.
      - "--providers.docker=true"
      # Do not expose services by default; each service opts in via labels.
      - "--providers.docker.exposedbydefault=false"
      # Specify the default entry point on port 80.
      - "--entrypoints.web.address=:80"
    ports:
      # Map port 80 in the container to port 80 on the host.
      - "80:80"
      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
      - "8080:8080"
    # Mount the Docker socket to allow Traefik to listen to Docker's API.
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    # Connect this service to the traefik_public network.
    networks:
      - traefik_public

# Define networks used in this docker-compose file.
networks:
  # Network for the sd service (used for inference).
  inference_net:
  # Public-facing network that Traefik uses; external, so it may be defined outside this file.
  traefik_public:
    external: true
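With the sd service published directly on port 8000, the FastAPI endpoints from main.py below can be exercised end to end; the prompt text and output filename here are just examples:

```bash
# Request a generation; the JSON response carries the URL where the
# image will appear once the background task finishes.
curl -s -X POST http://localhost:8000/inference/ -F "prompt=a lighthouse at dawn"
# -> {"url": "http://0.0.0.0:8000/serve/<uuid>.png"}

# Poll for the finished image (404 until the sd binary has written it).
curl -s -o lighthouse.png http://localhost:8000/serve/<uuid>.png
```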
jan-inference/sd/inference.Dockerfile (new file, +19)
@@ -0,0 +1,19 @@
FROM python:3.9.17 as build

RUN apt-get update && apt-get install -y build-essential git cmake

WORKDIR /sd.cpp

COPY sd_cpp /sd.cpp

RUN mkdir build && cd build && cmake .. && cmake --build . --config Release

FROM python:3.9.17 as runtime

COPY --from=build /sd.cpp/build/bin/sd /sd

WORKDIR /serving

COPY . /serving/

RUN pip install -r inference.requirements.txt
jan-inference/sd/inference.requirements.txt (new file, +4)
@@ -0,0 +1,4 @@
# Inference
fastapi
uvicorn
python-multipart
jan-inference/sd/main.py (new file, +70)
@@ -0,0 +1,70 @@
from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
import subprocess
import os
from uuid import uuid4

app = FastAPI()

OUTPUT_DIR = "output"
SD_PATH = os.environ.get("SD_PATH", "./sd")
MODEL_DIR = os.environ.get("MODEL_DIR", "./models")
BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
MODEL_NAME = os.environ.get(
    "MODEL_NAME", "v1-5-pruned-emaonly-ggml-model-q5_0.bin")

# Create the OUTPUT_DIR directory if it does not exist
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Create the MODEL_DIR directory if it does not exist
if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)

# Serve generated images from the output directory
app.mount("/output", StaticFiles(directory=OUTPUT_DIR), name="output")


def run_command(prompt: str, filename: str):
    # Invoke the compiled sd binary with the model path, prompt, and output file.
    command = [SD_PATH,
               "-m", os.path.join(MODEL_DIR, MODEL_NAME),
               "-p", prompt,
               "-o", os.path.join(OUTPUT_DIR, filename)
               ]

    try:
        sub_output = subprocess.run(command, timeout=5*60, capture_output=True,
                                    check=True, encoding="utf-8")
        print(sub_output.stdout)
    except subprocess.CalledProcessError:
        # This runs in a background task, so the HTTPException cannot reach the
        # client that triggered it; it only surfaces in the server logs.
        raise HTTPException(
            status_code=500, detail="Failed to execute the command.")


@app.post("/inference/")
async def run_inference(background_tasks: BackgroundTasks, prompt: str = Form()):
    # Generate a unique filename using uuid4()
    filename = f"{uuid4()}.png"

    # Run the command as a background task so the request doesn't block
    background_tasks.add_task(run_command, prompt, filename)

    # Return the expected path of the output file
    return {"url": f'{BASE_URL}/serve/{filename}'}


@app.get("/serve/{filename}")
async def serve_file(filename: str):
    file_path = os.path.join(OUTPUT_DIR, filename)

    if os.path.exists(file_path):
        return FileResponse(file_path)
    else:
        raise HTTPException(status_code=404, detail="File not found")


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
jan-inference/sd/sd_cpp (submodule, +1)
@@ -0,0 +1 @@
Subproject commit 0d7f04b135cd48e8d62aecd09a52eb2afa482744