feat(inference): Add SD CPP module
parent b891aa413e · commit 1146c29a31
@@ -6,3 +6,12 @@ POSTGRES_PORT=your_db_port
 KC_DB_SCHEMA=public
 KEYCLOAK_ADMIN=your_keycloak_admin_username
 KEYCLOAK_ADMIN_PASSWORD=your_keycloak_admin_password
+
+# Inference
+## LLM
+MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
+LLM_MODEL_FILE=$(basename $MODEL_URL)
+
+## SD
+MODEL_URL=https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
+SD_MODEL_FILE=$(basename $MODEL_URL)
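Note that `$(basename $MODEL_URL)` only expands when this file is sourced by a shell; Docker Compose reads `.env` values as literal strings, which is why both compose files below re-derive the filename inside the container command instead. A minimal sanity check, assuming a POSIX shell:

```bash
# Source the env file so the command substitutions actually expand;
# docker compose itself would treat the value as a literal string.
set -a; . ./.env; set +a
echo "$LLM_MODEL_FILE"   # -> llama-2-7b-chat.ggmlv3.q4_1.bin
```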
.gitignore (vendored, +9)
@@ -1,2 +1,11 @@
 .vscode
 .env
+
+# Jan inference
+jan-inference/llm/models/**
+jan-inference/llm/.env
+
+jan-inference/sd/models/**
+jan-inference/sd/output/**
+jan-inference/sd/.env
+jan-inference/sd/sd
.gitmodules (vendored, new file, +3)
@@ -0,0 +1,3 @@
[submodule "jan-inference/sd/sd_cpp"]
	path = jan-inference/sd/sd_cpp
	url = https://github.com/leejet/stable-diffusion.cpp
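With `sd_cpp` tracked as a submodule, a fresh checkout needs the usual extra step (standard git; the repository URL placeholder stands for whatever remote hosts this project):

```bash
# Clone with the pinned stable-diffusion.cpp sources in one go...
git clone --recurse-submodules <repo-url>
# ...or initialize them inside an existing checkout.
git submodule update --init --recursive
```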
jan-inference/llm/README.md (new file, +8)
@@ -0,0 +1,8 @@
# Inference - LLM

```bash
docker network create traefik_public
cp .env.example .env
# -> Update MODEL_URL in `.env` file
docker compose up -d --scale llm=2
```
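Once the stack is up, the llm service can be smoke-tested through Traefik. A sketch, assuming the llama-cpp-python image's OpenAI-compatible completions endpoint and the `/inference/llm` routing configured in the compose file below:

```bash
# Traefik strips the /inference/llm prefix before forwarding to the
# llm service, so this reaches the server's /v1/completions route.
curl -s http://localhost/inference/llm/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello, world.", "max_tokens": 16}'
```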
jan-inference/llm/docker-compose.yml (new file, +94)
@@ -0,0 +1,94 @@
version: '3'

services:

  # Service to download a model file.
  downloader:
    image: busybox
    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$LLM_MODEL_FILE ]; then wget -O /models/$LLM_MODEL_FILE ${MODEL_URL}; fi"
    # Mount a local directory to store the downloaded model.
    volumes:
      - ./models:/models

  # Service to wait for the downloader service to finish downloading the model.
  wait-for-downloader:
    image: busybox
    # The command waits until the model file (specified in MODEL_URL) exists.
    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
    # Start only after the downloader service has started.
    depends_on:
      downloader:
        condition: service_started
    # Mount the same local directory to check for the downloaded model.
    volumes:
      - ./models:/models

  # Service to run the Llama web application.
  llm:
    image: ghcr.io/abetlen/llama-cpp-python:latest
    # Mount the directory that contains the downloaded model.
    volumes:
      - ./models:/models
    environment:
      # Specify the path to the model for the web application.
      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
    labels:
      # Opt this container in to Traefik, since exposedbydefault is disabled below.
      - "traefik.enable=true"
      # Instead of using a Host rule, set a PathPrefix rule.
      - "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)"
      # Forward traffic for this router to the llm-service defined just below.
      - "traefik.http.routers.web.service=llm-service"
      # Define a service for the llm and specify its load balancer configuration.
      - "traefik.http.services.llm-service.loadbalancer.server.port=8000"
      # Strip the routing prefix before forwarding to the container.
      - "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm"
      - "traefik.http.routers.web.middlewares=strip-llm-prefix"
    # Health check configuration
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    # Restart policy configuration
    restart: on-failure
    # Start only after wait-for-downloader has completed successfully.
    depends_on:
      wait-for-downloader:
        condition: service_completed_successfully
    # Connect this service to two networks: inference_net and traefik_public.
    networks:
      - inference_net
      - traefik_public

  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
  traefik:
    image: traefik:v2.5
    command:
      # Enable the Traefik API dashboard without TLS (not recommended for production).
      - "--api.insecure=true"
      # Enable Traefik to use Docker as a provider.
      - "--providers.docker=true"
      # Do not expose services by default; each service opts in via labels.
      - "--providers.docker.exposedbydefault=false"
      # Specify the default entry point on port 80.
      - "--entrypoints.web.address=:80"
    ports:
      # Map port 80 in the container to port 80 on the host.
      - "80:80"
      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
      - "8080:8080"
    # Mount the Docker socket to allow Traefik to listen to Docker's API.
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    # Connect this service to the traefik_public network.
    networks:
      - traefik_public

# Define networks used in this docker-compose file.
networks:
  # Network for the llm service (used for inference).
  inference_net:
  # Public-facing network that Traefik uses; external, so it may be defined outside this file.
  traefik_public:
    external: true
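The README's `--scale llm=2` works because the router points at a single load-balancer definition that every replica registers with. Since `--api.insecure=true` exposes the Traefik v2 API on the dashboard port, the registered backends can be confirmed like this:

```bash
# Start two llm replicas behind Traefik's round-robin load balancer.
docker compose up -d --scale llm=2

# List the backend server URLs Traefik discovered for each service.
curl -s http://localhost:8080/api/http/services | grep -o '"url":"[^"]*"'
```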
jan-inference/llm/models/.gitkeep (new file, empty)
jan-inference/sd/.dockerignore (new file, +7)
@@ -0,0 +1,7 @@
output/
models/

sd_cpp/.git
sd_cpp/.github

sd
jan-inference/sd/compile.Dockerfile (new file, +10)
@@ -0,0 +1,10 @@
FROM python:3.9.17

RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH=/root/.cargo/bin:$PATH

WORKDIR /sd.cpp

COPY . .

RUN pip install -r compile.requirements.txt
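Compose builds this image automatically for the downloader service below, but it can also be built by hand; the tag name here is arbitrary:

```bash
# Build the model-download/conversion image from the sd directory.
cd jan-inference/sd
docker build -f compile.Dockerfile -t sd-downloader .
```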
jan-inference/sd/compile.requirements.txt (new file, +17)
@@ -0,0 +1,17 @@
# Compiling
-r sd_cpp/models/requirements.txt

# diffusers
# torch
# ftfy
# scipy
# transformers
# accelerate
# huggingface-hub
# xformers
# omegaconf
# safetensors
# cog
# tomesd
# compel
jan-inference/sd/docker-compose.yml (new file, +98)
@@ -0,0 +1,98 @@
version: '3'

services:

  # Service to download and convert a model file.
  downloader:
    build:
      context: ./
      dockerfile: compile.Dockerfile
    # platform: "linux/amd64"
    # Extracts the model filename from MODEL_URL; if no converted model exists yet,
    # downloads the weights and converts them to a GGML .bin with sd_cpp's convert.py.
    # NOTE: passing the downloaded file as the input path and quantizing to q5_0 (so the
    # output matches MODEL_NAME in the sd service below) are assumptions about the
    # convert.py CLI; adjust if the script's interface differs.
    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /converted_models/*.bin ]; then wget -O /converted_models/$SD_MODEL_FILE ${MODEL_URL}; python /sd.cpp/models/convert.py /converted_models/$SD_MODEL_FILE --out_type q5_0 --out_file /converted_models/v1-5-pruned-emaonly-ggml-model-q5_0.bin; fi"
    # Mount a local directory to store the downloaded model.
    volumes:
      - ./models:/converted_models

  # Service to wait for the downloader service to finish downloading the model.
  wait-for-downloader:
    image: busybox
    # The command waits until a converted .bin model appears in the shared volume.
    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/*.bin ]; do sleep 1; done; echo 'Model downloaded and converted!'"
    # Start only after the downloader service has started.
    depends_on:
      downloader:
        condition: service_started
    # Mount the same local directory to check for the downloaded model.
    volumes:
      - ./models:/models

  # Service to run the SD web application.
  sd:
    build:
      context: ./
      dockerfile: inference.Dockerfile
    # Mount the directory that contains the downloaded model.
    volumes:
      - ./models:/models
      - ./output/:/serving/output
    command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
    # platform: "linux/amd64"
    environment:
      # Configuration for the web application: model location and URL construction.
      BASE_URL: http://0.0.0.0:8000
      MODEL_NAME: "v1-5-pruned-emaonly-ggml-model-q5_0.bin"
      MODEL_DIR: "/models"
      SD_PATH: "/sd"
      PYTHONUNBUFFERED: 1
    ports:
      - 8000:8000
    # Health check configuration. Probes /docs, which FastAPI serves by default;
    # the app defines no "/" route, so probing the root would always fail.
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000/docs"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    # Restart policy configuration
    restart: on-failure
    # Start only after wait-for-downloader has completed successfully.
    depends_on:
      wait-for-downloader:
        condition: service_completed_successfully
    # Connect this service to two networks: inference_net and traefik_public.
    networks:
      - inference_net
      - traefik_public

  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
  traefik:
    image: traefik:v2.5
    command:
      # Enable the Traefik API dashboard without TLS (not recommended for production).
      - "--api.insecure=true"
      # Enable Traefik to use Docker as a provider.
      - "--providers.docker=true"
      # Do not expose services by default; each service opts in via labels.
      - "--providers.docker.exposedbydefault=false"
      # Specify the default entry point on port 80.
      - "--entrypoints.web.address=:80"
    ports:
      # Map port 80 in the container to port 80 on the host.
      - "80:80"
      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
      - "8080:8080"
    # Mount the Docker socket to allow Traefik to listen to Docker's API.
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    # Connect this service to the traefik_public network.
    networks:
      - traefik_public

# Define networks used in this docker-compose file.
networks:
  # Network for the sd service (used for inference).
  inference_net:
  # Public-facing network that Traefik uses; external, so it may be defined outside this file.
  traefik_public:
    external: true
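With the sd service published directly on port 8000, the FastAPI endpoints from main.py below can be exercised end to end; the prompt text and output filename here are just examples:

```bash
# Request a generation; the JSON response carries the URL where the
# image will appear once the background task finishes.
curl -s -X POST http://localhost:8000/inference/ -F "prompt=a lighthouse at dawn"
# -> {"url": "http://0.0.0.0:8000/serve/<uuid>.png"}

# Poll for the finished image (404 until the sd binary has written it).
curl -s -o lighthouse.png http://localhost:8000/serve/<uuid>.png
```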
jan-inference/sd/inference.Dockerfile (new file, +19)
@@ -0,0 +1,19 @@
FROM python:3.9.17 as build

RUN apt-get update && apt-get install -y build-essential git cmake

WORKDIR /sd.cpp

COPY sd_cpp /sd.cpp

RUN mkdir build && cd build && cmake .. && cmake --build . --config Release

FROM python:3.9.17 as runtime

COPY --from=build /sd.cpp/build/bin/sd /sd

WORKDIR /serving

COPY . /serving/

RUN pip install -r inference.requirements.txt
jan-inference/sd/inference.requirements.txt (new file, +4)
@@ -0,0 +1,4 @@
# Inference
fastapi
uvicorn
python-multipart
jan-inference/sd/main.py (new file, +70)
@@ -0,0 +1,70 @@
from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
import subprocess
import os
from uuid import uuid4

app = FastAPI()

OUTPUT_DIR = "output"
SD_PATH = os.environ.get("SD_PATH", "./sd")
MODEL_DIR = os.environ.get("MODEL_DIR", "./models")
BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
MODEL_NAME = os.environ.get(
    "MODEL_NAME", "v1-5-pruned-emaonly-ggml-model-q5_0.bin")

# Create the OUTPUT_DIR directory if it does not exist
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Create the MODEL_DIR directory if it does not exist
if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)

# Serve generated images from the output directory
app.mount("/output", StaticFiles(directory=OUTPUT_DIR), name="output")


def run_command(prompt: str, filename: str):
    # Invoke the compiled sd binary with the model path, prompt, and output file.
    command = [SD_PATH,
               "-m", os.path.join(MODEL_DIR, MODEL_NAME),
               "-p", prompt,
               "-o", os.path.join(OUTPUT_DIR, filename)
               ]

    try:
        sub_output = subprocess.run(command, timeout=5*60, capture_output=True,
                                    check=True, encoding="utf-8")
        print(sub_output.stdout)
    except subprocess.CalledProcessError:
        # This runs in a background task, so the HTTPException cannot reach the
        # client that triggered it; it only surfaces in the server logs.
        raise HTTPException(
            status_code=500, detail="Failed to execute the command.")


@app.post("/inference/")
async def run_inference(background_tasks: BackgroundTasks, prompt: str = Form()):
    # Generate a unique filename using uuid4()
    filename = f"{uuid4()}.png"

    # Run the command as a background task so the request doesn't block
    background_tasks.add_task(run_command, prompt, filename)

    # Return the expected path of the output file
    return {"url": f'{BASE_URL}/serve/{filename}'}


@app.get("/serve/{filename}")
async def serve_file(filename: str):
    file_path = os.path.join(OUTPUT_DIR, filename)

    if os.path.exists(file_path):
        return FileResponse(file_path)
    else:
        raise HTTPException(status_code=404, detail="File not found")


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
jan-inference/sd/sd_cpp (submodule, +1)
@@ -0,0 +1 @@
Subproject commit 0d7f04b135cd48e8d62aecd09a52eb2afa482744