Merge pull request #4 from janhq/first_mvp_jan_inference

First MVP of Jan inference
namvuong 2023-08-25 13:01:35 +07:00 committed by GitHub
commit d715e5a1df
17 changed files with 370 additions and 2 deletions


@@ -5,4 +5,13 @@ POSTGRES_USERNAME=your_db_username
POSTGRES_PORT=your_db_port
KC_DB_SCHEMA=public
KEYCLOAK_ADMIN=your_keycloak_admin_username
KEYCLOAK_ADMIN_PASSWORD=your_keycloak_admin_password
# Inference
## LLM
MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
LLM_MODEL_FILE=$(basename $MODEL_URL)
## SD
MODEL_URL=https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
SD_MODEL_FILE=$(basename $MODEL_URL)
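
Two things worth noting about these new entries: `MODEL_URL` is assigned twice (once for the LLM, once for Stable Diffusion), so the two values collide if both are read from the same file; in practice each inference stack keeps its own `.env` (the new `.gitignore` entries below ignore `jan-inference/llm/.env` and `jan-inference/sd/.env`). Also, `$(basename $MODEL_URL)` is shell syntax that Docker Compose does not evaluate when reading an env file; the compose services recompute the filename inside their own shell. A minimal Python sketch of that derivation, for illustration only:

```python
# Minimal sketch (not part of this PR): deriving the model filename from
# MODEL_URL the way the shell's `basename` does, using only the standard library.
from urllib.parse import urlparse
import posixpath

def model_file_from_url(model_url: str) -> str:
    """Return the last path component of MODEL_URL."""
    return posixpath.basename(urlparse(model_url).path)

print(model_file_from_url(
    "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin"
))  # -> llama-2-7b-chat.ggmlv3.q4_1.bin
```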

.gitignore (11 changes)

@@ -1,2 +1,11 @@
.vscode
.env
# Jan inference
jan-inference/llm/models/**
jan-inference/llm/.env
jan-inference/sd/models/**
jan-inference/sd/output/**
jan-inference/sd/.env
jan-inference/sd/sd

.gitmodules (new file, 3 lines)

@@ -0,0 +1,3 @@
[submodule "jan-inference/sd/sd_cpp"]
path = jan-inference/sd/sd_cpp
url = https://github.com/leejet/stable-diffusion.cpp


@@ -0,0 +1,25 @@
version: '3'
services:
# Service for Traefik, a modern HTTP reverse proxy and load balancer.
traefik:
image: traefik:v2.10
ports:
# Map port 80 in the container to port 80 on the host.
- "80:80"
# Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
- "8080:8080"
# Mount the Docker socket to allow Traefik to listen to Docker's API.
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
- ./traefik/traefik.yml:/traefik.yml:ro
- ./traefik/config.yml:/config.yml:ro
# Connect this service to the traefik_public network.
networks:
- traefik_public
# Define networks used in this docker-compose file.
networks:
# Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
traefik_public:
external: true


@@ -0,0 +1,8 @@
# Inference - LLM
```bash
docker network create traefik_public
cp .env.example .env
# -> Update MODEL_URL in `.env` file
docker compose up -d --scale llm=2
```
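
Once the stack is up, requests can go either straight to the published port 8000 or through the Traefik gateway added elsewhere in this PR, which routes requests under `/inference/llm/` to the `llm` service and strips the prefix. The sketch below is illustrative only: it assumes the gateway is reachable on `localhost:80` and that the `ghcr.io/abetlen/llama-cpp-python` image exposes its usual OpenAI-compatible `/v1/completions` route (check the image's docs for your tag):

```python
# Illustrative client, not part of this PR. Assumes the Traefik gateway on
# localhost:80 and llama-cpp-python's OpenAI-compatible completions endpoint.
import json
import urllib.request

def complete(prompt: str, max_tokens: int = 64) -> str:
    req = urllib.request.Request(
        # Traefik strips /inference/llm/, so the backend sees /v1/completions.
        "http://localhost/inference/llm/v1/completions",
        data=json.dumps({"prompt": prompt, "max_tokens": max_tokens}).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=300) as resp:
        return json.load(resp)["choices"][0]["text"]

print(complete("Q: What is the capital of France?\nA:"))
```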


@@ -0,0 +1,60 @@
version: '3'
services:
# Service to download a model file.
downloader:
image: busybox
# The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$LLM_MODEL_FILE ]; then wget -O /models/$LLM_MODEL_FILE ${MODEL_URL}; fi"
# Mount a local directory to store the downloaded model.
volumes:
- ./models:/models
# Service to wait for the downloader service to finish downloading the model.
wait-for-downloader:
image: busybox
# The command waits until the model file (specified in MODEL_URL) exists.
command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
# Specifies that this service should start after the downloader service has started.
depends_on:
downloader:
condition: service_started
# Mount the same local directory to check for the downloaded model.
volumes:
- ./models:/models
# Service to run the Llama web application.
llm:
image: ghcr.io/abetlen/llama-cpp-python:latest
# Mount the directory that contains the downloaded model.
volumes:
- ./models:/models
ports:
- 8000:8000
environment:
# Specify the path to the model for the web application.
MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
PYTHONUNBUFFERED: 1
# Health check configuration
# healthcheck:
# test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
# interval: 30s
# timeout: 10s
# retries: 3
# start_period: 30s
# Restart policy configuration
restart: on-failure
# Specifies that this service should start only after wait-for-downloader has completed successfully.
depends_on:
wait-for-downloader:
condition: service_completed_successfully
    # Connect this service to the traefik_public network.
networks:
- traefik_public
# Define networks used in this docker-compose file.
networks:
# Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
traefik_public:
external: true
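
The `downloader` / `wait-for-downloader` pair is a simple "fetch once, then gate the app on the file's existence" pattern: the first container downloads the model only if it is missing, the second polls for the file and exits once it appears, and `llm` starts only after the poller has completed successfully. A hedged Python rendering of the same logic, for readers who prefer it spelled out (illustration only, not used by the stack):

```python
# Sketch of the downloader / wait-for-downloader behaviour from the compose file
# above, rewritten in Python for clarity. Paths mirror the compose volumes.
import os
import posixpath
import time
import urllib.request
from urllib.parse import urlparse

MODELS_DIR = "./models"
MODEL_URL = os.environ["MODEL_URL"]
MODEL_PATH = os.path.join(MODELS_DIR, posixpath.basename(urlparse(MODEL_URL).path))

def download_if_missing() -> None:
    """Downloader: fetch the model only if it is not already on disk."""
    os.makedirs(MODELS_DIR, exist_ok=True)
    if not os.path.isfile(MODEL_PATH):
        urllib.request.urlretrieve(MODEL_URL, MODEL_PATH)

def wait_for_model(poll_seconds: float = 1.0) -> None:
    """wait-for-downloader: block until the model file exists."""
    print("Waiting for downloader to finish")
    while not os.path.isfile(MODEL_PATH):
        time.sleep(poll_seconds)
    print("Model downloaded!")
```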


@@ -0,0 +1,7 @@
output/
models/
sd_cpp/.git
sd_cpp/.github
sd


@@ -0,0 +1,10 @@
FROM python:3.9.17
# Install the Rust toolchain (some of the Python dependencies build native extensions) and put cargo on PATH.
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH=/root/.cargo/bin:$PATH
WORKDIR /sd.cpp
COPY . .
# Model-conversion requirements; compile.requirements.txt pulls in sd_cpp/models/requirements.txt.
RUN pip install -r compile.requirements.txt


@@ -0,0 +1,17 @@
# Compiling
-r sd_cpp/models/requirements.txt
# diffusers
# torch
# ftfy
# scipy
# transformers
# accelerate
# huggingface-hub
# xformers
# omegaconf
# safetensors
# cog
# tomesd
# compel


@@ -0,0 +1,71 @@
version: '3'
services:
# Service to download a model file.
downloader:
build:
context: ./
dockerfile: compile.Dockerfile
# platform: "linux/amd64"
    # The command extracts the model filename from MODEL_URL; if no converted model exists yet, it downloads the checkpoint and converts it with sd_cpp's convert.py.
command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /converted_models/*.bin ]; then wget -O /converted_models/$SD_MODEL_FILE ${MODEL_URL}; python /sd.cpp/models/convert.py --out_type q4_0 --out_file /converted_models/$SD_MODEL_FILE; fi"
# Mount a local directory to store the downloaded model.
volumes:
- ./models:/converted_models
# Service to wait for the downloader service to finish downloading the model.
wait-for-downloader:
image: busybox
    # The command waits until a converted model file (*.bin) exists in /models.
command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/*.bin ]; do sleep 1; done; echo 'Model downloaded and converted!'"
# Specifies that this service should start after the downloader service has started.
depends_on:
downloader:
condition: service_started
# Mount the same local directory to check for the downloaded model.
volumes:
- ./models:/models
# Service to run the SD web application.
sd:
build:
context: ./
dockerfile: inference.Dockerfile
# Mount the directory that contains the downloaded model.
volumes:
- ./models:/models
- ./output/:/serving/output
command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
# platform: "linux/amd64"
environment:
# Specify the path to the model for the web application.
BASE_URL: http://0.0.0.0:8000
MODEL_NAME: "v1-5-pruned-emaonly-ggml-model-q5_0.bin"
MODEL_DIR: "/models"
SD_PATH: "/sd"
PYTHONUNBUFFERED: 1
ports:
- 8000:8000
# Health check configuration
# healthcheck:
# test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
# interval: 30s
# timeout: 10s
# retries: 3
# start_period: 30s
# Restart policy configuration
restart: on-failure
# Specifies that this service should start only after wait-for-downloader has completed successfully.
depends_on:
wait-for-downloader:
condition: service_completed_successfully
    # Connect this service to the traefik_public network.
networks:
- traefik_public
# Define networks used in this docker-compose file.
networks:
# Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
traefik_public:
external: true
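
Compared to the LLM stack, the SD `downloader` adds a conversion step: it only acts when no converted `*.bin` is present, then downloads the checkpoint and runs `convert.py` from the `stable-diffusion.cpp` submodule. A hedged Python sketch of that gate; the glob check mirrors the shell's `[ ! -f /converted_models/*.bin ]` test, and the converter flags are copied verbatim from the compose command above:

```python
# Sketch only: the "download and convert unless a converted model exists" gate
# from the SD downloader service, expressed in Python.
import glob
import os
import posixpath
import subprocess
import urllib.request
from urllib.parse import urlparse

CONVERTED_DIR = "/converted_models"
MODEL_URL = os.environ["MODEL_URL"]
MODEL_PATH = os.path.join(CONVERTED_DIR, posixpath.basename(urlparse(MODEL_URL).path))

def download_and_convert_if_needed() -> None:
    # Skip everything if any converted .bin is already present.
    if glob.glob(os.path.join(CONVERTED_DIR, "*.bin")):
        return
    urllib.request.urlretrieve(MODEL_URL, MODEL_PATH)
    # Same flags as the compose command; convert.py comes from the sd_cpp submodule.
    subprocess.run(
        ["python", "/sd.cpp/models/convert.py",
         "--out_type", "q4_0", "--out_file", MODEL_PATH],
        check=True,
    )
```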


@@ -0,0 +1,19 @@
# Build stage: compile the stable-diffusion.cpp binary from the sd_cpp submodule.
FROM python:3.9.17 as build
RUN apt-get update && apt-get install -y build-essential git cmake
WORKDIR /sd.cpp
COPY sd_cpp /sd.cpp
RUN mkdir build && cd build && cmake .. && cmake --build . --config Release
# Runtime stage: only the compiled `sd` binary plus the FastAPI serving code.
FROM python:3.9.17 as runtime
COPY --from=build /sd.cpp/build/bin/sd /sd
WORKDIR /serving
COPY . /serving/
RUN pip install -r inference.requirements.txt


@@ -0,0 +1,4 @@
# Inference
fastapi
uvicorn
python-multipart

jan-inference/sd/main.py (new file, 70 lines)

@@ -0,0 +1,70 @@
from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
import subprocess
import os
from uuid import uuid4
app = FastAPI()
OUTPUT_DIR = "output"
SD_PATH = os.environ.get("SD_PATH", "./sd")
MODEL_DIR = os.environ.get("MODEL_DIR", "./models")
BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
MODEL_NAME = os.environ.get(
"MODEL_NAME", "v1-5-pruned-emaonly-ggml-model-q5_0.bin")
# Create the OUTPUT_DIR directory if it does not exist
if not os.path.exists(OUTPUT_DIR):
os.makedirs(OUTPUT_DIR)
# Create the MODEL_DIR directory if it does not exist
if not os.path.exists(MODEL_DIR):
os.makedirs(MODEL_DIR)
# Serve generated images from the OUTPUT_DIR directory
app.mount("/output", StaticFiles(directory=OUTPUT_DIR), name="output")
def run_command(prompt: str, filename: str):
# Construct the command based on your provided example
command = [SD_PATH,
"-m", os.path.join(MODEL_DIR, MODEL_NAME),
"-p", prompt,
"-o", os.path.join(OUTPUT_DIR, filename)
]
try:
sub_output = subprocess.run(command, timeout=5*60, capture_output=True,
check=True, encoding="utf-8")
print(sub_output.stdout)
except subprocess.CalledProcessError:
raise HTTPException(
status_code=500, detail="Failed to execute the command.")
@app.post("/inference/")
async def run_inference(background_tasks: BackgroundTasks, prompt: str = Form()):
# Generate a unique filename using uuid4()
filename = f"{uuid4()}.png"
# We will use background task to run the command so it won't block
background_tasks.add_task(run_command, prompt, filename)
# Return the expected path of the output file
return {"url": f'{BASE_URL}/serve/{filename}'}
@app.get("/serve/{filename}")
async def serve_file(filename: str):
file_path = os.path.join(OUTPUT_DIR, filename)
if os.path.exists(file_path):
return FileResponse(file_path)
else:
raise HTTPException(status_code=404, detail="File not found")
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
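
Because `/inference/` only schedules the generation as a background task, the URL it returns will 404 until the `sd` binary has written the PNG, and the URL itself is built from `BASE_URL`, so that variable must point at an address the client can actually reach. An illustrative client (standard library only, assuming the API is reached directly on the published port 8000):

```python
# Illustrative client for the FastAPI app above (not part of this PR).
# Submits a prompt, then polls the returned URL until the image is ready.
import json
import time
import urllib.error
import urllib.parse
import urllib.request

API = "http://localhost:8000"

def generate(prompt: str, poll_seconds: float = 2.0, timeout_s: float = 300.0) -> bytes:
    form = urllib.parse.urlencode({"prompt": prompt}).encode("utf-8")  # Form() expects form-encoded data
    with urllib.request.urlopen(f"{API}/inference/", data=form) as resp:
        url = json.load(resp)["url"]
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url) as img:
                return img.read()
        except urllib.error.HTTPError as err:
            if err.code != 404:
                raise
            time.sleep(poll_seconds)  # not generated yet; /serve/ returns 404 until then
    raise TimeoutError("image was not produced before the timeout")

png = generate("a photo of an astronaut riding a horse")
with open("output.png", "wb") as fh:
    fh.write(png)
```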

@@ -0,0 +1 @@
Subproject commit 0d7f04b135cd48e8d62aecd09a52eb2afa482744


@@ -0,0 +1,38 @@
http:
#region routers
routers:
llm:
entryPoints:
- "web"
rule: "PathPrefix(`/inference/llm/`)"
middlewares:
        - llm-strip-prefix
service: llm
sd:
entryPoints:
- "web"
rule: "PathPrefix(`/inference/sd/`)"
middlewares:
- sd-strip-prefix
service: sd
#endregion
#region services
services:
llm:
loadBalancer:
servers:
- url: "http://llm:8000"
sd:
loadBalancer:
servers:
- url: "http://sd:8000"
#endregion
middlewares:
llm-strip-prefix:
stripPrefix:
prefixes:
- "/inference/llm/"
sd-strip-prefix:
stripPrefix:
prefixes:
- "/inference/sd/"


@@ -0,0 +1,17 @@
api:
dashboard: true
debug: true
insecure: true
entryPoints:
web:
address: ":80"
serversTransport:
insecureSkipVerify: true
providers:
docker:
endpoint: "unix:///var/run/docker.sock"
exposedByDefault: false
file:
filename: /config.yml
log:
level: DEBUG