fix: Remove sd ggml + minio + traefik (#63)

* fix(inf-sd): Delete gitsubmodule for SD ggml

* chore: Delete custom code for sd.cpp

* chore: Update gitignore for sd models

* chore: Update readme for sd ggml service removal

* chore: Refactor compose file to remove minio, sd, sd-downloader and traefik

* fix(.env): Removing SD and minio related configuration
Authored by namvuong on 2023-09-06 15:48:54 +07:00; committed by GitHub.
parent 83e21250a4
commit 34939feb90
12 changed files with 2 additions and 307 deletions

.gitignore

@@ -3,10 +3,3 @@
# Jan inference
jan-inference/llm/models/**
jan-inference/sd/models/**
jan-inference/sd/output/**
jan-inference/sd/sd
# Minio
minio/**

.gitmodules

@@ -1,3 +0,0 @@
[submodule "jan-inference/sd/sd_cpp"]
	path = jan-inference/sd/sd_cpp
	url = https://github.com/leejet/stable-diffusion.cpp
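For anyone replaying this cleanup by hand: removing a git submodule takes more than deleting the `.gitmodules` entry. A typical sequence (paths taken from this repo; exact steps can vary by git version):

```sh
# Deregister the submodule and remove it from the index and working tree
git submodule deinit -f jan-inference/sd/sd_cpp
git rm -f jan-inference/sd/sd_cpp
# Drop the repository data git caches under .git/modules
rm -rf .git/modules/jan-inference/sd/sd_cpp
```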

README.md

@@ -72,13 +72,6 @@ export DOCKER_DEFAULT_PLATFORM=linux/$(uname -m)
| web-client | `conf/sample.env_web-client` |
### Install Models
- Download Runway SD 1.5 from HuggingFace
```sh
wget https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors -P jan-inference/sd/models
```
- Download Llama 2 7B GGML from HuggingFace
```sh
wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models
```
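Before this change, a quick way to confirm the downloads landed where the compose file mounts them (a hypothetical check, not part of the original README):

```sh
ls -lh jan-inference/sd/models jan-inference/llm/models
```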
@@ -106,8 +99,7 @@ docker compose up -d
| Web App | http://localhost:3000 | Users are signed up to keycloak, default created user is set via `conf/keycloak_conf/example-realm.json` on keycloak with username: `username`, password: `password` |
| Keycloak Admin | http://localhost:8088 | Admin credentials are set via the environment variables `KEYCLOAK_ADMIN` and `KEYCLOAK_ADMIN_PASSWORD` |
| Hasura App Backend | http://localhost:8080 | Admin credentials are set via the environment variables `HASURA_GRAPHQL_ADMIN_SECRET` in file `conf/sample.env_app-backend` |
| LLM Service | http://localhost:8000 | |
| Stable Diffusion Service | http://localhost:8001 | |
| LLM Service | http://localhost:8000 | |
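The surviving LLM service listens on port 8000 and serves the GGML model from `LLM_MODEL_FILE`. Assuming it exposes an OpenAI-compatible completions endpoint (as llama-cpp-python's server does; the README does not spell this out), a smoke test might look like:

```sh
curl http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello, Jan!", "max_tokens": 32}'
```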
## Usage

docker-compose.yml

@@ -63,13 +63,6 @@ services:
      interval: 5s
      timeout: 10s
      retries: 30
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.graphql-engine.entrypoints=jan"
      - "traefik.http.routers.graphql-engine.rule=(PathPrefix(`/graphql-engine`))"
      - "traefik.http.middlewares.graphql-engine.stripprefix.prefixes=/graphql-engine"
      - "traefik.http.middlewares.graphql-engine.stripprefix.forceslash=false"
      - "traefik.http.routers.graphql-engine.middlewares=graphql-engine"
    networks:
      jan_community:
        ipv4_address: 172.20.0.12
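The deleted labels above are what let Traefik route to Hasura: requests hitting the `jan` entrypoint (port 1337) under `/graphql-engine` were matched by `PathPrefix` and had the prefix stripped before forwarding. Roughly, the before/after access paths (illustrative URLs; `/healthz` is Hasura's health endpoint):

```sh
# Before: routed through Traefik, /graphql-engine prefix stripped
curl http://localhost:1337/graphql-engine/healthz
# After: hit Hasura directly on its published port
curl http://localhost:8080/healthz
```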
@@ -143,12 +136,6 @@ services:
      NODE_ENV: development
    extra_hosts:
      - "localhost:172.20.0.9"
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.web.entrypoints=jan"
      - "traefik.http.routers.web.rule=(Host(`localhost`) && PathPrefix(`/web`))"
      - "traefik.http.routers.web.middlewares=rewritepath"
      - "traefik.http.middlewares.rewritepath.addprefix.prefix=/web"
    depends_on:
      graphql-engine:
        condition: service_healthy
@@ -166,122 +153,10 @@ services:
      MODEL: /models/${LLM_MODEL_FILE}
      PYTHONUNBUFFERED: 1
    restart: on-failure
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.llm.entrypoints=jan"
      - "traefik.http.routers.llm.rule=(PathPrefix(`/llm`))"
      - "traefik.http.middlewares.llm.stripprefix.prefixes=/llm"
      - "traefik.http.middlewares.llm.stripprefix.forceslash=false"
      - "traefik.http.routers.llm.middlewares=llm"
    networks:
      jan_community:
        ipv4_address: 172.20.0.18
  sd-downloader:
    build:
      context: ./jan-inference/sd/
      dockerfile: compile.Dockerfile
    command: /bin/sh -c "if [ ! -f /models/*.bin ]; then python /sd.cpp/sd_cpp/models/convert.py --out_type q4_0 --out_file /models/${SD_MODEL_FILE}.q4_0.bin /models/${SD_MODEL_FILE}; fi"
    volumes:
      - ./jan-inference/sd/models:/models
    networks:
      jan_community:
        ipv4_address: 172.20.0.19
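A side note on the deleted guard: `[ ! -f /models/*.bin ]` only works while the glob expands to zero or one file; with two or more `.bin` files `test` receives too many arguments and the conversion is silently skipped. Had the service stayed, a sturdier POSIX guard would be (a sketch, not the original code):

```sh
# Expand the glob into positional parameters, then test the first match
set -- /models/*.bin
if [ ! -e "$1" ]; then
  python /sd.cpp/sd_cpp/models/convert.py --out_type q4_0 \
    --out_file "/models/${SD_MODEL_FILE}.q4_0.bin" "/models/${SD_MODEL_FILE}"
fi
```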
  sd:
    build:
      context: ./jan-inference/sd/
      dockerfile: inference.Dockerfile
    volumes:
      - ./jan-inference/sd/models:/models/
    command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
    environment:
      S3_ENDPOINT_URL: ${S3_ENDPOINT_URL}
      S3_PUBLIC_ENDPOINT_URL: ${S3_PUBLIC_ENDPOINT_URL}
      S3_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
      S3_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
      S3_BUCKET_NAME: ${S3_BUCKET_NAME}
      MODEL_NAME: ${SD_MODEL_FILE}.q4_0.bin
      MODEL_DIR: /models
      OUTPUT_DIR: /tmp
      SD_PATH: /sd
      PYTHONUNBUFFERED: 1
    ports:
      - 8001:8000
    restart: on-failure
    depends_on:
      sd-downloader:
        condition: service_completed_successfully
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.sd.entrypoints=jan"
      - "traefik.http.routers.sd.rule=(PathPrefix(`/sd`))"
      - "traefik.http.middlewares.sd.stripprefix.prefixes=/sd"
      - "traefik.http.middlewares.sd.stripprefix.forceslash=false"
      - "traefik.http.routers.sd.middlewares=sd"
    networks:
      jan_community:
        ipv4_address: 172.20.0.21
  minio:
    image: minio/minio
    ports:
      - 9000:9000
      - 9001
    volumes:
      - ./minio/data:/export
      - ./minio/config:/root/.minio
    environment:
      MINIO_ROOT_USER: ${S3_ACCESS_KEY_ID}
      MINIO_ROOT_PASSWORD: ${S3_SECRET_ACCESS_KEY}
    command: server /export --console-address ":9001"
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.minio.entrypoints=jan"
      - "traefik.http.routers.minio.rule=(PathPrefix(`/minio`))"
      - "traefik.http.middlewares.minio.stripprefix.prefixes=/minio"
      - "traefik.http.middlewares.minio.stripprefix.forceslash=false"
      - "traefik.http.routers.minio.middlewares=minio"
    networks:
      jan_community:
        ipv4_address: 172.20.0.23
  createbuckets:
    image: minio/mc
    depends_on:
      - minio
    environment:
      S3_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
      S3_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
      S3_BUCKET_NAME: ${S3_BUCKET_NAME}
    entrypoint: >
      /bin/sh -c "
      /usr/bin/mc config host add myminio http://minio:9000 ${S3_ACCESS_KEY_ID} ${S3_SECRET_ACCESS_KEY};
      /usr/bin/mc rm -r --force myminio/${S3_BUCKET_NAME};
      /usr/bin/mc mb myminio/${S3_BUCKET_NAME};
      /usr/bin/mc anonymous set public myminio/${S3_BUCKET_NAME};
      exit 0;
      "
    networks:
      jan_community:
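`mc anonymous set public` is what made generated images fetchable without credentials: the SD service returned URLs of the form `${S3_PUBLIC_ENDPOINT_URL}/${S3_BUCKET_NAME}/<file>`, which only resolve if the bucket allows anonymous reads. A hypothetical check while the stack was up (object name invented for illustration):

```sh
curl -I http://127.0.0.1:9000/jan/some-image.png
```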
  traefik:
    image: traefik:v2.10
    command:
      - "--api.insecure=true"
      - "--providers.docker=true"
      - "--providers.docker.exposedbydefault=false"
      - "--log.level=debug"
      - "--entrypoints.jan.address=:1337"
    ports:
      - "1337:1337"
      - "9090:8080"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      jan_community:
        ipv4_address: 172.20.0.22
networks:
  jan_community:
    driver: bridge
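The removed Traefik instance declared one custom entrypoint, `jan`, on :1337 and published its insecure API/dashboard (container port 8080) on host port 9090. While it ran, the routers and middlewares discovered from the Docker labels could be inspected through Traefik v2's API (hypothetical session):

```sh
curl http://localhost:9090/api/rawdata
```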

jan-inference/sd/.dockerignore

@@ -1,7 +0,0 @@
output/
models/
sd_cpp/.git
sd_cpp/.github
sd

jan-inference/sd/compile.Dockerfile

@@ -1,10 +0,0 @@
FROM python:3.9.17
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH=/root/.cargo/bin:$PATH
WORKDIR /sd.cpp
COPY . .
RUN pip install -r compile.requirements.txt
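This image only prepared the conversion environment; the Rust toolchain is presumably there for native wheels pulled in by the converter's requirements (an inference on my part, not stated in the Dockerfile). Rebuilding it standalone, mirroring the compose definition (image tag invented):

```sh
docker build -f jan-inference/sd/compile.Dockerfile -t jan-sd-compile jan-inference/sd
```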

jan-inference/sd/compile.requirements.txt

@@ -1,17 +0,0 @@
# Compiling
-r sd_cpp/models/requirements.txt
# diffusers
# torch
# ftfy
# scipy
# transformers
# accelerate
# huggingface-hub
# xformers
# omegaconf
# safetensors
# cog
# tomesd
# compel

jan-inference/sd/inference.Dockerfile

@@ -1,21 +0,0 @@
ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION as build
RUN apt-get update && apt-get install -y build-essential git cmake
WORKDIR /sd.cpp
COPY sd_cpp /sd.cpp
RUN mkdir build && cd build && cmake .. && cmake --build . --config Release
FROM python:3.9.17 as runtime
COPY --from=build /sd.cpp/build/bin/sd /sd
WORKDIR /serving
COPY . /serving/
RUN pip install -r inference.requirements.txt
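A two-stage build: the first stage compiles the `sd` binary from the sd.cpp submodule with CMake, the second copies only that binary into a Python image that serves `main.py`. Building and running it by hand would have looked roughly like this (tag invented; port, volume, and MODEL_NAME taken from the compose file):

```sh
docker build -f jan-inference/sd/inference.Dockerfile -t jan-sd jan-inference/sd
docker run --rm -p 8001:8000 \
  -v "$PWD/jan-inference/sd/models:/models" \
  -e MODEL_NAME=v1-5-pruned-emaonly.safetensors.q4_0.bin \
  jan-sd python -m uvicorn main:app --host 0.0.0.0 --port 8000
```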

jan-inference/sd/inference.requirements.txt

@@ -1,5 +0,0 @@
# Inference
fastapi
uvicorn
python-multipart
boto3

jan-inference/sd/main.py

@@ -1,90 +0,0 @@
from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
import subprocess
import os
from uuid import uuid4
from pydantic import BaseModel
import boto3
from botocore.client import Config

app = FastAPI()

OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "output")
SD_PATH = os.environ.get("SD_PATH", "./sd")
MODEL_DIR = os.environ.get("MODEL_DIR", "./models")
MODEL_NAME = os.environ.get(
    "MODEL_NAME", "v1-5-pruned-emaonly.safetensors.q4_0.bin")
S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", "http://localhost:9000")
S3_PUBLIC_ENDPOINT_URL = os.environ.get(
    "S3_PUBLIC_ENDPOINT_URL", "http://localhost:9000")
S3_ACCESS_KEY_ID = os.environ.get("S3_ACCESS_KEY_ID", "minio")
S3_SECRET_ACCESS_KEY = os.environ.get("S3_SECRET_ACCESS_KEY", "minio123")
S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME", "jan")

s3 = boto3.resource('s3',
                    endpoint_url=S3_ENDPOINT_URL,
                    aws_access_key_id=S3_ACCESS_KEY_ID,
                    aws_secret_access_key=S3_SECRET_ACCESS_KEY,
                    config=Config(signature_version='s3v4'),
                    region_name='us-east-1')
s3_bucket = s3.Bucket(S3_BUCKET_NAME)


class Payload(BaseModel):
    prompt: str
    neg_prompt: str
    seed: int
    steps: int
    width: int
    height: int


# Create the OUTPUT_DIR directory if it does not exist
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Create the MODEL_DIR directory if it does not exist
if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)


def run_command(payload: Payload, filename: str):
    # Build the sd.cpp CLI invocation for a txt2img run
    command = [SD_PATH,
               "--model", f'{os.path.join(MODEL_DIR, MODEL_NAME)}',
               "--prompt", f'"{payload.prompt}"',
               "--negative-prompt", f'"{payload.neg_prompt}"',
               "--height", str(payload.height),
               "--width", str(payload.width),
               "--steps", str(payload.steps),
               "--seed", str(payload.seed),
               "--mode", 'txt2img',
               "-o", f'{os.path.join(OUTPUT_DIR, filename)}',
               ]
    try:
        # check=True makes a non-zero exit raise CalledProcessError;
        # without it the except branch below could never trigger
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        raise HTTPException(
            status_code=500, detail="Failed to execute the command.")


@app.post("/inferences/txt2img")
async def run_inference(background_tasks: BackgroundTasks, payload: Payload):
    # Generate a unique filename using uuid4()
    filename = f"{uuid4()}.png"
    # A background task could make this non-blocking:
    # background_tasks.add_task(run_command, payload, filename)
    run_command(payload, filename)
    s3_bucket.upload_file(f'{os.path.join(OUTPUT_DIR, filename)}', filename)
    # Return the public URL of the uploaded image
    return {"url": f'{S3_PUBLIC_ENDPOINT_URL}/{S3_BUCKET_NAME}/{filename}'}


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8002)
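For the record, the deleted endpoint took a JSON body matching the `Payload` model and answered with a public MinIO URL. A request would have looked like this (illustrative values):

```sh
curl -X POST http://localhost:8001/inferences/txt2img \
  -H "Content-Type: application/json" \
  -d '{"prompt": "an astronaut riding a horse", "neg_prompt": "",
       "seed": 42, "steps": 20, "width": 512, "height": 512}'
# => {"url": "http://127.0.0.1:9000/jan/<uuid>.png"}
```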

jan-inference/sd/sd_cpp

@@ -1 +0,0 @@
Subproject commit c8f85a4e3063e2cdb27db57b8f6167da16453e0c

.env

@@ -5,15 +5,4 @@ KEYCLOAK_ADMIN_PASSWORD=admin
# Inference
## LLM
LLM_MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
LLM_MODEL_FILE=llama-2-7b-chat.ggmlv3.q4_1.bin
## SD
SD_MODEL_URL=https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
SD_MODEL_FILE=v1-5-pruned-emaonly.safetensors
# Minio
S3_ACCESS_KEY_ID=minio
S3_SECRET_ACCESS_KEY=minio123
S3_BUCKET_NAME=jan
S3_ENDPOINT_URL=http://minio:9000
S3_PUBLIC_ENDPOINT_URL=http://127.0.0.1:9000
LLM_MODEL_FILE=llama-2-7b-chat.ggmlv3.q4_1.bin
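With the SD and MinIO variables gone, only the Keycloak and LLM settings remain. One way to confirm the compose file no longer references any removed variable (compose warns about each `${VAR}` that `.env` leaves unset):

```sh
docker compose config >/dev/null
```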