diff --git a/.gitignore b/.gitignore
index 4809a62fe..2f1d9563a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,10 +3,3 @@
 
 # Jan inference
 jan-inference/llm/models/**
-
-jan-inference/sd/models/**
-jan-inference/sd/output/**
-jan-inference/sd/sd
-
-# Minio
-minio/**
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index fb1c6e128..000000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "jan-inference/sd/sd_cpp"]
-	path = jan-inference/sd/sd_cpp
-	url = https://github.com/leejet/stable-diffusion.cpp
diff --git a/README.md b/README.md
index 6d6a90e58..db074e12b 100644
--- a/README.md
+++ b/README.md
@@ -72,13 +72,6 @@ export DOCKER_DEFAULT_PLATFORM=linux/$(uname -m)
 | web-client | `conf/sample.env_web-client` |
 
 ### Install Models
-
-- Download Runway SD 1.5 from HuggingFace
-```sh
-wget https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors -P jan-inference/sd/models
-```
-
-- Download Llama 7Bn ggml from HuggingFace
 ```sh
 wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models
 ```
@@ -106,8 +99,7 @@ docker compose up -d
 | Web App | http://localhost:3000 | Users are signed up to keycloak, default created user is set via `conf/keycloak_conf/example-realm.json` on keycloak with username: `username`, password: `password` |
 | Keycloak Admin | http://localhost:8088 | Admin credentials are set via the environment variables `KEYCLOAK_ADMIN` and `KEYCLOAK_ADMIN_PASSWORD` |
 | Hasura App Backend | http://localhost:8080 | Admin credentials are set via the environment variables `HASURA_GRAPHQL_ADMIN_SECRET` in file `conf/sample.env_app-backend` |
-| LLM Service | http://localhost:8000 | |
-| Stable Diffusion Service | http://localhost:8001 | |
+| LLM Service | http://localhost:8000 | |
 
 ## Usage
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 5793101c7..621e26287 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -63,13 +63,6 @@ services:
       interval: 5s
       timeout: 10s
       retries: 30
-    labels:
-      - "traefik.enable=true"
-      - "traefik.http.routers.graphql-engine.entrypoints=jan"
-      - "traefik.http.routers.graphql-engine.rule=(PathPrefix(`/graphql-engine`))"
-      - "traefik.http.middlewares.graphql-engine.stripprefix.prefixes=/graphql-engine"
-      - "traefik.http.middlewares.graphql-engine.stripprefix.forceslash=false"
-      - "traefik.http.routers.graphql-engine.middlewares=graphql-engine"
     networks:
       jan_community:
         ipv4_address: 172.20.0.12
@@ -143,12 +136,6 @@ services:
       NODE_ENV: development
     extra_hosts:
       - "localhost:172.20.0.9"
-    labels:
-      - "traefik.enable=true"
-      - "traefik.http.routers.web.entrypoints=jan"
-      - "traefik.http.routers.web.rule=(Host(`localhost`) && PathPrefix(`/web`))"
-      - "traefik.http.routers.web.middlewares=rewritepath"
-      - "traefik.http.middlewares.rewritepath.addprefix.prefix=/web"
     depends_on:
       graphql-engine:
         condition: service_healthy
@@ -166,122 +153,10 @@ services:
       MODEL: /models/${LLM_MODEL_FILE}
       PYTHONUNBUFFERED: 1
     restart: on-failure
-    labels:
-      - "traefik.enable=true"
-      - "traefik.http.routers.llm.entrypoints=jan"
-      - "traefik.http.routers.llm.rule=(PathPrefix(`/llm`))"
-      - "traefik.http.middlewares.llm.stripprefix.prefixes=/llm"
-      - "traefik.http.middlewares.llm.stripprefix.forceslash=false"
-      - "traefik.http.routers.llm.middlewares=llm"
     networks:
       jan_community:
         ipv4_address: 172.20.0.18
 
-  sd-downloader:
-    build:
-      context: ./jan-inference/sd/
-      dockerfile: compile.Dockerfile
-    command: /bin/sh -c "if [ ! -f /models/*.bin ]; then python /sd.cpp/sd_cpp/models/convert.py --out_type q4_0 --out_file /models/${SD_MODEL_FILE}.q4_0.bin /models/${SD_MODEL_FILE}; fi"
-    volumes:
-      - ./jan-inference/sd/models:/models
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.19
-
-  sd:
-    build:
-      context: ./jan-inference/sd/
-      dockerfile: inference.Dockerfile
-    volumes:
-      - ./jan-inference/sd/models:/models/
-    command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
-    environment:
-      S3_ENDPOINT_URL: ${S3_ENDPOINT_URL}
-      S3_PUBLIC_ENDPOINT_URL: ${S3_PUBLIC_ENDPOINT_URL}
-      S3_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
-      S3_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
-      S3_BUCKET_NAME: ${S3_BUCKET_NAME}
-      MODEL_NAME: ${SD_MODEL_FILE}.q4_0.bin
-      MODEL_DIR: /models
-      OUTPUT_DIR: /tmp
-      SD_PATH: /sd
-      PYTHONUNBUFFERED: 1
-    ports:
-      - 8001:8000
-    restart: on-failure
-    depends_on:
-      sd-downloader:
-        condition: service_completed_successfully
-    labels:
-      - "traefik.enable=true"
-      - "traefik.http.routers.sd.entrypoints=jan"
-      - "traefik.http.routers.sd.rule=(PathPrefix(`/sd`))"
-      - "traefik.http.middlewares.sd.stripprefix.prefixes=/sd"
-      - "traefik.http.middlewares.sd.stripprefix.forceslash=false"
-      - "traefik.http.routers.sd.middlewares=sd"
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.21
-
-  minio:
-    image: minio/minio
-    ports:
-      - 9000:9000
-      - 9001
-    volumes:
-      - ./minio/data:/export
-      - ./minio/config:/root/.minio
-    environment:
-      MINIO_ROOT_USER: ${S3_ACCESS_KEY_ID}
-      MINIO_ROOT_PASSWORD: ${S3_SECRET_ACCESS_KEY}
-    command: server /export --console-address ":9001"
-    labels:
-      - "traefik.enable=true"
-      - "traefik.http.routers.minio.entrypoints=jan"
-      - "traefik.http.routers.minio.rule=(PathPrefix(`/minio`))"
-      - "traefik.http.middlewares.minio.stripprefix.prefixes=/minio"
-      - "traefik.http.middlewares.minio.stripprefix.forceslash=false"
-      - "traefik.http.routers.minio.middlewares=minio"
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.23
-
-  createbuckets:
-    image: minio/mc
-    depends_on:
-      - minio
-    environment:
-      S3_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
-      S3_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
-      S3_BUCKET_NAME: ${S3_BUCKET_NAME}
-    entrypoint: >
-      /bin/sh -c "
-      /usr/bin/mc config host add myminio http://minio:9000 ${S3_ACCESS_KEY_ID} ${S3_SECRET_ACCESS_KEY};
-      /usr/bin/mc rm -r --force myminio/${S3_BUCKET_NAME};
-      /usr/bin/mc mb myminio/${S3_BUCKET_NAME};
-      /usr/bin/mc anonymous set public myminio/${S3_BUCKET_NAME};
-      exit 0;
-      "
-    networks:
-      jan_community:
-
-  traefik:
-    image: traefik:v2.10
-    command:
-      - "--api.insecure=true"
-      - "--providers.docker=true"
-      - "--providers.docker.exposedbydefault=false"
-      - "--log.level=debug"
-      - "--entrypoints.jan.address=:1337"
-    ports:
-      - "1337:1337"
-      - "9090:8080"
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock:ro
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.22
-
 networks:
   jan_community:
     driver: bridge
diff --git a/jan-inference/sd/.dockerignore b/jan-inference/sd/.dockerignore
deleted file mode 100644
index d5b227c62..000000000
--- a/jan-inference/sd/.dockerignore
+++ /dev/null
@@ -1,7 +0,0 @@
-output/
-models/
-
-sd_cpp/.git
-sd_cpp/.github
-
-sd
\ No newline at end of file
diff --git a/jan-inference/sd/compile.Dockerfile b/jan-inference/sd/compile.Dockerfile
deleted file mode 100644
index 383e5202e..000000000
--- a/jan-inference/sd/compile.Dockerfile
+++ /dev/null
@@ -1,10 +0,0 @@
-FROM python:3.9.17
-
-RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
-ENV PATH=/root/.cargo/bin:$PATH
-
-WORKDIR /sd.cpp
-
-COPY . .
-
-RUN pip install -r compile.requirements.txt
diff --git a/jan-inference/sd/compile.requirements.txt b/jan-inference/sd/compile.requirements.txt
deleted file mode 100644
index 48f4f8730..000000000
--- a/jan-inference/sd/compile.requirements.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# Compiling
--r sd_cpp/models/requirements.txt
-
-# diffusers
-# torch
-# ftfy
-# scipy
-# transformers
-# accelerate
-# huggingface-hub
-# xformers
-# omegaconf
-# safetensors
-# cog
-# tomesd
-# compel
-
diff --git a/jan-inference/sd/inference.Dockerfile b/jan-inference/sd/inference.Dockerfile
deleted file mode 100644
index db31e7f98..000000000
--- a/jan-inference/sd/inference.Dockerfile
+++ /dev/null
@@ -1,21 +0,0 @@
-ARG UBUNTU_VERSION=22.04
-
-FROM ubuntu:$UBUNTU_VERSION as build
-
-RUN apt-get update && apt-get install -y build-essential git cmake
-
-WORKDIR /sd.cpp
-
-COPY sd_cpp /sd.cpp
-
-RUN mkdir build && cd build && cmake .. && cmake --build . --config Release
-
-FROM python:3.9.17 as runtime
-
-COPY --from=build /sd.cpp/build/bin/sd /sd
-
-WORKDIR /serving
-
-COPY . /serving/
-
-RUN pip install -r inference.requirements.txt
\ No newline at end of file
diff --git a/jan-inference/sd/inference.requirements.txt b/jan-inference/sd/inference.requirements.txt
deleted file mode 100644
index 909923424..000000000
--- a/jan-inference/sd/inference.requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Inference
-fastapi
-uvicorn
-python-multipart
-boto3
\ No newline at end of file
diff --git a/jan-inference/sd/main.py b/jan-inference/sd/main.py
deleted file mode 100644
index e310ec15e..000000000
--- a/jan-inference/sd/main.py
+++ /dev/null
@@ -1,90 +0,0 @@
-from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
-from fastapi.responses import FileResponse
-from fastapi.staticfiles import StaticFiles
-import subprocess
-import os
-from uuid import uuid4
-from pydantic import BaseModel
-import boto3
-from botocore.client import Config
-
-app = FastAPI()
-
-OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "output")
-SD_PATH = os.environ.get("SD_PATH", "./sd")
-MODEL_DIR = os.environ.get("MODEL_DIR", "./models")
-MODEL_NAME = os.environ.get(
-    "MODEL_NAME", "v1-5-pruned-emaonly.safetensors.q4_0.bin")
-
-S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", "http://localhost:9000")
-S3_PUBLIC_ENDPOINT_URL = os.environ.get(
-    "S3_PUBLIC_ENDPOINT_URL", "http://localhost:9000")
-S3_ACCESS_KEY_ID = os.environ.get("S3_ACCESS_KEY_ID", "minio")
-S3_SECRET_ACCESS_KEY = os.environ.get("S3_SECRET_ACCESS_KEY", "minio123")
-S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME", "jan")
-
-s3 = boto3.resource('s3',
-                    endpoint_url=S3_ENDPOINT_URL,
-                    aws_access_key_id=S3_ACCESS_KEY_ID,
-                    aws_secret_access_key=S3_SECRET_ACCESS_KEY,
-                    config=Config(signature_version='s3v4'),
-                    region_name='us-east-1')
-
-s3_bucket = s3.Bucket(S3_BUCKET_NAME)
-
-
-class Payload(BaseModel):
-    prompt: str
-    neg_prompt: str
-    seed: int
-    steps: int
-    width: int
-    height: int
-
-
-# Create the OUTPUT_DIR directory if it does not exist
-if not os.path.exists(OUTPUT_DIR):
-    os.makedirs(OUTPUT_DIR)
-
-# Create the OUTPUT_DIR directory if it does not exist
-if not os.path.exists(MODEL_DIR):
-    os.makedirs(MODEL_DIR)
-
-
-def run_command(payload: Payload, filename: str):
-    # Construct the command based on your provided example
-    command = [SD_PATH,
-               "--model", f'{os.path.join(MODEL_DIR, MODEL_NAME)}',
-               "--prompt", f'"{payload.prompt}"',
-               "--negative-prompt", f'"{payload.neg_prompt}"',
-               "--height", str(payload.height),
-               "--width", str(payload.width),
- "--steps", str(payload.steps), - "--seed", str(payload.seed), - "--mode", 'txt2img', - "-o", f'{os.path.join(OUTPUT_DIR, filename)}', - ] - - try: - subprocess.run(command) - except subprocess.CalledProcessError: - raise HTTPException( - status_code=500, detail="Failed to execute the command.") - - -@app.post("/inferences/txt2img") -async def run_inference(background_tasks: BackgroundTasks, payload: Payload): - # Generate a unique filename using uuid4() - filename = f"{uuid4()}.png" - - # We will use background task to run the command so it won't block - # background_tasks.add_task(run_command, payload, filename) - run_command(payload, filename) - s3_bucket.upload_file(f'{os.path.join(OUTPUT_DIR, filename)}', filename) - # Return the expected path of the output file - return {"url": f'{S3_PUBLIC_ENDPOINT_URL}/{S3_BUCKET_NAME}/{filename}'} - - -if __name__ == "__main__": - import uvicorn - uvicorn.run(app, host="0.0.0.0", port=8002) diff --git a/jan-inference/sd/sd_cpp b/jan-inference/sd/sd_cpp deleted file mode 160000 index c8f85a4e3..000000000 --- a/jan-inference/sd/sd_cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c8f85a4e3063e2cdb27db57b8f6167da16453e0c diff --git a/sample.env b/sample.env index d647a45cd..6024c80ad 100644 --- a/sample.env +++ b/sample.env @@ -5,15 +5,4 @@ KEYCLOAK_ADMIN_PASSWORD=admin # Inference ## LLM LLM_MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -LLM_MODEL_FILE=llama-2-7b-chat.ggmlv3.q4_1.bin - -## SD -SD_MODEL_URL=https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors -SD_MODEL_FILE=v1-5-pruned-emaonly.safetensors - -# Minio -S3_ACCESS_KEY_ID=minio -S3_SECRET_ACCESS_KEY=minio123 -S3_BUCKET_NAME=jan -S3_ENDPOINT_URL=http://minio:9000 -S3_PUBLIC_ENDPOINT_URL=http://127.0.0.1:9000 \ No newline at end of file +LLM_MODEL_FILE=llama-2-7b-chat.ggmlv3.q4_1.bin \ No newline at end of file