fix: Remove sd ggml + minio + traefik (#63)

* fix(inf-sd): Delete gitsubmodule for SD ggml

* chore: Delete custom code for sd.cpp

* chore: Update gitignore for sd models

* chore: Update readme for sd ggml service removal

* chore: Refactor compose file to remove minio, sd, sd-downloader and traefik

* fix(.env): Removing SD and minio related configuration
Authored by namvuong on 2023-09-06 15:48:54 +07:00; committed by GitHub.
parent 83e21250a4
commit 34939feb90
12 changed files with 2 additions and 307 deletions

.gitignore

@@ -3,10 +3,3 @@
# Jan inference
jan-inference/llm/models/**
jan-inference/sd/models/**
jan-inference/sd/output/**
jan-inference/sd/sd
# Minio
minio/**

.gitmodules

@@ -1,3 +0,0 @@
[submodule "jan-inference/sd/sd_cpp"]
	path = jan-inference/sd/sd_cpp
	url = https://github.com/leejet/stable-diffusion.cpp
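For anyone replaying this cleanup by hand: removing a git submodule takes more than deleting the `.gitmodules` entry. A typical sequence (paths taken from this repo; exact steps can vary by git version):

```sh
# Deregister the submodule and remove it from the index and working tree
git submodule deinit -f jan-inference/sd/sd_cpp
git rm -f jan-inference/sd/sd_cpp
# Drop the repository data git caches under .git/modules
rm -rf .git/modules/jan-inference/sd/sd_cpp
```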

README.md

@@ -72,13 +72,6 @@ export DOCKER_DEFAULT_PLATFORM=linux/$(uname -m)
| web-client | `conf/sample.env_web-client` |
### Install Models
- Download Runway SD 1.5 from HuggingFace
```sh
wget https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors -P jan-inference/sd/models
```
- Download Llama 2 7B GGML from HuggingFace
```sh
wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models
```
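Before this change, a quick way to confirm the downloads landed where the compose file mounts them (a hypothetical check, not part of the original README):

```sh
ls -lh jan-inference/sd/models jan-inference/llm/models
```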
@@ -106,8 +99,7 @@ docker compose up -d
| Web App | http://localhost:3000 | Users are signed up to keycloak, default created user is set via `conf/keycloak_conf/example-realm.json` on keycloak with username: `username`, password: `password` |
| Keycloak Admin | http://localhost:8088 | Admin credentials are set via the environment variables `KEYCLOAK_ADMIN` and `KEYCLOAK_ADMIN_PASSWORD` |
| Hasura App Backend | http://localhost:8080 | Admin credentials are set via the environment variables `HASURA_GRAPHQL_ADMIN_SECRET` in file `conf/sample.env_app-backend` |
| LLM Service | http://localhost:8000 | |
| Stable Diffusion Service | http://localhost:8001 | |
| LLM Service | http://localhost:8000 | |
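The surviving LLM service listens on port 8000 and serves the GGML model from `LLM_MODEL_FILE`. Assuming it exposes an OpenAI-compatible completions endpoint (as llama-cpp-python's server does; the README does not spell this out), a smoke test might look like:

```sh
curl http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello, Jan!", "max_tokens": 32}'
```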
## Usage

docker-compose.yml

@@ -63,13 +63,6 @@ services:
      interval: 5s
      timeout: 10s
      retries: 30
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.graphql-engine.entrypoints=jan"
      - "traefik.http.routers.graphql-engine.rule=(PathPrefix(`/graphql-engine`))"
      - "traefik.http.middlewares.graphql-engine.stripprefix.prefixes=/graphql-engine"
      - "traefik.http.middlewares.graphql-engine.stripprefix.forceslash=false"
      - "traefik.http.routers.graphql-engine.middlewares=graphql-engine"
    networks:
      jan_community:
        ipv4_address: 172.20.0.12
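The deleted labels above are what let Traefik route to Hasura: requests hitting the `jan` entrypoint (port 1337) under `/graphql-engine` were matched by `PathPrefix` and had the prefix stripped before forwarding. Roughly, the before/after access paths (illustrative URLs; `/healthz` is Hasura's health endpoint):

```sh
# Before: routed through Traefik, /graphql-engine prefix stripped
curl http://localhost:1337/graphql-engine/healthz
# After: hit Hasura directly on its published port
curl http://localhost:8080/healthz
```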
@@ -143,12 +136,6 @@ services:
      NODE_ENV: development
    extra_hosts:
      - "localhost:172.20.0.9"
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.web.entrypoints=jan"
      - "traefik.http.routers.web.rule=(Host(`localhost`) && PathPrefix(`/web`))"
      - "traefik.http.routers.web.middlewares=rewritepath"
      - "traefik.http.middlewares.rewritepath.addprefix.prefix=/web"
    depends_on:
      graphql-engine:
        condition: service_healthy
@@ -166,122 +153,10 @@ services:
      MODEL: /models/${LLM_MODEL_FILE}
      PYTHONUNBUFFERED: 1
    restart: on-failure
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.llm.entrypoints=jan"
      - "traefik.http.routers.llm.rule=(PathPrefix(`/llm`))"
      - "traefik.http.middlewares.llm.stripprefix.prefixes=/llm"
      - "traefik.http.middlewares.llm.stripprefix.forceslash=false"
      - "traefik.http.routers.llm.middlewares=llm"
    networks:
      jan_community:
        ipv4_address: 172.20.0.18
  sd-downloader:
    build:
      context: ./jan-inference/sd/
      dockerfile: compile.Dockerfile
    command: /bin/sh -c "if [ ! -f /models/*.bin ]; then python /sd.cpp/sd_cpp/models/convert.py --out_type q4_0 --out_file /models/${SD_MODEL_FILE}.q4_0.bin /models/${SD_MODEL_FILE}; fi"
    volumes:
      - ./jan-inference/sd/models:/models
    networks:
      jan_community:
        ipv4_address: 172.20.0.19
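A side note on the deleted guard: `[ ! -f /models/*.bin ]` only works while the glob expands to zero or one file; with two or more `.bin` files `test` receives too many arguments and the conversion is silently skipped. Had the service stayed, a sturdier POSIX guard would be (a sketch, not the original code):

```sh
# Expand the glob into positional parameters, then test the first match
set -- /models/*.bin
if [ ! -e "$1" ]; then
  python /sd.cpp/sd_cpp/models/convert.py --out_type q4_0 \
    --out_file "/models/${SD_MODEL_FILE}.q4_0.bin" "/models/${SD_MODEL_FILE}"
fi
```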
  sd:
    build:
      context: ./jan-inference/sd/
      dockerfile: inference.Dockerfile
    volumes:
      - ./jan-inference/sd/models:/models/
    command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
    environment:
      S3_ENDPOINT_URL: ${S3_ENDPOINT_URL}
      S3_PUBLIC_ENDPOINT_URL: ${S3_PUBLIC_ENDPOINT_URL}
      S3_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
      S3_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
      S3_BUCKET_NAME: ${S3_BUCKET_NAME}
      MODEL_NAME: ${SD_MODEL_FILE}.q4_0.bin
      MODEL_DIR: /models
      OUTPUT_DIR: /tmp
      SD_PATH: /sd
      PYTHONUNBUFFERED: 1
    ports:
      - 8001:8000
    restart: on-failure
    depends_on:
      sd-downloader:
        condition: service_completed_successfully
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.sd.entrypoints=jan"
      - "traefik.http.routers.sd.rule=(PathPrefix(`/sd`))"
      - "traefik.http.middlewares.sd.stripprefix.prefixes=/sd"
      - "traefik.http.middlewares.sd.stripprefix.forceslash=false"
      - "traefik.http.routers.sd.middlewares=sd"
    networks:
      jan_community:
        ipv4_address: 172.20.0.21
  minio:
    image: minio/minio
    ports:
      - 9000:9000
      - 9001
    volumes:
      - ./minio/data:/export
      - ./minio/config:/root/.minio
    environment:
      MINIO_ROOT_USER: ${S3_ACCESS_KEY_ID}
      MINIO_ROOT_PASSWORD: ${S3_SECRET_ACCESS_KEY}
    command: server /export --console-address ":9001"
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.minio.entrypoints=jan"
      - "traefik.http.routers.minio.rule=(PathPrefix(`/minio`))"
      - "traefik.http.middlewares.minio.stripprefix.prefixes=/minio"
      - "traefik.http.middlewares.minio.stripprefix.forceslash=false"
      - "traefik.http.routers.minio.middlewares=minio"
    networks:
      jan_community:
        ipv4_address: 172.20.0.23
  createbuckets:
    image: minio/mc
    depends_on:
      - minio
    environment:
      S3_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
      S3_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
      S3_BUCKET_NAME: ${S3_BUCKET_NAME}
    entrypoint: >
      /bin/sh -c "
      /usr/bin/mc config host add myminio http://minio:9000 ${S3_ACCESS_KEY_ID} ${S3_SECRET_ACCESS_KEY};
      /usr/bin/mc rm -r --force myminio/${S3_BUCKET_NAME};
      /usr/bin/mc mb myminio/${S3_BUCKET_NAME};
      /usr/bin/mc anonymous set public myminio/${S3_BUCKET_NAME};
      exit 0;
      "
    networks:
      jan_community:
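`mc anonymous set public` is what made generated images fetchable without credentials: the SD service returned URLs of the form `${S3_PUBLIC_ENDPOINT_URL}/${S3_BUCKET_NAME}/<file>`, which only resolve if the bucket allows anonymous reads. A hypothetical check while the stack was up (object name invented for illustration):

```sh
curl -I http://127.0.0.1:9000/jan/some-image.png
```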
  traefik:
    image: traefik:v2.10
    command:
      - "--api.insecure=true"
      - "--providers.docker=true"
      - "--providers.docker.exposedbydefault=false"
      - "--log.level=debug"
      - "--entrypoints.jan.address=:1337"
    ports:
      - "1337:1337"
      - "9090:8080"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      jan_community:
        ipv4_address: 172.20.0.22
networks:
  jan_community:
    driver: bridge
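The removed Traefik instance declared one custom entrypoint, `jan`, on :1337 and published its insecure API/dashboard (container port 8080) on host port 9090. While it ran, the routers and middlewares discovered from the Docker labels could be inspected through Traefik v2's API (hypothetical session):

```sh
curl http://localhost:9090/api/rawdata
```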

jan-inference/sd/.dockerignore

@@ -1,7 +0,0 @@
output/
models/
sd_cpp/.git
sd_cpp/.github
sd

jan-inference/sd/compile.Dockerfile

@@ -1,10 +0,0 @@
FROM python:3.9.17
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH=/root/.cargo/bin:$PATH
WORKDIR /sd.cpp
COPY . .
RUN pip install -r compile.requirements.txt
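This image only prepared the conversion environment; the Rust toolchain is presumably there for native wheels pulled in by the converter's requirements (an inference on my part, not stated in the Dockerfile). Rebuilding it standalone, mirroring the compose definition (image tag invented):

```sh
docker build -f jan-inference/sd/compile.Dockerfile -t jan-sd-compile jan-inference/sd
```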

jan-inference/sd/compile.requirements.txt

@@ -1,17 +0,0 @@
# Compiling
-r sd_cpp/models/requirements.txt
# diffusers
# torch
# ftfy
# scipy
# transformers
# accelerate
# huggingface-hub
# xformers
# omegaconf
# safetensors
# cog
# tomesd
# compel

jan-inference/sd/inference.Dockerfile

@@ -1,21 +0,0 @@
ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION as build
RUN apt-get update && apt-get install -y build-essential git cmake
WORKDIR /sd.cpp
COPY sd_cpp /sd.cpp
RUN mkdir build && cd build && cmake .. && cmake --build . --config Release
FROM python:3.9.17 as runtime
COPY --from=build /sd.cpp/build/bin/sd /sd
WORKDIR /serving
COPY . /serving/
RUN pip install -r inference.requirements.txt
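A two-stage build: the first stage compiles the `sd` binary from the sd.cpp submodule with CMake, the second copies only that binary into a Python image that serves `main.py`. Building and running it by hand would have looked roughly like this (tag invented; port, volume, and MODEL_NAME taken from the compose file):

```sh
docker build -f jan-inference/sd/inference.Dockerfile -t jan-sd jan-inference/sd
docker run --rm -p 8001:8000 \
  -v "$PWD/jan-inference/sd/models:/models" \
  -e MODEL_NAME=v1-5-pruned-emaonly.safetensors.q4_0.bin \
  jan-sd python -m uvicorn main:app --host 0.0.0.0 --port 8000
```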

jan-inference/sd/inference.requirements.txt

@@ -1,5 +0,0 @@
# Inference
fastapi
uvicorn
python-multipart
boto3

jan-inference/sd/main.py

@@ -1,90 +0,0 @@
from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
import subprocess
import os
from uuid import uuid4
from pydantic import BaseModel
import boto3
from botocore.client import Config

app = FastAPI()

OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "output")
SD_PATH = os.environ.get("SD_PATH", "./sd")
MODEL_DIR = os.environ.get("MODEL_DIR", "./models")
MODEL_NAME = os.environ.get(
    "MODEL_NAME", "v1-5-pruned-emaonly.safetensors.q4_0.bin")
S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", "http://localhost:9000")
S3_PUBLIC_ENDPOINT_URL = os.environ.get(
    "S3_PUBLIC_ENDPOINT_URL", "http://localhost:9000")
S3_ACCESS_KEY_ID = os.environ.get("S3_ACCESS_KEY_ID", "minio")
S3_SECRET_ACCESS_KEY = os.environ.get("S3_SECRET_ACCESS_KEY", "minio123")
S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME", "jan")

s3 = boto3.resource('s3',
                    endpoint_url=S3_ENDPOINT_URL,
                    aws_access_key_id=S3_ACCESS_KEY_ID,
                    aws_secret_access_key=S3_SECRET_ACCESS_KEY,
                    config=Config(signature_version='s3v4'),
                    region_name='us-east-1')
s3_bucket = s3.Bucket(S3_BUCKET_NAME)


class Payload(BaseModel):
    prompt: str
    neg_prompt: str
    seed: int
    steps: int
    width: int
    height: int


# Create the OUTPUT_DIR directory if it does not exist
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Create the MODEL_DIR directory if it does not exist
if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)


def run_command(payload: Payload, filename: str):
    # Build the sd.cpp CLI invocation for a txt2img run
    command = [SD_PATH,
               "--model", f'{os.path.join(MODEL_DIR, MODEL_NAME)}',
               "--prompt", f'"{payload.prompt}"',
               "--negative-prompt", f'"{payload.neg_prompt}"',
               "--height", str(payload.height),
               "--width", str(payload.width),
               "--steps", str(payload.steps),
               "--seed", str(payload.seed),
               "--mode", 'txt2img',
               "-o", f'{os.path.join(OUTPUT_DIR, filename)}',
               ]
    try:
        # check=True makes a non-zero exit raise CalledProcessError;
        # without it the except branch below could never trigger
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        raise HTTPException(
            status_code=500, detail="Failed to execute the command.")


@app.post("/inferences/txt2img")
async def run_inference(background_tasks: BackgroundTasks, payload: Payload):
    # Generate a unique filename using uuid4()
    filename = f"{uuid4()}.png"
    # A background task could make this non-blocking:
    # background_tasks.add_task(run_command, payload, filename)
    run_command(payload, filename)
    s3_bucket.upload_file(f'{os.path.join(OUTPUT_DIR, filename)}', filename)
    # Return the public URL of the uploaded image
    return {"url": f'{S3_PUBLIC_ENDPOINT_URL}/{S3_BUCKET_NAME}/{filename}'}


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8002)
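For the record, the deleted endpoint took a JSON body matching the `Payload` model and answered with a public MinIO URL. A request would have looked like this (illustrative values):

```sh
curl -X POST http://localhost:8001/inferences/txt2img \
  -H "Content-Type: application/json" \
  -d '{"prompt": "an astronaut riding a horse", "neg_prompt": "",
       "seed": 42, "steps": 20, "width": 512, "height": 512}'
# => {"url": "http://127.0.0.1:9000/jan/<uuid>.png"}
```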

jan-inference/sd/sd_cpp

@@ -1 +0,0 @@
Subproject commit c8f85a4e3063e2cdb27db57b8f6167da16453e0c

.env

@@ -5,15 +5,4 @@ KEYCLOAK_ADMIN_PASSWORD=admin
# Inference
## LLM
LLM_MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
LLM_MODEL_FILE=llama-2-7b-chat.ggmlv3.q4_1.bin
## SD
SD_MODEL_URL=https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
SD_MODEL_FILE=v1-5-pruned-emaonly.safetensors
# Minio
S3_ACCESS_KEY_ID=minio
S3_SECRET_ACCESS_KEY=minio123
S3_BUCKET_NAME=jan
S3_ENDPOINT_URL=http://minio:9000
S3_PUBLIC_ENDPOINT_URL=http://127.0.0.1:9000
LLM_MODEL_FILE=llama-2-7b-chat.ggmlv3.q4_1.bin
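With the SD and MinIO variables gone, only the Keycloak and LLM settings remain. One way to confirm the compose file no longer references any removed variable (compose warns about each `${VAR}` that `.env` leaves unset):

```sh
docker compose config >/dev/null
```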