From 6ef61c45ec684e22b5ce2a9dab7bc79a021f5a50 Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Thu, 24 Aug 2023 09:14:18 +0700
Subject: [PATCH 1/7] feat(inference): Initial commit for LLM inference using
 llama-cpp-python (GGML)

---
 .gitignore                           |  3 +
 jan-inference/llm/.env.example       |  2 +
 jan-inference/llm/README.md          |  8 +++
 jan-inference/llm/docker-compose.yml | 94 ++++++++++++++++++++++++++++
 jan-inference/llm/models/.gitkeep    |  0
 5 files changed, 107 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 jan-inference/llm/.env.example
 create mode 100644 jan-inference/llm/README.md
 create mode 100644 jan-inference/llm/docker-compose.yml
 create mode 100644 jan-inference/llm/models/.gitkeep

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..626303c91
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+# Jan inference
+# jan-inference/llm/models/**
+jan-inference/llm/.env
\ No newline at end of file
diff --git a/jan-inference/llm/.env.example b/jan-inference/llm/.env.example
new file mode 100644
index 000000000..40a331b36
--- /dev/null
+++ b/jan-inference/llm/.env.example
@@ -0,0 +1,2 @@
+MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
+MODEL_FILE=$(basename $MODEL_URL)
\ No newline at end of file
diff --git a/jan-inference/llm/README.md b/jan-inference/llm/README.md
new file mode 100644
index 000000000..3183a8fb6
--- /dev/null
+++ b/jan-inference/llm/README.md
@@ -0,0 +1,8 @@
+# Inference - LLM
+
+```bash
+docker network create traefik_public
+cp .env.example .env
+# -> Update MODEL_URL in `.env` file
+docker compose up -d --scale llm=2
+```
diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml
new file mode 100644
index 000000000..50441dee3
--- /dev/null
+++ b/jan-inference/llm/docker-compose.yml
@@ -0,0 +1,94 @@
+version: '3'
+
+services:
+
+  # Service to download a model file.
+  downloader:
+    image: busybox
+    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
+    command: /bin/sh -c "MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$MODEL_FILE ]; then wget -O /models/$MODEL_FILE ${MODEL_URL}; fi"
+    # Mount a local directory to store the downloaded model.
+    volumes:
+      - ./models:/models
+
+  # Service to wait for the downloader service to finish downloading the model.
+  wait-for-downloader:
+    image: busybox
+    # The command waits until the model file (specified in MODEL_URL) exists.
+    command: /bin/sh -c "MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
+    # Specifies that this service should start after the downloader service has started.
+    depends_on:
+      downloader:
+        condition: service_started
+    # Mount the same local directory to check for the downloaded model.
+    volumes:
+      - ./models:/models
+
+  # Service to run the Llama web application.
+  llm:
+    image: ghcr.io/abetlen/llama-cpp-python:latest
+    # Mount the directory that contains the downloaded model.
+    volumes:
+      - ./models:/models
+    environment:
+      # Specify the path to the model for the web application.
+      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
+    labels:
+      # Instead of using the Host rule, set a PathPrefix rule
+      - "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)"
+      # This tells Traefik where to forward the traffic for this route.
+      - "traefik.http.routers.web.service=llm"
+      # Define a service for the llm and specify its load balancer configuration
+      - "traefik.http.services.llm.loadbalancer.server.port=8000"
+
+      - "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm"
+      - "traefik.http.routers.web.middlewares=strip-llm-prefix"
+    # Health check configuration
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+    # Restart policy configuration
+    restart: on-failure
+    # Specifies that this service should start only after wait-for-downloader has completed successfully.
+    depends_on:
+      wait-for-downloader:
+        condition: service_completed_successfully
+    # Connect this service to two networks: inference_net and traefik_public.
+    networks:
+      - inference_net
+      - traefik_public
+
+  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
+  traefik:
+    image: traefik:v2.5
+    command:
+      # Enable the Traefik API dashboard without TLS (not recommended for production).
+      - "--api.insecure=true"
+      # Enable Traefik to use Docker as a provider.
+      - "--providers.docker=true"
+      # Do not expose services by default. Explicitly specify in each service if it should be exposed.
+      - "--providers.docker.exposedbydefault=false"
+      # Specify the default entry point on port 80.
+      - "--entrypoints.web.address=:80"
+    ports:
+      # Map port 80 in the container to port 80 on the host.
+      - "80:80"
+      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
+      - "8080:8080"
+    # Mount the Docker socket to allow Traefik to listen to Docker's API.
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    # Connect this service to the traefik_public network.
+    networks:
+      - traefik_public
+
+# Define networks used in this docker-compose file.
+networks:
+  # Network for the llm service (used for inference).
+  inference_net:
+  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
+  traefik_public:
+    external: true
diff --git a/jan-inference/llm/models/.gitkeep b/jan-inference/llm/models/.gitkeep
new file mode 100644
index 000000000..e69de29bb

From 216ded2a76d92e3b2353ce2aa830815dde172e61 Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Thu, 24 Aug 2023 09:14:42 +0700
Subject: [PATCH 2/7] chore: Add gitignore for models in inference/llm

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 626303c91..bdf314e0f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
 # Jan inference
-# jan-inference/llm/models/**
+jan-inference/llm/models/**
 jan-inference/llm/.env
\ No newline at end of file

From 6446f06f279721fa607813a530fd8aa995752f90 Mon Sep 17 00:00:00 2001
From: Hien To
Date: Thu, 24 Aug 2023 14:10:01 +0700
Subject: [PATCH 3/7] Use Traefik file provider

---
 jan-inference/llm/docker-compose.yml  | 25 ++++---------------------
 jan-inference/llm/traefik/config.yml  | 23 +++++++++++++++++++++++
 jan-inference/llm/traefik/traefik.yml | 17 +++++++++++++++++
 3 files changed, 44 insertions(+), 21 deletions(-)
 create mode 100644 jan-inference/llm/traefik/config.yml
 create mode 100644 jan-inference/llm/traefik/traefik.yml

diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml
index 50441dee3..f2fc9a73d 100644
--- a/jan-inference/llm/docker-compose.yml
+++ b/jan-inference/llm/docker-compose.yml
@@ -33,16 +33,6 @@ services:
     environment:
       # Specify the path to the model for the web application.
       MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
-    labels:
-      # Instead of using the Host rule, set a PathPrefix rule
-      - "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)"
-      # This tells Traefik where to forward the traffic for this route.
-      - "traefik.http.routers.web.service=llm"
-      # Define a service for the llm and specify its load balancer configuration
-      - "traefik.http.services.llm.loadbalancer.server.port=8000"
-
-      - "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm"
-      - "traefik.http.routers.web.middlewares=strip-llm-prefix"
     # Health check configuration
     healthcheck:
       test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
@@ -63,16 +53,7 @@ services:
 
   # Service for Traefik, a modern HTTP reverse proxy and load balancer.
   traefik:
-    image: traefik:v2.5
-    command:
-      # Enable the Traefik API dashboard without TLS (not recommended for production).
-      - "--api.insecure=true"
-      # Enable Traefik to use Docker as a provider.
-      - "--providers.docker=true"
-      # Do not expose services by default. Explicitly specify in each service if it should be exposed.
-      - "--providers.docker.exposedbydefault=false"
-      # Specify the default entry point on port 80.
-      - "--entrypoints.web.address=:80"
+    image: traefik:v2.10
     ports:
       # Map port 80 in the container to port 80 on the host.
       - "80:80"
@@ -80,7 +61,9 @@ services:
       - "8080:8080"
     # Mount the Docker socket to allow Traefik to listen to Docker's API.
     volumes:
-      - /var/run/docker.sock:/var/run/docker.sock
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - ./traefik/traefik.yml:/traefik.yml:ro
+      - ./traefik/config.yml:/config.yml:ro
     # Connect this service to the traefik_public network.
     networks:
       - traefik_public
diff --git a/jan-inference/llm/traefik/config.yml b/jan-inference/llm/traefik/config.yml
new file mode 100644
index 000000000..744427d57
--- /dev/null
+++ b/jan-inference/llm/traefik/config.yml
@@ -0,0 +1,23 @@
+http:
+  #region routers
+  routers:
+    llm:
+      entryPoints:
+        - "web"
+      rule: "PathPrefix(`/inference/llm`)"
+      middlewares:
+        - llmm-strip-prefix
+      service: llm
+#endregion
+#region services
+  services:
+    llm:
+      loadBalancer:
+        servers:
+          - url: "http://llm:8000"
+#endregion
+  middlewares:
+    llmm-strip-prefix:
+      stripPrefix:
+        prefixes:
+          - "/inference/llm"
diff --git a/jan-inference/llm/traefik/traefik.yml b/jan-inference/llm/traefik/traefik.yml
new file mode 100644
index 000000000..346425cd5
--- /dev/null
+++ b/jan-inference/llm/traefik/traefik.yml
@@ -0,0 +1,17 @@
+api:
+  dashboard: true
+  debug: true
+  insecure: true
+entryPoints:
+  web:
+    address: ":80"
+serversTransport:
+  insecureSkipVerify: true
+providers:
+  docker:
+    endpoint: "unix:///var/run/docker.sock"
+    exposedByDefault: false
+  file:
+    filename: /config.yml
+log:
+  level: DEBUG
\ No newline at end of file

From 1146c29a31f6428866d924e68a9c60e640577e18 Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Fri, 25 Aug 2023 01:31:07 +0700
Subject: [PATCH 4/7] feat(inference): Add SD CPP module

---
 .env.sample                                 | 11 ++-
 .gitignore                                  | 11 ++-
 .gitmodules                                 |  3 +
 jan-inference/llm/README.md                 |  8 ++
 jan-inference/llm/docker-compose.yml        | 94 ++++++++++++++++++++
 jan-inference/llm/models/.gitkeep           |  0
 jan-inference/sd/.dockerignore              |  7 ++
 jan-inference/sd/compile.Dockerfile         | 10 +++
 jan-inference/sd/compile.requirements.txt   | 17 ++++
 jan-inference/sd/docker-compose.yml         | 98 +++++++++++++++++++++
 jan-inference/sd/inference.Dockerfile       | 19 ++++
 jan-inference/sd/inference.requirements.txt |  4 +
 jan-inference/sd/main.py                    | 70 +++++++++++++++
 jan-inference/sd/sd_cpp                     |  1 +
 14 files changed, 351 insertions(+), 2 deletions(-)
 create mode 100644 .gitmodules
 create mode 100644 jan-inference/llm/README.md
 create mode 100644 jan-inference/llm/docker-compose.yml
 create mode 100644 jan-inference/llm/models/.gitkeep
 create mode 100644 jan-inference/sd/.dockerignore
 create mode 100644 jan-inference/sd/compile.Dockerfile
 create mode 100644 jan-inference/sd/compile.requirements.txt
 create mode 100644 jan-inference/sd/docker-compose.yml
 create mode 100644 jan-inference/sd/inference.Dockerfile
 create mode 100644 jan-inference/sd/inference.requirements.txt
 create mode 100644 jan-inference/sd/main.py
 create mode 160000 jan-inference/sd/sd_cpp

diff --git a/.env.sample b/.env.sample
index b4ff84e20..d29332dd2 100644
--- a/.env.sample
+++ b/.env.sample
@@ -5,4 +5,13 @@ POSTGRES_USERNAME=your_db_username
 POSTGRES_PORT=your_db_port
 KC_DB_SCHEMA=public
 KEYCLOAK_ADMIN=your_keycloak_admin_username
-KEYCLOAK_ADMIN_PASSWORD=your_keycloak_admin_password
\ No newline at end of file
+KEYCLOAK_ADMIN_PASSWORD=your_keycloak_admin_password
+
+# Inference
+## LLM
+MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
+LLM_MODEL_FILE=$(basename $MODEL_URL)
+
+## SD
+MODEL_URL=https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
+SD_MODEL_FILE=$(basename $MODEL_URL)
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index a5c89361d..ee916094a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,11 @@
 .vscode
-.env
\ No newline at end of file
+.env
+
+# Jan inference
+jan-inference/llm/models/**
+jan-inference/llm/.env
+
+jan-inference/sd/models/**
+jan-inference/sd/output/**
+jan-inference/sd/.env
+jan-inference/sd/sd
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..fb1c6e128
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "jan-inference/sd/sd_cpp"]
+	path = jan-inference/sd/sd_cpp
+	url = https://github.com/leejet/stable-diffusion.cpp
diff --git a/jan-inference/llm/README.md b/jan-inference/llm/README.md
new file mode 100644
index 000000000..3183a8fb6
--- /dev/null
+++ b/jan-inference/llm/README.md
@@ -0,0 +1,8 @@
+# Inference - LLM
+
+```bash
+docker network create traefik_public
+cp .env.example .env
+# -> Update MODEL_URL in `.env` file
+docker compose up -d --scale llm=2
+```
diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml
new file mode 100644
index 000000000..129b01e74
--- /dev/null
+++ b/jan-inference/llm/docker-compose.yml
@@ -0,0 +1,94 @@
+version: '3'
+
+services:
+
+  # Service to download a model file.
+  downloader:
+    image: busybox
+    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
+    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$LLM_MODEL_FILE ]; then wget -O /models/$LLM_MODEL_FILE ${MODEL_URL}; fi"
+    # Mount a local directory to store the downloaded model.
+    volumes:
+      - ./models:/models
+
+  # Service to wait for the downloader service to finish downloading the model.
+  wait-for-downloader:
+    image: busybox
+    # The command waits until the model file (specified in MODEL_URL) exists.
+    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
+    # Specifies that this service should start after the downloader service has started.
+    depends_on:
+      downloader:
+        condition: service_started
+    # Mount the same local directory to check for the downloaded model.
+    volumes:
+      - ./models:/models
+
+  # Service to run the Llama web application.
+  llm:
+    image: ghcr.io/abetlen/llama-cpp-python:latest
+    # Mount the directory that contains the downloaded model.
+    volumes:
+      - ./models:/models
+    environment:
+      # Specify the path to the model for the web application.
+      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
+    labels:
+      # Instead of using the Host rule, set a PathPrefix rule
+      - "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)"
+      # This tells Traefik where to forward the traffic for this route.
+      - "traefik.http.routers.web.service=llm"
+      # Define a service for the llm and specify its load balancer configuration
+      - "traefik.http.services.llm.loadbalancer.server.port=8000"
+
+      - "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm"
+      - "traefik.http.routers.web.middlewares=strip-llm-prefix"
+    # Health check configuration
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+    # Restart policy configuration
+    restart: on-failure
+    # Specifies that this service should start only after wait-for-downloader has completed successfully.
+    depends_on:
+      wait-for-downloader:
+        condition: service_completed_successfully
+    # Connect this service to two networks: inference_net and traefik_public.
+    networks:
+      - inference_net
+      - traefik_public
+
+  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
+  traefik:
+    image: traefik:v2.5
+    command:
+      # Enable the Traefik API dashboard without TLS (not recommended for production).
+      - "--api.insecure=true"
+      # Enable Traefik to use Docker as a provider.
+      - "--providers.docker=true"
+      # Do not expose services by default. Explicitly specify in each service if it should be exposed.
+      - "--providers.docker.exposedbydefault=false"
+      # Specify the default entry point on port 80.
+      - "--entrypoints.web.address=:80"
+    ports:
+      # Map port 80 in the container to port 80 on the host.
+      - "80:80"
+      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
+      - "8080:8080"
+    # Mount the Docker socket to allow Traefik to listen to Docker's API.
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    # Connect this service to the traefik_public network.
+    networks:
+      - traefik_public
+
+# Define networks used in this docker-compose file.
+networks:
+  # Network for the llm service (used for inference).
+  inference_net:
+  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
+  traefik_public:
+    external: true
diff --git a/jan-inference/llm/models/.gitkeep b/jan-inference/llm/models/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/jan-inference/sd/.dockerignore b/jan-inference/sd/.dockerignore
new file mode 100644
index 000000000..d5b227c62
--- /dev/null
+++ b/jan-inference/sd/.dockerignore
@@ -0,0 +1,7 @@
+output/
+models/
+
+sd_cpp/.git
+sd_cpp/.github
+
+sd
\ No newline at end of file
diff --git a/jan-inference/sd/compile.Dockerfile b/jan-inference/sd/compile.Dockerfile
new file mode 100644
index 000000000..383e5202e
--- /dev/null
+++ b/jan-inference/sd/compile.Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.9.17
+
+RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
+ENV PATH=/root/.cargo/bin:$PATH
+
+WORKDIR /sd.cpp
+
+COPY . .
+
+RUN pip install -r compile.requirements.txt
diff --git a/jan-inference/sd/compile.requirements.txt b/jan-inference/sd/compile.requirements.txt
new file mode 100644
index 000000000..48f4f8730
--- /dev/null
+++ b/jan-inference/sd/compile.requirements.txt
@@ -0,0 +1,17 @@
+# Compiling
+-r sd_cpp/models/requirements.txt
+
+# diffusers
+# torch
+# ftfy
+# scipy
+# transformers
+# accelerate
+# huggingface-hub
+# xformers
+# omegaconf
+# safetensors
+# cog
+# tomesd
+# compel
+
diff --git a/jan-inference/sd/docker-compose.yml b/jan-inference/sd/docker-compose.yml
new file mode 100644
index 000000000..7c74e8b24
--- /dev/null
+++ b/jan-inference/sd/docker-compose.yml
@@ -0,0 +1,98 @@
+version: '3'
+
+services:
+
+  # Service to download a model file.
+  downloader:
+    build:
+      context: ./
+      dockerfile: compile.Dockerfile
+    # platform: "linux/amd64"
+    # Download the model if no converted model exists, then convert it to GGML (assumes convert.py takes the model path as a positional argument; the q5_0 output name matches MODEL_NAME in the sd service below).
+    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /converted_models/*.bin ]; then wget -O /converted_models/$SD_MODEL_FILE ${MODEL_URL}; python /sd.cpp/models/convert.py /converted_models/$SD_MODEL_FILE --out_type q5_0 --out_file /converted_models/v1-5-pruned-emaonly-ggml-model-q5_0.bin; fi"
+    # Mount a local directory to store the downloaded model.
+    volumes:
+      - ./models:/converted_models
+
+  # Service to wait for the downloader service to finish downloading the model.
+  wait-for-downloader:
+    image: busybox
+    # The command waits until a converted model (.bin) file exists.
+    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/*.bin ]; do sleep 1; done; echo 'Model downloaded and converted!'"
+    # Specifies that this service should start after the downloader service has started.
+    depends_on:
+      downloader:
+        condition: service_started
+    # Mount the same local directory to check for the downloaded model.
+    volumes:
+      - ./models:/models
+
+  # Service to run the SD web application.
+  sd:
+    build:
+      context: ./
+      dockerfile: inference.Dockerfile
+    # Mount the directory that contains the downloaded model.
+    volumes:
+      - ./models:/models
+      - ./output/:/serving/output
+    command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
+    # platform: "linux/amd64"
+    environment:
+      # Model location and serving configuration for the web application.
+      BASE_URL: http://0.0.0.0:8000
+      MODEL_NAME: "v1-5-pruned-emaonly-ggml-model-q5_0.bin"
+      MODEL_DIR: "/models"
+      SD_PATH: "/sd"
+      PYTHONUNBUFFERED: 1
+    ports:
+      - 8000:8000
+    # Health check configuration
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+    # Restart policy configuration
+    restart: on-failure
+    # Specifies that this service should start only after wait-for-downloader has completed successfully.
+    depends_on:
+      wait-for-downloader:
+        condition: service_completed_successfully
+    # Connect this service to two networks: inference_net and traefik_public.
+    networks:
+      - inference_net
+      - traefik_public
+
+  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
+  traefik:
+    image: traefik:v2.5
+    command:
+      # Enable the Traefik API dashboard without TLS (not recommended for production).
+      - "--api.insecure=true"
+      # Enable Traefik to use Docker as a provider.
+      - "--providers.docker=true"
+      # Do not expose services by default. Explicitly specify in each service if it should be exposed.
+      - "--providers.docker.exposedbydefault=false"
+      # Specify the default entry point on port 80.
+      - "--entrypoints.web.address=:80"
+    ports:
+      # Map port 80 in the container to port 80 on the host.
+      - "80:80"
+      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
+      - "8080:8080"
+    # Mount the Docker socket to allow Traefik to listen to Docker's API.
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    # Connect this service to the traefik_public network.
+    networks:
+      - traefik_public
+
+# Define networks used in this docker-compose file.
+networks:
+  # Network for the sd service (used for inference).
+  inference_net:
+  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
+  traefik_public:
+    external: true
diff --git a/jan-inference/sd/inference.Dockerfile b/jan-inference/sd/inference.Dockerfile
new file mode 100644
index 000000000..39d42dd43
--- /dev/null
+++ b/jan-inference/sd/inference.Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.9.17 as build
+
+RUN apt-get update && apt-get install -y build-essential git cmake
+
+WORKDIR /sd.cpp
+
+COPY sd_cpp /sd.cpp
+
+RUN mkdir build && cd build && cmake .. && cmake --build . --config Release
+
+FROM python:3.9.17 as runtime
+
+COPY --from=build /sd.cpp/build/bin/sd /sd
+
+WORKDIR /serving
+
+COPY . /serving/
+
+RUN pip install -r inference.requirements.txt
\ No newline at end of file
diff --git a/jan-inference/sd/inference.requirements.txt b/jan-inference/sd/inference.requirements.txt
new file mode 100644
index 000000000..519c496ba
--- /dev/null
+++ b/jan-inference/sd/inference.requirements.txt
@@ -0,0 +1,4 @@
+# Inference
+fastapi
+uvicorn
+python-multipart
\ No newline at end of file
diff --git a/jan-inference/sd/main.py b/jan-inference/sd/main.py
new file mode 100644
index 000000000..f31380dd3
--- /dev/null
+++ b/jan-inference/sd/main.py
@@ -0,0 +1,70 @@
+from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+import subprocess
+import os
+from uuid import uuid4
+
+app = FastAPI()
+
+OUTPUT_DIR = "output"
+SD_PATH = os.environ.get("SD_PATH", "./sd")
+MODEL_DIR = os.environ.get("MODEL_DIR", "./models")
+BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
+MODEL_NAME = os.environ.get(
+    "MODEL_NAME", "v1-5-pruned-emaonly-ggml-model-q5_0.bin")
+
+# Create the OUTPUT_DIR directory if it does not exist
+if not os.path.exists(OUTPUT_DIR):
+    os.makedirs(OUTPUT_DIR)
+
+# Create the MODEL_DIR directory if it does not exist
+if not os.path.exists(MODEL_DIR):
+    os.makedirs(MODEL_DIR)
+
+# Serve generated images from the OUTPUT_DIR directory
+app.mount("/output", StaticFiles(directory=OUTPUT_DIR), name="output")
+
+
+def run_command(prompt: str, filename: str):
+    # Construct the stable-diffusion.cpp CLI invocation
+    command = [SD_PATH,
+               "-m", os.path.join(MODEL_DIR, MODEL_NAME),
+               "-p", prompt,
+               "-o", os.path.join(OUTPUT_DIR, filename)
+               ]
+
+    try:
+        sub_output = subprocess.run(command, timeout=5*60, capture_output=True,
+                                    check=True, encoding="utf-8")
+        print(sub_output.stdout)
+    except subprocess.CalledProcessError:
+        raise HTTPException(
+            status_code=500, detail="Failed to execute the command.")
+
+
+@app.post("/inference/")
+async def run_inference(background_tasks: BackgroundTasks, prompt: str = Form()):
+    # Generate a unique filename using uuid4()
+    filename = f"{uuid4()}.png"
+
+    # Run the command as a background task so the request is not blocked
+    background_tasks.add_task(run_command, prompt, filename)
+
+    # Return the expected path of the output file
+    return {"url": f'{BASE_URL}/serve/{filename}'}
+
+
+@app.get("/serve/{filename}")
+async def serve_file(filename: str):
+    file_path = os.path.join(OUTPUT_DIR, filename)
+
+    if os.path.exists(file_path):
+        return FileResponse(file_path)
+    else:
+        raise HTTPException(status_code=404, detail="File not found")
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/jan-inference/sd/sd_cpp b/jan-inference/sd/sd_cpp
new file mode 160000
index 000000000..0d7f04b13
--- /dev/null
+++ b/jan-inference/sd/sd_cpp
@@ -0,0 +1 @@
+Subproject commit 0d7f04b135cd48e8d62aecd09a52eb2afa482744

From b34fa91dc4954e4b737fbd98d74e761fe24d30d7 Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Fri, 25 Aug 2023 02:23:33 +0700
Subject: [PATCH 5/7] chore(inference): Remove inference llm .env.example

---
 jan-inference/llm/.env.example | 2 --
 1 file changed, 2 deletions(-)
 delete mode 100644 jan-inference/llm/.env.example

diff --git a/jan-inference/llm/.env.example b/jan-inference/llm/.env.example
deleted file mode 100644
index 40a331b36..000000000
--- a/jan-inference/llm/.env.example
+++ /dev/null
@@ -1,2 +0,0 @@
-MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
-MODEL_FILE=$(basename $MODEL_URL)
\ No newline at end of file

From d97f3c759b9bbf33493ec64aafa1002bec27f877 Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Fri, 25 Aug 2023 02:24:21 +0700
Subject: [PATCH 6/7] fix(inference/traefik): Refactor traefik to be outside of
 models and add sd

---
 jan-inference/docker-compose.yml            | 25 ++++++++++++++
 jan-inference/llm/traefik/config.yml        | 23 -------------
 jan-inference/traefik/config.yml            | 38 +++++++++++++++++++++
 jan-inference/{llm => }/traefik/traefik.yml |  0
 4 files changed, 63 insertions(+), 23 deletions(-)
 create mode 100644 jan-inference/docker-compose.yml
 delete mode 100644 jan-inference/llm/traefik/config.yml
 create mode 100644 jan-inference/traefik/config.yml
 rename jan-inference/{llm => }/traefik/traefik.yml (100%)

diff --git a/jan-inference/docker-compose.yml b/jan-inference/docker-compose.yml
new file mode 100644
index 000000000..7672d2a76
--- /dev/null
+++ b/jan-inference/docker-compose.yml
@@ -0,0 +1,25 @@
+version: '3'
+
+services:
+  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
+  traefik:
+    image: traefik:v2.10
+    ports:
+      # Map port 80 in the container to port 80 on the host.
+      - "80:80"
+      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
+      - "8080:8080"
+    # Mount the Docker socket to allow Traefik to listen to Docker's API.
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - ./traefik/traefik.yml:/traefik.yml:ro
+      - ./traefik/config.yml:/config.yml:ro
+    # Connect this service to the traefik_public network.
+    networks:
+      - traefik_public
+
+# Define networks used in this docker-compose file.
+networks:
+  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
+  traefik_public:
+    external: true
diff --git a/jan-inference/llm/traefik/config.yml b/jan-inference/llm/traefik/config.yml
deleted file mode 100644
index 744427d57..000000000
--- a/jan-inference/llm/traefik/config.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-http:
-  #region routers
-  routers:
-    llm:
-      entryPoints:
-        - "web"
-      rule: "PathPrefix(`/inference/llm`)"
-      middlewares:
-        - llmm-strip-prefix
-      service: llm
-#endregion
-#region services
-  services:
-    llm:
-      loadBalancer:
-        servers:
-          - url: "http://llm:8000"
-#endregion
-  middlewares:
-    llmm-strip-prefix:
-      stripPrefix:
-        prefixes:
-          - "/inference/llm"
diff --git a/jan-inference/traefik/config.yml b/jan-inference/traefik/config.yml
new file mode 100644
index 000000000..c8ee8f569
--- /dev/null
+++ b/jan-inference/traefik/config.yml
@@ -0,0 +1,38 @@
+http:
+  #region routers
+  routers:
+    llm:
+      entryPoints:
+        - "web"
+      rule: "PathPrefix(`/inference/llm/`)"
+      middlewares:
+        - llm-strip-prefix
+      service: llm
+    sd:
+      entryPoints:
+        - "web"
+      rule: "PathPrefix(`/inference/sd/`)"
+      middlewares:
+        - sd-strip-prefix
+      service: sd
+#endregion
+#region services
+  services:
+    llm:
+      loadBalancer:
+        servers:
+          - url: "http://llm:8000"
+    sd:
+      loadBalancer:
+        servers:
+          - url: "http://sd:8000"
+#endregion
+  middlewares:
+    llm-strip-prefix:
+      stripPrefix:
+        prefixes:
+          - "/inference/llm/"
+    sd-strip-prefix:
+      stripPrefix:
+        prefixes:
+          - "/inference/sd/"
\ No newline at end of file
diff --git a/jan-inference/llm/traefik/traefik.yml b/jan-inference/traefik/traefik.yml
similarity index 100%
rename from jan-inference/llm/traefik/traefik.yml
rename to jan-inference/traefik/traefik.yml

From db12863d49c0c9df827e39f53e01e7769f11683d Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Fri, 25 Aug 2023 02:24:46 +0700
Subject: [PATCH 7/7] refactor(inference): Remove traefik in model-specific
 compose files

---
 jan-inference/llm/docker-compose.yml | 35 +++++++-----------------
 jan-inference/sd/docker-compose.yml  | 39 +++++-----------------------
 2 files changed, 15 insertions(+), 59 deletions(-)

diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml
index e4b768277..0c15d3659 100644
--- a/jan-inference/llm/docker-compose.yml
+++ b/jan-inference/llm/docker-compose.yml
@@ -30,16 +30,19 @@ services:
     # Mount the directory that contains the downloaded model.
     volumes:
       - ./models:/models
+    ports:
+      - 8000:8000
     environment:
       # Specify the path to the model for the web application.
       MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
+      PYTHONUNBUFFERED: 1
     # Health check configuration
-    healthcheck:
-      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 30s
+    # healthcheck:
+    #   test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
+    #   interval: 30s
+    #   timeout: 10s
+    #   retries: 3
+    #   start_period: 30s
     # Restart policy configuration
     restart: on-failure
     # Specifies that this service should start only after wait-for-downloader has completed successfully.
@@ -47,31 +50,11 @@ services:
       wait-for-downloader:
         condition: service_completed_successfully
     # Connect this service to two networks: inference_net and traefik_public.
-    networks:
-      - inference_net
-      - traefik_public
-
-  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
-  traefik:
-    image: traefik:v2.10
-    ports:
-      # Map port 80 in the container to port 80 on the host.
-      - "80:80"
-      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
-      - "8080:8080"
-    # Mount the Docker socket to allow Traefik to listen to Docker's API.
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock:ro
-      - ./traefik/traefik.yml:/traefik.yml:ro
-      - ./traefik/config.yml:/config.yml:ro
-    # Connect this service to the traefik_public network.
     networks:
       - traefik_public
 
 # Define networks used in this docker-compose file.
 networks:
-  # Network for the llm service (used for inference).
-  inference_net:
   # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
   traefik_public:
     external: true
diff --git a/jan-inference/sd/docker-compose.yml b/jan-inference/sd/docker-compose.yml
index 7c74e8b24..fd1a73a8e 100644
--- a/jan-inference/sd/docker-compose.yml
+++ b/jan-inference/sd/docker-compose.yml
@@ -48,12 +48,12 @@ services:
     ports:
       - 8000:8000
     # Health check configuration
-    healthcheck:
-      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 30s
+    # healthcheck:
+    #   test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
+    #   interval: 30s
+    #   timeout: 10s
+    #   retries: 3
+    #   start_period: 30s
     # Restart policy configuration
     restart: on-failure
     # Specifies that this service should start only after wait-for-downloader has completed successfully.
@@ -61,38 +61,11 @@ services:
       wait-for-downloader:
         condition: service_completed_successfully
     # Connect this service to two networks: inference_net and traefik_public.
-    networks:
-      - inference_net
-      - traefik_public
-
-  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
-  traefik:
-    image: traefik:v2.5
-    command:
-      # Enable the Traefik API dashboard without TLS (not recommended for production).
-      - "--api.insecure=true"
-      # Enable Traefik to use Docker as a provider.
-      - "--providers.docker=true"
-      # Do not expose services by default. Explicitly specify in each service if it should be exposed.
-      - "--providers.docker.exposedbydefault=false"
-      # Specify the default entry point on port 80.
-      - "--entrypoints.web.address=:80"
-    ports:
-      # Map port 80 in the container to port 80 on the host.
-      - "80:80"
-      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
-      - "8080:8080"
-    # Mount the Docker socket to allow Traefik to listen to Docker's API.
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock
-    # Connect this service to the traefik_public network.
     networks:
       - traefik_public
 
 # Define networks used in this docker-compose file.
 networks:
-  # Network for the sd service (used for inference).
-  inference_net:
   # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
   traefik_public:
     external: true
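
A quick smoke test for the assembled stack — a sketch, not part of the patches above. It assumes the shared Traefik stack and both model stacks are running on localhost with Traefik bound to port 80, and the prompts are placeholders; llama-cpp-python exposes an OpenAI-compatible HTTP API, and the sd service exposes the FastAPI routes defined in main.py:

```bash
# LLM behind Traefik: the llm-strip-prefix middleware removes /inference/llm/
# before forwarding to llm:8000, so this reaches llama-cpp-python's /v1/completions.
curl -s http://localhost/inference/llm/v1/completions \
  -H 'Content-Type: application/json' \
  -d '{"prompt": "Q: Name the capital of France. A:", "max_tokens": 16}'

# SD behind Traefik: POST a form field "prompt" to main.py's /inference/ route
# (python-multipart handles the form parsing); the JSON response carries the
# /serve/ URL where the image will appear once the background task finishes.
curl -s -X POST http://localhost/inference/sd/inference/ \
  -F 'prompt=a lighthouse at sunset, oil painting'
```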