From 6ef61c45ec684e22b5ce2a9dab7bc79a021f5a50 Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Thu, 24 Aug 2023 09:14:18 +0700
Subject: [PATCH 1/7] feat(inference): Initial commit for LLM inference using
 llama-cpp-python (GGML)

---
 .gitignore                           |  3 +
 jan-inference/llm/.env.example       |  2 +
 jan-inference/llm/README.md          |  8 +++
 jan-inference/llm/docker-compose.yml | 94 ++++++++++++++++++++++++++++
 jan-inference/llm/models/.gitkeep    |  0
 5 files changed, 107 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 jan-inference/llm/.env.example
 create mode 100644 jan-inference/llm/README.md
 create mode 100644 jan-inference/llm/docker-compose.yml
 create mode 100644 jan-inference/llm/models/.gitkeep

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..626303c91
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+# Jan inference
+# jan-inference/llm/models/**
+jan-inference/llm/.env
\ No newline at end of file
diff --git a/jan-inference/llm/.env.example b/jan-inference/llm/.env.example
new file mode 100644
index 000000000..40a331b36
--- /dev/null
+++ b/jan-inference/llm/.env.example
@@ -0,0 +1,2 @@
+MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
+MODEL_FILE=$(basename $MODEL_URL)
\ No newline at end of file
diff --git a/jan-inference/llm/README.md b/jan-inference/llm/README.md
new file mode 100644
index 000000000..3183a8fb6
--- /dev/null
+++ b/jan-inference/llm/README.md
@@ -0,0 +1,8 @@
+# Inference - LLM
+
+```bash
+docker network create traefik_public
+cp .env.example .env
+# -> Update MODEL_URL in `.env` file
+docker compose up -d --scale llm=2
+```
diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml
new file mode 100644
index 000000000..50441dee3
--- /dev/null
+++ b/jan-inference/llm/docker-compose.yml
@@ -0,0 +1,94 @@
+version: '3'
+
+services:
+
+  # Service to download a model file.
+  downloader:
+    image: busybox
+    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
+    command: /bin/sh -c "MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$MODEL_FILE ]; then wget -O /models/$MODEL_FILE ${MODEL_URL}; fi"
+    # Mount a local directory to store the downloaded model.
+    volumes:
+      - ./models:/models
+
+  # Service to wait for the downloader service to finish downloading the model.
+  wait-for-downloader:
+    image: busybox
+    # The command waits until the model file (specified in MODEL_URL) exists.
+    command: /bin/sh -c "MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
+    # Specifies that this service should start after the downloader service has started.
+    depends_on:
+      downloader:
+        condition: service_started
+    # Mount the same local directory to check for the downloaded model.
+    volumes:
+      - ./models:/models
+
+  # Service to run the Llama web application.
+  llm:
+    image: ghcr.io/abetlen/llama-cpp-python:latest
+    # Mount the directory that contains the downloaded model.
+    volumes:
+      - ./models:/models
+    environment:
+      # Specify the path to the model for the web application.
+      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
+    labels:
+      # Instead of using the Host rule, set a PathPrefix rule
+      - "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)"
+      # This tells Traefik where to forward the traffic for this route.
+      - "traefik.http.routers.web.service=llm"
+      # Define a service for the llm and specify its load balancer configuration
+      - "traefik.http.services.llm.loadbalancer.server.port=8000"
+
+      - "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm"
+      - "traefik.http.routers.web.middlewares=strip-llm-prefix"
+    # Health check configuration
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+    # Restart policy configuration
+    restart: on-failure
+    # Specifies that this service should start only after wait-for-downloader has completed successfully.
+    depends_on:
+      wait-for-downloader:
+        condition: service_completed_successfully
+    # Connect this service to two networks: inference_net and traefik_public.
+    networks:
+      - inference_net
+      - traefik_public
+
+  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
+  traefik:
+    image: traefik:v2.5
+    command:
+      # Enable the Traefik API dashboard without TLS (not recommended for production).
+      - "--api.insecure=true"
+      # Enable Traefik to use Docker as a provider.
+      - "--providers.docker=true"
+      # Do not expose services by default. Explicitly specify in each service if it should be exposed.
+      - "--providers.docker.exposedbydefault=false"
+      # Specify the default entry point on port 80.
+      - "--entrypoints.web.address=:80"
+    ports:
+      # Map port 80 in the container to port 80 on the host.
+      - "80:80"
+      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
+      - "8080:8080"
+    # Mount the Docker socket to allow Traefik to listen to Docker's API.
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    # Connect this service to the traefik_public network.
+    networks:
+      - traefik_public
+
+# Define networks used in this docker-compose file.
+networks:
+  # Network for the llm service (used for inference).
+  inference_net:
+  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
+  traefik_public:
+    external: true
diff --git a/jan-inference/llm/models/.gitkeep b/jan-inference/llm/models/.gitkeep
new file mode 100644
index 000000000..e69de29bb

From 216ded2a76d92e3b2353ce2aa830815dde172e61 Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Thu, 24 Aug 2023 09:14:42 +0700
Subject: [PATCH 2/7] chore: Add gitignore for models in inference/llm

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 626303c91..bdf314e0f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
 # Jan inference
-# jan-inference/llm/models/**
+jan-inference/llm/models/**
 jan-inference/llm/.env
\ No newline at end of file

From 6446f06f279721fa607813a530fd8aa995752f90 Mon Sep 17 00:00:00 2001
From: Hien To
Date: Thu, 24 Aug 2023 14:10:01 +0700
Subject: [PATCH 3/7] Use Traefik file provider

---
 jan-inference/llm/docker-compose.yml  | 25 ++++---------------------
 jan-inference/llm/traefik/config.yml  | 23 +++++++++++++++++++++++
 jan-inference/llm/traefik/traefik.yml | 17 +++++++++++++++++
 3 files changed, 44 insertions(+), 21 deletions(-)
 create mode 100644 jan-inference/llm/traefik/config.yml
 create mode 100644 jan-inference/llm/traefik/traefik.yml

diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml
index 50441dee3..f2fc9a73d 100644
--- a/jan-inference/llm/docker-compose.yml
+++ b/jan-inference/llm/docker-compose.yml
@@ -33,16 +33,6 @@ services:
     environment:
       # Specify the path to the model for the web application.
       MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
-    labels:
-      # Instead of using the Host rule, set a PathPrefix rule
-      - "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)"
-      # This tells Traefik where to forward the traffic for this route.
-      - "traefik.http.routers.web.service=llm"
-      # Define a service for the llm and specify its load balancer configuration
-      - "traefik.http.services.llm.loadbalancer.server.port=8000"
-
-      - "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm"
-      - "traefik.http.routers.web.middlewares=strip-llm-prefix"
     # Health check configuration
     healthcheck:
       test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
@@ -63,16 +53,7 @@ services:
 
   # Service for Traefik, a modern HTTP reverse proxy and load balancer.
   traefik:
-    image: traefik:v2.5
-    command:
-      # Enable the Traefik API dashboard without TLS (not recommended for production).
-      - "--api.insecure=true"
-      # Enable Traefik to use Docker as a provider.
-      - "--providers.docker=true"
-      # Do not expose services by default. Explicitly specify in each service if it should be exposed.
-      - "--providers.docker.exposedbydefault=false"
-      # Specify the default entry point on port 80.
-      - "--entrypoints.web.address=:80"
+    image: traefik:v2.10
     ports:
       # Map port 80 in the container to port 80 on the host.
       - "80:80"
@@ -80,7 +61,9 @@ services:
       - "8080:8080"
     # Mount the Docker socket to allow Traefik to listen to Docker's API.
     volumes:
-      - /var/run/docker.sock:/var/run/docker.sock
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - ./traefik/traefik.yml:/traefik.yml:ro
+      - ./traefik/config.yml:/config.yml:ro
     # Connect this service to the traefik_public network.
     networks:
       - traefik_public
diff --git a/jan-inference/llm/traefik/config.yml b/jan-inference/llm/traefik/config.yml
new file mode 100644
index 000000000..744427d57
--- /dev/null
+++ b/jan-inference/llm/traefik/config.yml
@@ -0,0 +1,23 @@
+http:
+  #region routers
+  routers:
+    llm:
+      entryPoints:
+        - "web"
+      rule: "PathPrefix(`/inference/llm`)"
+      middlewares:
+        - llmm-strip-prefix
+      service: llm
+#endregion
+#region services
+  services:
+    llm:
+      loadBalancer:
+        servers:
+          - url: "http://llm:8000"
+#endregion
+  middlewares:
+    llmm-strip-prefix:
+      stripPrefix:
+        prefixes:
+          - "/inference/llm"
diff --git a/jan-inference/llm/traefik/traefik.yml b/jan-inference/llm/traefik/traefik.yml
new file mode 100644
index 000000000..346425cd5
--- /dev/null
+++ b/jan-inference/llm/traefik/traefik.yml
@@ -0,0 +1,17 @@
+api:
+  dashboard: true
+  debug: true
+  insecure: true
+entryPoints:
+  web:
+    address: ":80"
+serversTransport:
+  insecureSkipVerify: true
+providers:
+  docker:
+    endpoint: "unix:///var/run/docker.sock"
+    exposedByDefault: false
+  file:
+    filename: /config.yml
+log:
+  level: DEBUG
\ No newline at end of file

From 1146c29a31f6428866d924e68a9c60e640577e18 Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Fri, 25 Aug 2023 01:31:07 +0700
Subject: [PATCH 4/7] feat(inference): Add SD CPP module

---
 .env.sample                                 | 11 ++-
 .gitignore                                  | 11 ++-
 .gitmodules                                 |  3 +
 jan-inference/llm/README.md                 |  8 ++
 jan-inference/llm/docker-compose.yml        | 94 ++++++++++++++++++++
 jan-inference/llm/models/.gitkeep           |  0
 jan-inference/sd/.dockerignore              |  7 ++
 jan-inference/sd/compile.Dockerfile         | 10 +++
 jan-inference/sd/compile.requirements.txt   | 17 ++++
 jan-inference/sd/docker-compose.yml         | 98 +++++++++++++++++++++
 jan-inference/sd/inference.Dockerfile       | 19 ++++
 jan-inference/sd/inference.requirements.txt |  4 +
 jan-inference/sd/main.py                    | 70 +++++++++++++++
 jan-inference/sd/sd_cpp                     |  1 +
 14 files changed, 351 insertions(+), 2 deletions(-)
 create mode 100644 .gitmodules
 create mode 100644 jan-inference/llm/README.md
 create mode 100644 jan-inference/llm/docker-compose.yml
 create mode 100644 jan-inference/llm/models/.gitkeep
 create mode 100644 jan-inference/sd/.dockerignore
 create mode 100644 jan-inference/sd/compile.Dockerfile
 create mode 100644 jan-inference/sd/compile.requirements.txt
 create mode 100644 jan-inference/sd/docker-compose.yml
 create mode 100644 jan-inference/sd/inference.Dockerfile
 create mode 100644 jan-inference/sd/inference.requirements.txt
 create mode 100644 jan-inference/sd/main.py
 create mode 160000 jan-inference/sd/sd_cpp

diff --git a/.env.sample b/.env.sample
index b4ff84e20..d29332dd2 100644
--- a/.env.sample
+++ b/.env.sample
@@ -5,4 +5,13 @@ POSTGRES_USERNAME=your_db_username
 POSTGRES_PORT=your_db_port
 KC_DB_SCHEMA=public
 KEYCLOAK_ADMIN=your_keycloak_admin_username
-KEYCLOAK_ADMIN_PASSWORD=your_keycloak_admin_password
\ No newline at end of file
+KEYCLOAK_ADMIN_PASSWORD=your_keycloak_admin_password
+
+# Inference
+## LLM
+MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
+LLM_MODEL_FILE=$(basename $MODEL_URL)
+
+## SD
+MODEL_URL=https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
+SD_MODEL_FILE=$(basename $MODEL_URL)
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index a5c89361d..ee916094a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,11 @@
 .vscode
-.env
\ No newline at end of file
+.env
+
+# Jan inference
+jan-inference/llm/models/**
+jan-inference/llm/.env
+
+jan-inference/sd/models/**
+jan-inference/sd/output/**
+jan-inference/sd/.env
+jan-inference/sd/sd
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..fb1c6e128
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "jan-inference/sd/sd_cpp"]
+	path = jan-inference/sd/sd_cpp
+	url = https://github.com/leejet/stable-diffusion.cpp
diff --git a/jan-inference/llm/README.md b/jan-inference/llm/README.md
new file mode 100644
index 000000000..3183a8fb6
--- /dev/null
+++ b/jan-inference/llm/README.md
@@ -0,0 +1,8 @@
+# Inference - LLM
+
+```bash
+docker network create traefik_public
+cp .env.example .env
+# -> Update MODEL_URL in `.env` file
+docker compose up -d --scale llm=2
+```
diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml
new file mode 100644
index 000000000..129b01e74
--- /dev/null
+++ b/jan-inference/llm/docker-compose.yml
@@ -0,0 +1,94 @@
+version: '3'
+
+services:
+
+  # Service to download a model file.
+  downloader:
+    image: busybox
+    # The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist.
+    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$LLM_MODEL_FILE ]; then wget -O /models/$LLM_MODEL_FILE ${MODEL_URL}; fi"
+    # Mount a local directory to store the downloaded model.
+    volumes:
+      - ./models:/models
+
+  # Service to wait for the downloader service to finish downloading the model.
+  wait-for-downloader:
+    image: busybox
+    # The command waits until the model file (specified in MODEL_URL) exists.
+    command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'"
+    # Specifies that this service should start after the downloader service has started.
+    depends_on:
+      downloader:
+        condition: service_started
+    # Mount the same local directory to check for the downloaded model.
+    volumes:
+      - ./models:/models
+
+  # Service to run the Llama web application.
+  llm:
+    image: ghcr.io/abetlen/llama-cpp-python:latest
+    # Mount the directory that contains the downloaded model.
+    volumes:
+      - ./models:/models
+    environment:
+      # Specify the path to the model for the web application.
+      MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
+    labels:
+      # Instead of using the Host rule, set a PathPrefix rule
+      - "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)"
+      # This tells Traefik where to forward the traffic for this route.
+      - "traefik.http.routers.web.service=llm"
+      # Define a service for the llm and specify its load balancer configuration
+      - "traefik.http.services.llm.loadbalancer.server.port=8000"
+
+      - "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm"
+      - "traefik.http.routers.web.middlewares=strip-llm-prefix"
+    # Health check configuration
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+    # Restart policy configuration
+    restart: on-failure
+    # Specifies that this service should start only after wait-for-downloader has completed successfully.
+    depends_on:
+      wait-for-downloader:
+        condition: service_completed_successfully
+    # Connect this service to two networks: inference_net and traefik_public.
+    networks:
+      - inference_net
+      - traefik_public
+
+  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
+  traefik:
+    image: traefik:v2.5
+    command:
+      # Enable the Traefik API dashboard without TLS (not recommended for production).
+      - "--api.insecure=true"
+      # Enable Traefik to use Docker as a provider.
+      - "--providers.docker=true"
+      # Do not expose services by default. Explicitly specify in each service if it should be exposed.
+      - "--providers.docker.exposedbydefault=false"
+      # Specify the default entry point on port 80.
+      - "--entrypoints.web.address=:80"
+    ports:
+      # Map port 80 in the container to port 80 on the host.
+      - "80:80"
+      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
+      - "8080:8080"
+    # Mount the Docker socket to allow Traefik to listen to Docker's API.
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    # Connect this service to the traefik_public network.
+    networks:
+      - traefik_public
+
+# Define networks used in this docker-compose file.
+networks:
+  # Network for the llm service (used for inference).
+  inference_net:
+  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
+  traefik_public:
+    external: true
diff --git a/jan-inference/llm/models/.gitkeep b/jan-inference/llm/models/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/jan-inference/sd/.dockerignore b/jan-inference/sd/.dockerignore
new file mode 100644
index 000000000..d5b227c62
--- /dev/null
+++ b/jan-inference/sd/.dockerignore
@@ -0,0 +1,7 @@
+output/
+models/
+
+sd_cpp/.git
+sd_cpp/.github
+
+sd
\ No newline at end of file
diff --git a/jan-inference/sd/compile.Dockerfile b/jan-inference/sd/compile.Dockerfile
new file mode 100644
index 000000000..383e5202e
--- /dev/null
+++ b/jan-inference/sd/compile.Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.9.17
+
+RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
+ENV PATH=/root/.cargo/bin:$PATH
+
+WORKDIR /sd.cpp
+
+COPY . .
+
+RUN pip install -r compile.requirements.txt
diff --git a/jan-inference/sd/compile.requirements.txt b/jan-inference/sd/compile.requirements.txt
new file mode 100644
index 000000000..48f4f8730
--- /dev/null
+++ b/jan-inference/sd/compile.requirements.txt
@@ -0,0 +1,17 @@
+# Compiling
+-r sd_cpp/models/requirements.txt
+
+# diffusers
+# torch
+# ftfy
+# scipy
+# transformers
+# accelerate
+# huggingface-hub
+# xformers
+# omegaconf
+# safetensors
+# cog
+# tomesd
+# compel
+
diff --git a/jan-inference/sd/docker-compose.yml b/jan-inference/sd/docker-compose.yml
new file mode 100644
index 000000000..7c74e8b24
--- /dev/null
+++ b/jan-inference/sd/docker-compose.yml
@@ -0,0 +1,98 @@
+version: '3'
+
+services:
+
+  # Service to download a model file.
+  downloader:
+    build:
+      context: ./
+      dockerfile: compile.Dockerfile
+    # platform: "linux/amd64"
+    # Download the model if no converted model exists, then convert it to GGML (assumes convert.py takes the model path as a positional argument; the q5_0 output name matches MODEL_NAME in the sd service below).
+    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /converted_models/*.bin ]; then wget -O /converted_models/$SD_MODEL_FILE ${MODEL_URL}; python /sd.cpp/models/convert.py /converted_models/$SD_MODEL_FILE --out_type q5_0 --out_file /converted_models/v1-5-pruned-emaonly-ggml-model-q5_0.bin; fi"
+    # Mount a local directory to store the downloaded model.
+    volumes:
+      - ./models:/converted_models
+
+  # Service to wait for the downloader service to finish downloading the model.
+  wait-for-downloader:
+    image: busybox
+    # The command waits until a converted model (.bin) file exists.
+    command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/*.bin ]; do sleep 1; done; echo 'Model downloaded and converted!'"
+    # Specifies that this service should start after the downloader service has started.
+    depends_on:
+      downloader:
+        condition: service_started
+    # Mount the same local directory to check for the downloaded model.
+    volumes:
+      - ./models:/models
+
+  # Service to run the SD web application.
+  sd:
+    build:
+      context: ./
+      dockerfile: inference.Dockerfile
+    # Mount the directory that contains the downloaded model.
+    volumes:
+      - ./models:/models
+      - ./output/:/serving/output
+    command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000"
+    # platform: "linux/amd64"
+    environment:
+      # Model location and serving configuration for the web application.
+      BASE_URL: http://0.0.0.0:8000
+      MODEL_NAME: "v1-5-pruned-emaonly-ggml-model-q5_0.bin"
+      MODEL_DIR: "/models"
+      SD_PATH: "/sd"
+      PYTHONUNBUFFERED: 1
+    ports:
+      - 8000:8000
+    # Health check configuration
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+    # Restart policy configuration
+    restart: on-failure
+    # Specifies that this service should start only after wait-for-downloader has completed successfully.
+    depends_on:
+      wait-for-downloader:
+        condition: service_completed_successfully
+    # Connect this service to two networks: inference_net and traefik_public.
+    networks:
+      - inference_net
+      - traefik_public
+
+  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
+  traefik:
+    image: traefik:v2.5
+    command:
+      # Enable the Traefik API dashboard without TLS (not recommended for production).
+      - "--api.insecure=true"
+      # Enable Traefik to use Docker as a provider.
+      - "--providers.docker=true"
+      # Do not expose services by default. Explicitly specify in each service if it should be exposed.
+      - "--providers.docker.exposedbydefault=false"
+      # Specify the default entry point on port 80.
+      - "--entrypoints.web.address=:80"
+    ports:
+      # Map port 80 in the container to port 80 on the host.
+      - "80:80"
+      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
+      - "8080:8080"
+    # Mount the Docker socket to allow Traefik to listen to Docker's API.
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    # Connect this service to the traefik_public network.
+    networks:
+      - traefik_public
+
+# Define networks used in this docker-compose file.
+networks:
+  # Network for the sd service (used for inference).
+  inference_net:
+  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
+  traefik_public:
+    external: true
diff --git a/jan-inference/sd/inference.Dockerfile b/jan-inference/sd/inference.Dockerfile
new file mode 100644
index 000000000..39d42dd43
--- /dev/null
+++ b/jan-inference/sd/inference.Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.9.17 as build
+
+RUN apt-get update && apt-get install -y build-essential git cmake
+
+WORKDIR /sd.cpp
+
+COPY sd_cpp /sd.cpp
+
+RUN mkdir build && cd build && cmake .. && cmake --build . --config Release
+
+FROM python:3.9.17 as runtime
+
+COPY --from=build /sd.cpp/build/bin/sd /sd
+
+WORKDIR /serving
+
+COPY . /serving/
+
+RUN pip install -r inference.requirements.txt
\ No newline at end of file
diff --git a/jan-inference/sd/inference.requirements.txt b/jan-inference/sd/inference.requirements.txt
new file mode 100644
index 000000000..519c496ba
--- /dev/null
+++ b/jan-inference/sd/inference.requirements.txt
@@ -0,0 +1,4 @@
+# Inference
+fastapi
+uvicorn
+python-multipart
\ No newline at end of file
diff --git a/jan-inference/sd/main.py b/jan-inference/sd/main.py
new file mode 100644
index 000000000..f31380dd3
--- /dev/null
+++ b/jan-inference/sd/main.py
@@ -0,0 +1,70 @@
+from fastapi import FastAPI, BackgroundTasks, HTTPException, Form
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+import subprocess
+import os
+from uuid import uuid4
+
+app = FastAPI()
+
+OUTPUT_DIR = "output"
+SD_PATH = os.environ.get("SD_PATH", "./sd")
+MODEL_DIR = os.environ.get("MODEL_DIR", "./models")
+BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
+MODEL_NAME = os.environ.get(
+    "MODEL_NAME", "v1-5-pruned-emaonly-ggml-model-q5_0.bin")
+
+# Create the OUTPUT_DIR directory if it does not exist
+if not os.path.exists(OUTPUT_DIR):
+    os.makedirs(OUTPUT_DIR)
+
+# Create the MODEL_DIR directory if it does not exist
+if not os.path.exists(MODEL_DIR):
+    os.makedirs(MODEL_DIR)
+
+# Serve generated images from the OUTPUT_DIR directory
+app.mount("/output", StaticFiles(directory=OUTPUT_DIR), name="output")
+
+
+def run_command(prompt: str, filename: str):
+    # Construct the stable-diffusion.cpp CLI invocation
+    command = [SD_PATH,
+               "-m", os.path.join(MODEL_DIR, MODEL_NAME),
+               "-p", prompt,
+               "-o", os.path.join(OUTPUT_DIR, filename)
+               ]
+
+    try:
+        sub_output = subprocess.run(command, timeout=5*60, capture_output=True,
+                                    check=True, encoding="utf-8")
+        print(sub_output.stdout)
+    except subprocess.CalledProcessError:
+        raise HTTPException(
+            status_code=500, detail="Failed to execute the command.")
+
+
+@app.post("/inference/")
+async def run_inference(background_tasks: BackgroundTasks, prompt: str = Form()):
+    # Generate a unique filename using uuid4()
+    filename = f"{uuid4()}.png"
+
+    # Run the command as a background task so the request is not blocked
+    background_tasks.add_task(run_command, prompt, filename)
+
+    # Return the expected path of the output file
+    return {"url": f'{BASE_URL}/serve/{filename}'}
+
+
+@app.get("/serve/{filename}")
+async def serve_file(filename: str):
+    file_path = os.path.join(OUTPUT_DIR, filename)
+
+    if os.path.exists(file_path):
+        return FileResponse(file_path)
+    else:
+        raise HTTPException(status_code=404, detail="File not found")
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/jan-inference/sd/sd_cpp b/jan-inference/sd/sd_cpp
new file mode 160000
index 000000000..0d7f04b13
--- /dev/null
+++ b/jan-inference/sd/sd_cpp
@@ -0,0 +1 @@
+Subproject commit 0d7f04b135cd48e8d62aecd09a52eb2afa482744

From b34fa91dc4954e4b737fbd98d74e761fe24d30d7 Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Fri, 25 Aug 2023 02:23:33 +0700
Subject: [PATCH 5/7] chore(inference): Remove inference llm .env.example

---
 jan-inference/llm/.env.example | 2 --
 1 file changed, 2 deletions(-)
 delete mode 100644 jan-inference/llm/.env.example

diff --git a/jan-inference/llm/.env.example b/jan-inference/llm/.env.example
deleted file mode 100644
index 40a331b36..000000000
--- a/jan-inference/llm/.env.example
+++ /dev/null
@@ -1,2 +0,0 @@
-MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
-MODEL_FILE=$(basename $MODEL_URL)
\ No newline at end of file

From d97f3c759b9bbf33493ec64aafa1002bec27f877 Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Fri, 25 Aug 2023 02:24:21 +0700
Subject: [PATCH 6/7] fix(inference/traefik): Refactor traefik to be outside of
 models and add sd

---
 jan-inference/docker-compose.yml            | 25 ++++++++++++++
 jan-inference/llm/traefik/config.yml        | 23 -------------
 jan-inference/traefik/config.yml            | 38 +++++++++++++++++++++
 jan-inference/{llm => }/traefik/traefik.yml |  0
 4 files changed, 63 insertions(+), 23 deletions(-)
 create mode 100644 jan-inference/docker-compose.yml
 delete mode 100644 jan-inference/llm/traefik/config.yml
 create mode 100644 jan-inference/traefik/config.yml
 rename jan-inference/{llm => }/traefik/traefik.yml (100%)

diff --git a/jan-inference/docker-compose.yml b/jan-inference/docker-compose.yml
new file mode 100644
index 000000000..7672d2a76
--- /dev/null
+++ b/jan-inference/docker-compose.yml
@@ -0,0 +1,25 @@
+version: '3'
+
+services:
+  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
+  traefik:
+    image: traefik:v2.10
+    ports:
+      # Map port 80 in the container to port 80 on the host.
+      - "80:80"
+      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
+      - "8080:8080"
+    # Mount the Docker socket to allow Traefik to listen to Docker's API.
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - ./traefik/traefik.yml:/traefik.yml:ro
+      - ./traefik/config.yml:/config.yml:ro
+    # Connect this service to the traefik_public network.
+    networks:
+      - traefik_public
+
+# Define networks used in this docker-compose file.
+networks:
+  # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
+  traefik_public:
+    external: true
diff --git a/jan-inference/llm/traefik/config.yml b/jan-inference/llm/traefik/config.yml
deleted file mode 100644
index 744427d57..000000000
--- a/jan-inference/llm/traefik/config.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-http:
-  #region routers
-  routers:
-    llm:
-      entryPoints:
-        - "web"
-      rule: "PathPrefix(`/inference/llm`)"
-      middlewares:
-        - llmm-strip-prefix
-      service: llm
-#endregion
-#region services
-  services:
-    llm:
-      loadBalancer:
-        servers:
-          - url: "http://llm:8000"
-#endregion
-  middlewares:
-    llmm-strip-prefix:
-      stripPrefix:
-        prefixes:
-          - "/inference/llm"
diff --git a/jan-inference/traefik/config.yml b/jan-inference/traefik/config.yml
new file mode 100644
index 000000000..c8ee8f569
--- /dev/null
+++ b/jan-inference/traefik/config.yml
@@ -0,0 +1,38 @@
+http:
+  #region routers
+  routers:
+    llm:
+      entryPoints:
+        - "web"
+      rule: "PathPrefix(`/inference/llm/`)"
+      middlewares:
+        - llm-strip-prefix
+      service: llm
+    sd:
+      entryPoints:
+        - "web"
+      rule: "PathPrefix(`/inference/sd/`)"
+      middlewares:
+        - sd-strip-prefix
+      service: sd
+#endregion
+#region services
+  services:
+    llm:
+      loadBalancer:
+        servers:
+          - url: "http://llm:8000"
+    sd:
+      loadBalancer:
+        servers:
+          - url: "http://sd:8000"
+#endregion
+  middlewares:
+    llm-strip-prefix:
+      stripPrefix:
+        prefixes:
+          - "/inference/llm/"
+    sd-strip-prefix:
+      stripPrefix:
+        prefixes:
+          - "/inference/sd/"
\ No newline at end of file
diff --git a/jan-inference/llm/traefik/traefik.yml b/jan-inference/traefik/traefik.yml
similarity index 100%
rename from jan-inference/llm/traefik/traefik.yml
rename to jan-inference/traefik/traefik.yml

From db12863d49c0c9df827e39f53e01e7769f11683d Mon Sep 17 00:00:00 2001
From: vuonghoainam
Date: Fri, 25 Aug 2023 02:24:46 +0700
Subject: [PATCH 7/7] refactor(inference): Remove traefik in model-specific
 compose files

---
 jan-inference/llm/docker-compose.yml | 35 +++++++-----------------
 jan-inference/sd/docker-compose.yml  | 39 +++++-----------------------
 2 files changed, 15 insertions(+), 59 deletions(-)

diff --git a/jan-inference/llm/docker-compose.yml b/jan-inference/llm/docker-compose.yml
index e4b768277..0c15d3659 100644
--- a/jan-inference/llm/docker-compose.yml
+++ b/jan-inference/llm/docker-compose.yml
@@ -30,16 +30,19 @@ services:
     # Mount the directory that contains the downloaded model.
     volumes:
       - ./models:/models
+    ports:
+      - 8000:8000
     environment:
       # Specify the path to the model for the web application.
       MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
+      PYTHONUNBUFFERED: 1
     # Health check configuration
-    healthcheck:
-      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 30s
+    # healthcheck:
+    #   test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
+    #   interval: 30s
+    #   timeout: 10s
+    #   retries: 3
+    #   start_period: 30s
     # Restart policy configuration
     restart: on-failure
     # Specifies that this service should start only after wait-for-downloader has completed successfully.
@@ -47,31 +50,11 @@ services:
       wait-for-downloader:
         condition: service_completed_successfully
     # Connect this service to two networks: inference_net and traefik_public.
-    networks:
-      - inference_net
-      - traefik_public
-
-  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
-  traefik:
-    image: traefik:v2.10
-    ports:
-      # Map port 80 in the container to port 80 on the host.
-      - "80:80"
-      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
-      - "8080:8080"
-    # Mount the Docker socket to allow Traefik to listen to Docker's API.
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock:ro
-      - ./traefik/traefik.yml:/traefik.yml:ro
-      - ./traefik/config.yml:/config.yml:ro
-    # Connect this service to the traefik_public network.
     networks:
       - traefik_public
 
 # Define networks used in this docker-compose file.
 networks:
-  # Network for the llm service (used for inference).
-  inference_net:
   # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
   traefik_public:
     external: true
diff --git a/jan-inference/sd/docker-compose.yml b/jan-inference/sd/docker-compose.yml
index 7c74e8b24..fd1a73a8e 100644
--- a/jan-inference/sd/docker-compose.yml
+++ b/jan-inference/sd/docker-compose.yml
@@ -48,12 +48,12 @@ services:
     ports:
       - 8000:8000
     # Health check configuration
-    healthcheck:
-      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 30s
+    # healthcheck:
+    #   test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
+    #   interval: 30s
+    #   timeout: 10s
+    #   retries: 3
+    #   start_period: 30s
     # Restart policy configuration
     restart: on-failure
     # Specifies that this service should start only after wait-for-downloader has completed successfully.
@@ -61,38 +61,11 @@ services:
       wait-for-downloader:
         condition: service_completed_successfully
     # Connect this service to two networks: inference_net and traefik_public.
-    networks:
-      - inference_net
-      - traefik_public
-
-  # Service for Traefik, a modern HTTP reverse proxy and load balancer.
-  traefik:
-    image: traefik:v2.5
-    command:
-      # Enable the Traefik API dashboard without TLS (not recommended for production).
-      - "--api.insecure=true"
-      # Enable Traefik to use Docker as a provider.
-      - "--providers.docker=true"
-      # Do not expose services by default. Explicitly specify in each service if it should be exposed.
-      - "--providers.docker.exposedbydefault=false"
-      # Specify the default entry point on port 80.
-      - "--entrypoints.web.address=:80"
-    ports:
-      # Map port 80 in the container to port 80 on the host.
-      - "80:80"
-      # Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host.
-      - "8080:8080"
-    # Mount the Docker socket to allow Traefik to listen to Docker's API.
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock
-    # Connect this service to the traefik_public network.
     networks:
       - traefik_public
 
 # Define networks used in this docker-compose file.
 networks:
-  # Network for the sd service (used for inference).
-  inference_net:
   # Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file.
   traefik_public:
     external: true
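
A quick smoke test for the assembled stack — a sketch, not part of the patches above. It assumes the shared Traefik stack and both model stacks are running on localhost with Traefik bound to port 80, and the prompts are placeholders; llama-cpp-python exposes an OpenAI-compatible HTTP API, and the sd service exposes the FastAPI routes defined in main.py:

```bash
# LLM behind Traefik: the llm-strip-prefix middleware removes /inference/llm/
# before forwarding to llm:8000, so this reaches llama-cpp-python's /v1/completions.
curl -s http://localhost/inference/llm/v1/completions \
  -H 'Content-Type: application/json' \
  -d '{"prompt": "Q: Name the capital of France. A:", "max_tokens": 16}'

# SD behind Traefik: POST a form field "prompt" to main.py's /inference/ route
# (python-multipart handles the form parsing); the JSON response carries the
# /serve/ URL where the image will appear once the background task finishes.
curl -s -X POST http://localhost/inference/sd/inference/ \
  -F 'prompt=a lighthouse at sunset, oil painting'
```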