Merge branch 'first_mvp_jan_inference' into add_inference_sd_cpp
This commit is contained in:
commit
e345a44d59
2
jan-inference/llm/.env.example
Normal file
2
jan-inference/llm/.env.example
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# Location of the model file (Hugging Face download URL).
MODEL_URL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin
# File name of the model: the last path segment of MODEL_URL.
# Written out literally because env-file parsers (e.g. Docker Compose) do not
# perform shell command substitution such as $(basename ...).
# Keep this in sync with MODEL_URL when switching models.
MODEL_FILE=llama-2-7b-chat.ggmlv3.q4_1.bin
|
||||||
@ -33,16 +33,6 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
# Specify the path to the model for the web application.
|
# Specify the path to the model for the web application.
|
||||||
MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
|
MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin
|
||||||
labels:
|
|
||||||
# Instead of using the Host rule, set a PathPrefix rule
|
|
||||||
- "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)"
|
|
||||||
# This tells Traefik where to forward the traffic for this route.
|
|
||||||
- "traefik.http.routers.web.service=llm"
|
|
||||||
# Define a service for the llm and specify its load balancer configuration
|
|
||||||
- "traefik.http.services.llm-service.loadbalancer.server.port=8000"
|
|
||||||
|
|
||||||
- "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm"
|
|
||||||
- "traefik.http.routers.web.middlewares=strip-llm-prefix"
|
|
||||||
# Health check configuration
|
# Health check configuration
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
|
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"]
|
||||||
@ -63,16 +53,7 @@ services:
|
|||||||
|
|
||||||
# Service for Traefik, a modern HTTP reverse proxy and load balancer.
|
# Service for Traefik, a modern HTTP reverse proxy and load balancer.
|
||||||
traefik:
|
traefik:
|
||||||
image: traefik:v2.5
|
image: traefik:v2.10
|
||||||
command:
|
|
||||||
# Enable the Traefik API dashboard without TLS (not recommended for production).
|
|
||||||
- "--api.insecure=true"
|
|
||||||
# Enable Traefik to use Docker as a provider.
|
|
||||||
- "--providers.docker=true"
|
|
||||||
# Do not expose services by default. Explicitly specify in each service if it should be exposed.
|
|
||||||
- "--providers.docker.exposedbydefault=false"
|
|
||||||
# Specify the default entry point on port 80.
|
|
||||||
- "--entrypoints.web.address=:80"
|
|
||||||
ports:
|
ports:
|
||||||
# Map port 80 in the container to port 80 on the host.
|
# Map port 80 in the container to port 80 on the host.
|
||||||
- "80:80"
|
- "80:80"
|
||||||
@ -80,7 +61,9 @@ services:
|
|||||||
- "8080:8080"
|
- "8080:8080"
|
||||||
# Mount the Docker socket to allow Traefik to listen to Docker's API.
|
# Mount the Docker socket to allow Traefik to listen to Docker's API.
|
||||||
volumes:
|
volumes:
|
||||||
- /var/run/docker.sock:/var/run/docker.sock
|
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||||
|
- ./traefik/traefik.yml:/traefik.yml:ro
|
||||||
|
- ./traefik/config.yml:/config.yml:ro
|
||||||
# Connect this service to the traefik_public network.
|
# Connect this service to the traefik_public network.
|
||||||
networks:
|
networks:
|
||||||
- traefik_public
|
- traefik_public
|
||||||
|
|||||||
23
jan-inference/llm/traefik/config.yml
Normal file
23
jan-inference/llm/traefik/config.yml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Traefik HTTP routing for the LLM inference service: one router, one backend
# service and one prefix-stripping middleware.
http:
  #region routers
  routers:
    llm:
      # Accept traffic arriving on the "web" entry point.
      entryPoints:
        - "web"
      # Match every request whose path starts with /inference/llm.
      rule: "PathPrefix(`/inference/llm`)"
      # Remove the routing prefix before the request is forwarded.
      middlewares:
        # NOTE(review): "llmm" looks like a typo for "llm"; the name is kept
        # because it must match the middleware key defined below and may be
        # referenced from other configuration — confirm before renaming.
        - llmm-strip-prefix
      service: llm
  #endregion
  #region services
  services:
    llm:
      loadBalancer:
        servers:
          # Presumably "llm" resolves to the LLM container on the shared
          # Docker network — verify against the compose service name.
          - url: "http://llm:8000"
  #endregion
  middlewares:
    llmm-strip-prefix:
      stripPrefix:
        prefixes:
          - "/inference/llm"
|
||||||
17
jan-inference/llm/traefik/traefik.yml
Normal file
17
jan-inference/llm/traefik/traefik.yml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Traefik static configuration.
# NOTE(review): the api, serversTransport and log settings below look like
# development values — confirm before using this file in production.
api:
  dashboard: true
  # Enables Traefik's additional debugging endpoints.
  debug: true
  # Serves the API/dashboard without authentication or TLS.
  insecure: true
entryPoints:
  # Plain-HTTP entry point named "web", listening on port 80.
  web:
    address: ":80"
serversTransport:
  # Disables TLS certificate verification for connections to backends.
  insecureSkipVerify: true
providers:
  docker:
    endpoint: "unix:///var/run/docker.sock"
    # Containers are not exposed through Traefik by default.
    exposedByDefault: false
  file:
    # Routers, services and middlewares are loaded from this file.
    filename: /config.yml
log:
  level: DEBUG
|
||||||
Loading…
x
Reference in New Issue
Block a user