diff --git a/.github/workflows/jan-server-build-nightly.yml b/.github/workflows/jan-server-build-nightly.yml
deleted file mode 100644
index 29e13804e..000000000
--- a/.github/workflows/jan-server-build-nightly.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-name: Docker Builder - Nightly / Manual
-
-on:
-  push:
-    branches:
-      - main
-      - feature/helmchart-and-ci-jan-server
-    paths-ignore:
-      - 'README.md'
-      - 'docs/**'
-  schedule:
-    - cron: '0 21 * * 1,2,3' # At 8 PM UTC on Monday, Tuesday, and Wednesday which is 4 AM UTC+7 Tuesday, Wednesday, and Thursday
-  workflow_dispatch:
-
-jobs:
-  # Job create Update app version based on latest release tag with build number and save to output
-  get-update-version:
-    uses: ./.github/workflows/template-get-update-version.yml
-
-  build-cpu:
-    uses: ./.github/workflows/template-build-jan-server.yml
-    permissions:
-      packages: write
-    secrets: inherit
-    needs: [get-update-version]
-    with:
-      dockerfile_path: ./Dockerfile
-      docker_image_tag: "ghcr.io/janhq/jan-server:dev-cpu-latest,ghcr.io/janhq/jan-server:dev-cpu-${{ needs.get-update-version.outputs.new_version }}"
-
-  build-gpu:
-    uses: ./.github/workflows/template-build-jan-server.yml
-    permissions:
-      packages: write
-    secrets: inherit
-    needs: [get-update-version]
-    with:
-      dockerfile_path: ./Dockerfile.gpu
-      docker_image_tag: "ghcr.io/janhq/jan-server:dev-cuda-12.2-latest,ghcr.io/janhq/jan-server:dev-cuda-12.2-${{ needs.get-update-version.outputs.new_version }}"
-
-
diff --git a/.github/workflows/jan-server-build.yml b/.github/workflows/jan-server-build.yml
deleted file mode 100644
index 503efd298..000000000
--- a/.github/workflows/jan-server-build.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-name: Docker Builder - Tag
-
-on:
-  push:
-    tags: ["v[0-9]+.[0-9]+.[0-9]+"]
-
-jobs:
-  # Job create Update app version based on latest release tag with build number and save to output
-  get-update-version:
-    uses: ./.github/workflows/template-get-update-version.yml
-
-  build-cpu:
-    permissions:
-      packages: write
-    uses: ./.github/workflows/template-build-jan-server.yml
-    secrets: inherit
-    needs: [get-update-version]
-    with:
-      dockerfile_path: ./Dockerfile
-      docker_image_tag: "ghcr.io/janhq/jan-server:cpu-latest,ghcr.io/janhq/jan-server:cpu-${{ needs.get-update-version.outputs.new_version }}"
-
-  build-gpu:
-    permissions:
-      packages: write
-    uses: ./.github/workflows/template-build-jan-server.yml
-    secrets: inherit
-    needs: [get-update-version]
-    with:
-      dockerfile_path: ./Dockerfile.gpu
-      docker_image_tag: "ghcr.io/janhq/jan-server:cuda-12.2-latest,ghcr.io/janhq/jan-server:cuda-12.2-${{ needs.get-update-version.outputs.new_version }}"
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index cc0a67720..000000000
--- a/Dockerfile
+++ /dev/null
@@ -1,58 +0,0 @@
-FROM node:20-bookworm AS base
-
-# 1. Install dependencies only when needed
-FROM base AS builder
-
-# Install g++ 11
-RUN apt update && apt install -y gcc-11 g++-11 cpp-11 jq xsel && rm -rf /var/lib/apt/lists/*
-
-WORKDIR /app
-
-# Install dependencies based on the preferred package manager
-COPY . ./
-
-RUN export NITRO_VERSION=$(cat extensions/inference-nitro-extension/bin/version.txt) && \
-    jq --arg nitroVersion $NITRO_VERSION '(.scripts."downloadnitro:linux" | gsub("\\${NITRO_VERSION}"; $nitroVersion)) | gsub("\r"; "")' extensions/inference-nitro-extension/package.json > /tmp/newcommand.txt && export NEW_COMMAND=$(sed 's/^"//;s/"$//' /tmp/newcommand.txt) && jq --arg newCommand "$NEW_COMMAND" '.scripts."downloadnitro:linux" = $newCommand' extensions/inference-nitro-extension/package.json > /tmp/package.json && mv /tmp/package.json extensions/inference-nitro-extension/package.json
-RUN make install-and-build
-
-# # 2. Rebuild the source code only when needed
-FROM base AS runner
-
-# Install g++ 11
-RUN apt update && apt install -y gcc-11 g++-11 cpp-11 jq xsel && rm -rf /var/lib/apt/lists/*
-
-WORKDIR /app
-
-# Copy the package.json and yarn.lock of root yarn space to leverage Docker cache
-COPY --from=builder /app/package.json ./package.json
-COPY --from=builder /app/node_modules ./node_modules/
-COPY --from=builder /app/yarn.lock ./yarn.lock
-
-# Copy the package.json, yarn.lock, and build output of server yarn space to leverage Docker cache
-COPY --from=builder /app/core ./core/
-COPY --from=builder /app/server ./server/
-RUN cd core && yarn install && yarn run build
-
-# Copy pre-install dependencies
-COPY --from=builder /app/pre-install ./pre-install/
-
-# Copy the package.json, yarn.lock, and output of web yarn space to leverage Docker cache
-COPY --from=builder /app/joi ./joi/
-COPY --from=builder /app/web ./web/
-
-RUN yarn workspace @janhq/joi install && yarn workspace @janhq/joi build
-RUN yarn workspace @janhq/web install
-
-RUN npm install -g serve@latest
-
-EXPOSE 1337 3000 3928
-
-ENV JAN_API_HOST 0.0.0.0
-ENV JAN_API_PORT 1337
-
-ENV API_BASE_URL http://localhost:1337
-
-CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace @janhq/web build && cd web && npx serve out & cd server && node build/main.js"]
-
-# docker build -t jan .
-# docker run -p 1337:1337 -p 3000:3000 -p 3928:3928 jan
diff --git a/Dockerfile.gpu b/Dockerfile.gpu
deleted file mode 100644
index 41f9a898b..000000000
--- a/Dockerfile.gpu
+++ /dev/null
@@ -1,85 +0,0 @@
-# Please change the base image to the appropriate CUDA version base on NVIDIA Driver Compatibility
-# Run nvidia-smi to check the CUDA version and the corresponding driver version
-# Then update the base image to the appropriate CUDA version refer https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags
-
-FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base
-
-# 1. Install dependencies only when needed
-FROM base AS builder
-
-# Install g++ 11
-RUN apt update && apt install -y gcc-11 g++-11 cpp-11 jq xsel curl gnupg make python3-dev && curl -sL https://deb.nodesource.com/setup_20.x | bash - && apt install nodejs -y && rm -rf /var/lib/apt/lists/*
-
-# Update alternatives for GCC and related tools
-RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 110 \
-    --slave /usr/bin/g++ g++ /usr/bin/g++-11 \
-    --slave /usr/bin/gcov gcov /usr/bin/gcov-11 \
-    --slave /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-11 \
-    --slave /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-11 && \
-    update-alternatives --install /usr/bin/cpp cpp /usr/bin/cpp-11 110
-
-RUN npm install -g yarn
-
-WORKDIR /app
-
-# Install dependencies based on the preferred package manager
-COPY . ./
-
-RUN export NITRO_VERSION=$(cat extensions/inference-nitro-extension/bin/version.txt) && \
-    jq --arg nitroVersion $NITRO_VERSION '(.scripts."downloadnitro:linux" | gsub("\\${NITRO_VERSION}"; $nitroVersion)) | gsub("\r"; "")' extensions/inference-nitro-extension/package.json > /tmp/newcommand.txt && export NEW_COMMAND=$(sed 's/^"//;s/"$//' /tmp/newcommand.txt) && jq --arg newCommand "$NEW_COMMAND" '.scripts."downloadnitro:linux" = $newCommand' extensions/inference-nitro-extension/package.json > /tmp/package.json && mv /tmp/package.json extensions/inference-nitro-extension/package.json
-RUN make install-and-build
-
-# # 2. Rebuild the source code only when needed
-FROM base AS runner
-
-# Install g++ 11
-RUN apt update && apt install -y gcc-11 g++-11 cpp-11 jq xsel curl gnupg make python3-dev && curl -sL https://deb.nodesource.com/setup_20.x | bash - && apt-get install nodejs -y && rm -rf /var/lib/apt/lists/*
-
-# Update alternatives for GCC and related tools
-RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 110 \
-    --slave /usr/bin/g++ g++ /usr/bin/g++-11 \
-    --slave /usr/bin/gcov gcov /usr/bin/gcov-11 \
-    --slave /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-11 \
-    --slave /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-11 && \
-    update-alternatives --install /usr/bin/cpp cpp /usr/bin/cpp-11 110
-
-RUN npm install -g yarn
-
-WORKDIR /app
-
-# Copy the package.json and yarn.lock of root yarn space to leverage Docker cache
-COPY --from=builder /app/package.json ./package.json
-COPY --from=builder /app/node_modules ./node_modules/
-COPY --from=builder /app/yarn.lock ./yarn.lock
-
-# Copy the package.json, yarn.lock, and build output of server yarn space to leverage Docker cache
-COPY --from=builder /app/core ./core/
-COPY --from=builder /app/server ./server/
-RUN cd core && yarn install && yarn run build
-
-# Copy pre-install dependencies
-COPY --from=builder /app/pre-install ./pre-install/
-
-# Copy the package.json, yarn.lock, and output of web yarn space to leverage Docker cache
-COPY --from=builder /app/joi ./joi/
-COPY --from=builder /app/web ./web/
-
-RUN yarn workspace @janhq/joi install && yarn workspace @janhq/joi build
-RUN yarn workspace @janhq/web install
-
-RUN npm install -g serve@latest
-
-EXPOSE 1337 3000 3928
-
-ENV LD_LIBRARY_PATH=/usr/local/cuda/targets/x86_64-linux/lib:/usr/local/cuda-12.0/compat${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
-
-ENV JAN_API_HOST 0.0.0.0
-ENV JAN_API_PORT 1337
-
-ENV API_BASE_URL http://localhost:1337
-
-CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace @janhq/web build && cd web && npx serve out & cd server && node build/main.js"]
-
-# pre-requisites: nvidia-docker
-# docker build -t jan-gpu . -f Dockerfile.gpu
-# docker run -p 1337:1337 -p 3000:3000 -p 3928:3928 --gpus all jan-gpu
diff --git a/charts/server/Chart.lock b/charts/server/Chart.lock
deleted file mode 100644
index 915788d61..000000000
--- a/charts/server/Chart.lock
+++ /dev/null
@@ -1,6 +0,0 @@
-dependencies:
-- name: common
-  repository: oci://ghcr.io/janhq/charts
-  version: 0.1.2
-digest: sha256:35e98bde174130787755b0f8ea2359b7b6790d965a7157c2f7cabf1bc8c04471
-generated: "2024-02-20T16:20:37.6530108+07:00"
diff --git a/charts/server/Chart.yaml b/charts/server/Chart.yaml
deleted file mode 100644
index fb2e1c91b..000000000
--- a/charts/server/Chart.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-apiVersion: v2
-name: jan-server
-description: A Helm chart for Kubernetes
-type: application
-version: 0.1.0
-appVersion: '1.0.0'
-dependencies:
-  - name: common
-    version: 0.1.2 # common-chart-version
-    repository: oci://ghcr.io/janhq/charts
diff --git a/charts/server/charts/common-0.1.2.tgz b/charts/server/charts/common-0.1.2.tgz
deleted file mode 100644
index 946617eab..000000000
Binary files a/charts/server/charts/common-0.1.2.tgz and /dev/null differ
diff --git a/charts/server/config.json b/charts/server/config.json
deleted file mode 100644
index 62e9682fa..000000000
--- a/charts/server/config.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-  "image-list": "server=ghcr.io/janhq/jan-server",
-  "platforms": "linux/amd64"
-}
\ No newline at end of file
diff --git a/charts/server/values.yaml b/charts/server/values.yaml
deleted file mode 100644
index b31f47656..000000000
--- a/charts/server/values.yaml
+++ /dev/null
@@ -1,256 +0,0 @@
-common:
-  imageTag: v0.4.6-cpu
-  # DO NOT CHANGE THE LINE ABOVE. MAKE ALL CHANGES BELOW
-
-  # Global pvc for all workload
-  pvc:
-    enabled: false
-    name: 'janroot'
-    accessModes: 'ReadWriteOnce'
-    storageClassName: ''
-    capacity: '50Gi'
-
-  # Global image pull secret
-  imagePullSecrets: []
-
-  externalSecret:
-    create: false
-    name: ''
-    annotations: {}
-
-  nameOverride: 'jan-server'
-  fullnameOverride: 'jan-server'
-
-  serviceAccount:
-    create: true
-    annotations: {}
-    name: 'jan-server-service-account'
-
-  podDisruptionBudget:
-    create: false
-    minAvailable: 1
-
-  workloads:
-    - name: server
-      image:
-        repository: ghcr.io/janhq/jan-server
-        pullPolicy: Always
-
-      command: ['/bin/sh', '-c']
-      args: ['cd server && node build/main.js']
-
-      replicaCount: 1
-      ports:
-        containerPort: 1337
-
-      strategy:
-        canary:
-          steps:
-            - setWeight: 50
-            - pause: { duration: 1m }
-
-      ingress:
-        enabled: true
-        className: 'nginx'
-        annotations:
-          nginx.ingress.kubernetes.io/proxy-body-size: '100m'
-          nginx.ingress.kubernetes.io/proxy-read-timeout: '1800'
-          nginx.ingress.kubernetes.io/proxy-send-timeout: '1800'
-          # cert-manager.io/cluster-issuer: 'jan-ai-dns01-cluster-issuer'
-          # nginx.ingress.kubernetes.io/force-ssl-redirect: 'true'
-          nginx.ingress.kubernetes.io/backend-protocol: HTTP
-        hosts:
-          - host: server.local
-            paths:
-              - path: /
-                pathType: Prefix
-        tls:
-          []
-          # - hosts:
-          #     - server-dev.jan.ai
-          #   secretName: jan-server-prod-tls-v2
-
-      instrumentation:
-        enabled: false
-      podAnnotations: {}
-
-      podSecurityContext: {}
-
-      securityContext: {}
-
-      service:
-        externalLabel: {}
-        type: ClusterIP
-        port: 1337
-        targetPort: 1337
-
-      # If you want to use GPU, please uncomment the following lines and change imageTag to the one with GPU support
-      resources:
-        # limits:
-        #   nvidia.com/gpu: 1
-        requests:
-          cpu: 2000m
-          memory: 8192M
-
-      # If you want to use pv, please uncomment the following lines and enable pvc.enabled
-      volumes:
-        []
-        # - name: janroot
-        #   persistentVolumeClaim:
-        #     claimName: janroot
-
-      volumeMounts:
-        []
-        # - name: janroot
-        #   mountPath: /app/server/build/jan
-
-      # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET_NAME, AWS_ENDPOINT, AWS_REGION should mount as a secret env instead of plain text here
-      # Change API_BASE_URL to your server's public domain
-      env:
-        - name: API_BASE_URL
-          value: 'http://server.local'
-
-      lifecycle: {}
-      autoscaling:
-        enabled: false
-        minReplicas: 2
-        maxReplicas: 3
-        targetCPUUtilizationPercentage: 95
-        targetMemoryUtilizationPercentage: 95
-
-      kedaScaling:
-        enabled: false # ignore if autoscaling.enable = true
-        cooldownPeriod: 30
-        pollingInterval: 2
-        minReplicas: 1
-        maxReplicas: 5
-        metricName: celery_queue_length
-        query: celery_queue_length{queue_name="myqueue"} # change queue_name here
-        serverAddress: http://prometheus-prod-kube-prome-prometheus.monitoring.svc:9090
-        threshold: '3'
-
-      nodeSelector: {}
-
-      tolerations: []
-
-      podSecurityGroup:
-        enabled: false
-        securitygroupid: []
-
-      # Reloader Option
-      reloader: 'false'
-      vpa:
-        enabled: false
-
-    - name: web
-      image:
-        repository: ghcr.io/janhq/jan-server
-        pullPolicy: Always
-
-      command: ['/bin/sh', '-c']
-      args:
-        [
-          'export NODE_ENV=production && yarn workspace @janhq/web build && cd web && npx serve out',
-        ]
-
-      replicaCount: 1
-      ports:
-        containerPort: 3000
-
-      strategy:
-        canary:
-          steps:
-            - setWeight: 50
-            - pause: { duration: 1m }
-
-      ingress:
-        enabled: true
-        className: 'nginx'
-        annotations:
-          nginx.ingress.kubernetes.io/proxy-body-size: '100m'
-          nginx.ingress.kubernetes.io/proxy-read-timeout: '1800'
-          nginx.ingress.kubernetes.io/proxy-send-timeout: '1800'
-          # cert-manager.io/cluster-issuer: 'jan-ai-dns01-cluster-issuer'
-          # nginx.ingress.kubernetes.io/force-ssl-redirect: 'true'
-          nginx.ingress.kubernetes.io/backend-protocol: HTTP
-        hosts:
-          - host: web.local
-            paths:
-              - path: /
-                pathType: Prefix
-        tls:
-          []
-          # - hosts:
-          #     - server-dev.jan.ai
-          #   secretName: jan-server-prod-tls-v2
-
-      instrumentation:
-        enabled: false
-      podAnnotations: {}
-
-      podSecurityContext: {}
-
-      securityContext: {}
-
-      service:
-        externalLabel: {}
-        type: ClusterIP
-        port: 3000
-        targetPort: 3000
-
-      resources:
-        limits:
-          cpu: 1000m
-          memory: 2048M
-        requests:
-          cpu: 50m
-          memory: 500M
-
-      volumes:
-        []
-        # - name: janroot
-        #   persistentVolumeClaim:
-        #     claimName: janroot
-
-      volumeMounts:
-        []
-        # - name: janroot
-        #   mountPath: /app/server/build/jan
-
-      # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET_NAME, AWS_ENDPOINT, AWS_REGION should mount as a secret env instead of plain text here
-      # Change API_BASE_URL to your server's public domain
-      env:
-        - name: API_BASE_URL
-          value: 'http://server.local'
-
-      lifecycle: {}
-      autoscaling:
-        enabled: true
-        minReplicas: 1
-        maxReplicas: 3
-        targetCPUUtilizationPercentage: 95
-        targetMemoryUtilizationPercentage: 95
-
-      kedaScaling:
-        enabled: false # ignore if autoscaling.enable = true
-        cooldownPeriod: 30
-        pollingInterval: 2
-        minReplicas: 1
-        maxReplicas: 5
-        metricName: celery_queue_length
-        query: celery_queue_length{queue_name="myqueue"} # change queue_name here
-        serverAddress: http://prometheus-prod-kube-prome-prometheus.monitoring.svc:9090
-        threshold: '3'
-
-      nodeSelector: {}
-
-      tolerations: []
-
-      podSecurityGroup:
-        enabled: false
-        securitygroupid: []
-
-      # Reloader Option
-      reloader: 'false'
-      vpa:
-        enabled: false
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
deleted file mode 100644
index 2e09d641b..000000000
--- a/docker-compose-dev.yml
+++ /dev/null
@@ -1,171 +0,0 @@
-# Docker Compose file for setting up Minio, createbuckets, app_cpu, and app_gpu services
-
-version: '3.7'
-
-services:
-  # Minio service for object storage
-  minio:
-    image: minio/minio
-    volumes:
-      - minio_data:/data
-    ports:
-      - '9000:9000'
-      - '9001:9001'
-    environment:
-      # Set the root user and password for Minio
-      MINIO_ROOT_USER: minioadmin # This acts as AWS_ACCESS_KEY
-      MINIO_ROOT_PASSWORD: minioadmin # This acts as AWS_SECRET_ACCESS_KEY
-    command: server --console-address ":9001" /data
-    restart: always
-    healthcheck:
-      test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live']
-      interval: 30s
-      timeout: 20s
-      retries: 3
-    networks:
-      vpcbr:
-        ipv4_address: 10.5.0.2
-
-  # createbuckets service to create a bucket and set its policy
-  createbuckets:
-    image: minio/mc
-    depends_on:
-      - minio
-    entrypoint: >
-      /bin/sh -c "
-      /usr/bin/mc alias set myminio http://minio:9000 minioadmin minioadmin;
-      /usr/bin/mc mb myminio/mybucket;
-      /usr/bin/mc policy set public myminio/mybucket;
-      exit 0;
-      "
-    networks:
-      vpcbr:
-
-  # app_cpu service for running the CPU version of the application
-  app_cpu_s3fs:
-    image: jan:latest
-    volumes:
-      - app_data_cpu_s3fs:/app/server/build/jan
-    build:
-      context: .
-      dockerfile: Dockerfile
-    environment:
-      # Set the AWS access key, secret access key, bucket name, endpoint, and region for app_cpu
-      AWS_ACCESS_KEY_ID: minioadmin
-      AWS_SECRET_ACCESS_KEY: minioadmin
-      S3_BUCKET_NAME: mybucket
-      AWS_ENDPOINT: http://10.5.0.2:9000
-      AWS_REGION: us-east-1
-      API_BASE_URL: http://localhost:1337
-    restart: always
-    profiles:
-      - cpu-s3fs
-    ports:
-      - '3000:3000'
-      - '1337:1337'
-      - '3928:3928'
-    networks:
-      vpcbr:
-        ipv4_address: 10.5.0.3
-
-  # app_gpu service for running the GPU version of the application
-  app_gpu_s3fs:
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    image: jan-gpu:latest
-    volumes:
-      - app_data_gpu_s3fs:/app/server/build/jan
-    build:
-      context: .
-      dockerfile: Dockerfile.gpu
-    restart: always
-    environment:
-      # Set the AWS access key, secret access key, bucket name, endpoint, and region for app_gpu
-      AWS_ACCESS_KEY_ID: minioadmin
-      AWS_SECRET_ACCESS_KEY: minioadmin
-      S3_BUCKET_NAME: mybucket
-      AWS_ENDPOINT: http://10.5.0.2:9000
-      AWS_REGION: us-east-1
-      API_BASE_URL: http://localhost:1337
-    profiles:
-      - gpu-s3fs
-    ports:
-      - '3000:3000'
-      - '1337:1337'
-      - '3928:3928'
-    networks:
-      vpcbr:
-        ipv4_address: 10.5.0.4
-
-  app_cpu_fs:
-    image: jan:latest
-    volumes:
-      - app_data_cpu_fs:/app/server/build/jan
-    build:
-      context: .
-      dockerfile: Dockerfile
-    environment:
-      API_BASE_URL: http://localhost:1337
-    restart: always
-    profiles:
-      - cpu-fs
-    ports:
-      - '3000:3000'
-      - '1337:1337'
-      - '3928:3928'
-    networks:
-      vpcbr:
-        ipv4_address: 10.5.0.5
-
-  # app_gpu service for running the GPU version of the application
-  app_gpu_fs:
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    image: jan-gpu:latest
-    volumes:
-      - app_data_gpu_fs:/app/server/build/jan
-    build:
-      context: .
-      dockerfile: Dockerfile.gpu
-    restart: always
-    environment:
-      API_BASE_URL: http://localhost:1337
-    profiles:
-      - gpu-fs
-    ports:
-      - '3000:3000'
-      - '1337:1337'
-      - '3928:3928'
-    networks:
-      vpcbr:
-        ipv4_address: 10.5.0.6
-
-volumes:
-  minio_data:
-  app_data_cpu_s3fs:
-  app_data_gpu_s3fs:
-  app_data_cpu_fs:
-  app_data_gpu_fs:
-
-networks:
-  vpcbr:
-    driver: bridge
-    ipam:
-      config:
-        - subnet: 10.5.0.0/16
-          gateway: 10.5.0.1
-# Usage:
-# - Run 'docker compose -f docker-compose-dev.yml --profile cpu-s3fs up -d' to start the app_cpu service
-# - Run 'docker compose -f docker-compose-dev.yml --profile gpu-s3fs up -d' to start the app_gpu service
-# - Run 'docker compose -f docker-compose-dev.yml --profile cpu-fs up -d' to start the app_cpu service
-# - Run 'docker compose -f docker-compose-dev.yml --profile gpu-fs up -d' to start the app_gpu service
diff --git a/docker-compose.yml b/docker-compose.yml
deleted file mode 100644
index 1e5660c12..000000000
--- a/docker-compose.yml
+++ /dev/null
@@ -1,159 +0,0 @@
-# Docker Compose file for setting up Minio, createbuckets, app_cpu, and app_gpu services
-
-version: '3.7'
-
-services:
-  # Minio service for object storage
-  minio:
-    image: minio/minio
-    volumes:
-      - minio_data:/data
-    ports:
-      - '9000:9000'
-      - '9001:9001'
-    environment:
-      # Set the root user and password for Minio
-      MINIO_ROOT_USER: minioadmin # This acts as AWS_ACCESS_KEY
-      MINIO_ROOT_PASSWORD: minioadmin # This acts as AWS_SECRET_ACCESS_KEY
-    command: server --console-address ":9001" /data
-    restart: always
-    healthcheck:
-      test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live']
-      interval: 30s
-      timeout: 20s
-      retries: 3
-    networks:
-      vpcbr:
-        ipv4_address: 10.5.0.2
-
-  # createbuckets service to create a bucket and set its policy
-  createbuckets:
-    image: minio/mc
-    depends_on:
-      - minio
-    entrypoint: >
-      /bin/sh -c "
-      /usr/bin/mc alias set myminio http://minio:9000 minioadmin minioadmin;
-      /usr/bin/mc mb myminio/mybucket;
-      /usr/bin/mc policy set public myminio/mybucket;
-      exit 0;
-      "
-    networks:
-      vpcbr:
-
-  # app_cpu service for running the CPU version of the application
-  app_cpu_s3fs:
-    volumes:
-      - app_data_cpu_s3fs:/app/server/build/jan
-    image: ghcr.io/janhq/jan-server:dev-cpu-latest
-    environment:
-      # Set the AWS access key, secret access key, bucket name, endpoint, and region for app_cpu
-      AWS_ACCESS_KEY_ID: minioadmin
-      AWS_SECRET_ACCESS_KEY: minioadmin
-      S3_BUCKET_NAME: mybucket
-      AWS_ENDPOINT: http://10.5.0.2:9000
-      AWS_REGION: us-east-1
-      API_BASE_URL: http://localhost:1337
-    restart: always
-    profiles:
-      - cpu-s3fs
-    ports:
-      - '3000:3000'
-      - '1337:1337'
-      - '3928:3928'
-    networks:
-      vpcbr:
-        ipv4_address: 10.5.0.3
-
-  # app_gpu service for running the GPU version of the application
-  app_gpu_s3fs:
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    image: ghcr.io/janhq/jan-server:dev-cuda-12.2-latest
-    volumes:
-      - app_data_gpu_s3fs:/app/server/build/jan
-    restart: always
-    environment:
-      # Set the AWS access key, secret access key, bucket name, endpoint, and region for app_gpu
-      AWS_ACCESS_KEY_ID: minioadmin
-      AWS_SECRET_ACCESS_KEY: minioadmin
-      S3_BUCKET_NAME: mybucket
-      AWS_ENDPOINT: http://10.5.0.2:9000
-      AWS_REGION: us-east-1
-      API_BASE_URL: http://localhost:1337
-    profiles:
-      - gpu-s3fs
-    ports:
-      - '3000:3000'
-      - '1337:1337'
-      - '3928:3928'
-    networks:
-      vpcbr:
-        ipv4_address: 10.5.0.4
-
-  app_cpu_fs:
-    image: ghcr.io/janhq/jan-server:dev-cpu-latest
-    volumes:
-      - app_data_cpu_fs:/app/server/build/jan
-    environment:
-      API_BASE_URL: http://localhost:1337
-    restart: always
-    profiles:
-      - cpu-fs
-    ports:
-      - '3000:3000'
-      - '1337:1337'
-      - '3928:3928'
-    networks:
-      vpcbr:
-        ipv4_address: 10.5.0.5
-
-  # app_gpu service for running the GPU version of the application
-  app_gpu_fs:
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    image: ghcr.io/janhq/jan-server:dev-cuda-12.2-latest
-    volumes:
-      - app_data_gpu_fs:/app/server/build/jan
-    restart: always
-    environment:
-      API_BASE_URL: http://localhost:1337
-    profiles:
-      - gpu-fs
-    ports:
-      - '3000:3000'
-      - '1337:1337'
-      - '3928:3928'
-    networks:
-      vpcbr:
-        ipv4_address: 10.5.0.6
-
-volumes:
-  minio_data:
-  app_data_cpu_s3fs:
-  app_data_gpu_s3fs:
-  app_data_cpu_fs:
-  app_data_gpu_fs:
-
-networks:
-  vpcbr:
-    driver: bridge
-    ipam:
-      config:
-        - subnet: 10.5.0.0/16
-          gateway: 10.5.0.1
-# Usage:
-# - Run 'docker compose --profile cpu-s3fs up -d' to start the app_cpu service
-# - Run 'docker compose --profile gpu-s3fs up -d' to start the app_gpu service
-# - Run 'docker compose --profile cpu-fs up -d' to start the app_cpu service
-# - Run 'docker compose --profile gpu-fs up -d' to start the app_gpu service