diff --git a/.github/workflows/jan-server-build-nightly.yml b/.github/workflows/jan-server-build-nightly.yml new file mode 100644 index 000000000..0d1bc3ca8 --- /dev/null +++ b/.github/workflows/jan-server-build-nightly.yml @@ -0,0 +1,40 @@ +name: Jan Build Docker Nightly or Manual + +on: + push: + branches: + - main + - feature/helmchart-and-ci-jan-server + paths-ignore: + - 'README.md' + - 'docs/**' + schedule: + - cron: '0 20 * * 1,2,3' # At 8 PM UTC on Monday, Tuesday, and Wednesday which is 3 AM UTC+7 Tuesday, Wednesday, and Thursday + workflow_dispatch: + +jobs: + # Job create Update app version based on latest release tag with build number and save to output + get-update-version: + uses: ./.github/workflows/template-get-update-version.yml + + build-cpu: + uses: ./.github/workflows/template-build-jan-server.yml + permissions: + packages: write + secrets: inherit + needs: [get-update-version] + with: + dockerfile_path: ./Dockerfile + docker_image_tag: "ghcr.io/janhq/jan-server:dev-cpu-latest,ghcr.io/janhq/jan-server:dev-cpu-${{ needs.get-update-version.outputs.new_version }}" + + build-gpu: + uses: ./.github/workflows/template-build-jan-server.yml + permissions: + packages: write + secrets: inherit + needs: [get-update-version] + with: + dockerfile_path: ./Dockerfile.gpu + docker_image_tag: "ghcr.io/janhq/jan-server:dev-cuda-12.2-latest,ghcr.io/janhq/jan-server:dev-cuda-12.2-${{ needs.get-update-version.outputs.new_version }}" + + diff --git a/.github/workflows/jan-server-build.yml b/.github/workflows/jan-server-build.yml new file mode 100644 index 000000000..0665838d6 --- /dev/null +++ b/.github/workflows/jan-server-build.yml @@ -0,0 +1,30 @@ +name: Jan Build Docker + +on: + push: + tags: ["v[0-9]+.[0-9]+.[0-9]+"] + +jobs: + # Job create Update app version based on latest release tag with build number and save to output + get-update-version: + uses: ./.github/workflows/template-get-update-version.yml + + build-cpu: + permissions: + packages: write + uses: 
./.github/workflows/template-build-jan-server.yml + secrets: inherit + needs: [get-update-version] + with: + dockerfile_path: ./Dockerfile + docker_image_tag: "ghcr.io/janhq/jan-server:cpu-latest,ghcr.io/janhq/jan-server:cpu-${{ needs.get-update-version.outputs.new_version }}" + + build-gpu: + permissions: + packages: write + uses: ./.github/workflows/template-build-jan-server.yml + secrets: inherit + needs: [get-update-version] + with: + dockerfile_path: ./Dockerfile.gpu + docker_image_tag: "ghcr.io/janhq/jan-server:cuda-12.2-latest,ghcr.io/janhq/jan-server:cuda-12.2-${{ needs.get-update-version.outputs.new_version }}" diff --git a/.github/workflows/template-build-jan-server.yml b/.github/workflows/template-build-jan-server.yml new file mode 100644 index 000000000..9bb772605 --- /dev/null +++ b/.github/workflows/template-build-jan-server.yml @@ -0,0 +1,39 @@ +name: build-jan-server +on: + workflow_call: + inputs: + dockerfile_path: + required: false + type: string + default: './Dockerfile' + docker_image_tag: + required: true + type: string + default: 'ghcr.io/janhq/jan-server:dev-latest' + +jobs: + build: + runs-on: ubuntu-latest + env: + REGISTRY: ghcr.io + IMAGE_NAME: janhq/jan-server + permissions: + packages: write + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push Docker image + uses: docker/build-push-action@v3 + with: + context: . 
+ file: ${{ inputs.dockerfile_path }} + push: true + tags: ${{ inputs.docker_image_tag }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4540e5c7a..24748e08c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ error.log node_modules *.tgz +!charts/server/charts/*.tgz yarn.lock dist build diff --git a/README.md b/README.md index ed3d18985..29a9e65d9 100644 --- a/README.md +++ b/README.md @@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
jan.exe
Intel
M1/M2
jan.deb
jan.AppImage
diff --git a/charts/server/Chart.lock b/charts/server/Chart.lock
new file mode 100644
index 000000000..915788d61
--- /dev/null
+++ b/charts/server/Chart.lock
@@ -0,0 +1,6 @@
+dependencies:
+- name: common
+ repository: oci://ghcr.io/janhq/charts
+ version: 0.1.2
+digest: sha256:35e98bde174130787755b0f8ea2359b7b6790d965a7157c2f7cabf1bc8c04471
+generated: "2024-02-20T16:20:37.6530108+07:00"
diff --git a/charts/server/Chart.yaml b/charts/server/Chart.yaml
new file mode 100644
index 000000000..fb2e1c91b
--- /dev/null
+++ b/charts/server/Chart.yaml
@@ -0,0 +1,10 @@
+apiVersion: v2
+name: jan-server
+description: A Helm chart for Kubernetes
+type: application
+version: 0.1.0
+appVersion: '1.0.0'
+dependencies:
+ - name: common
+ version: 0.1.2 # common-chart-version
+ repository: oci://ghcr.io/janhq/charts
diff --git a/charts/server/charts/common-0.1.2.tgz b/charts/server/charts/common-0.1.2.tgz
new file mode 100644
index 000000000..946617eab
Binary files /dev/null and b/charts/server/charts/common-0.1.2.tgz differ
diff --git a/charts/server/config.json b/charts/server/config.json
new file mode 100644
index 000000000..62e9682fa
--- /dev/null
+++ b/charts/server/config.json
@@ -0,0 +1,4 @@
+{
+ "image-list": "server=ghcr.io/janhq/jan-server",
+ "platforms": "linux/amd64"
+}
\ No newline at end of file
diff --git a/charts/server/values.yaml b/charts/server/values.yaml
new file mode 100644
index 000000000..70f463174
--- /dev/null
+++ b/charts/server/values.yaml
@@ -0,0 +1,256 @@
+common:
+ imageTag: v0.4.6-cpu
+ # DO NOT CHANGE THE LINE ABOVE. MAKE ALL CHANGES BELOW
+
+ # Global pvc for all workload
+ pvc:
+ enabled: false
+ name: 'janroot'
+ accessModes: 'ReadWriteOnce'
+ storageClassName: ''
+ capacity: '50Gi'
+
+ # Global image pull secret
+ imagePullSecrets: []
+
+ externalSecret:
+ create: false
+ name: ''
+ annotations: {}
+
+ nameOverride: 'jan-server'
+ fullnameOverride: 'jan-server'
+
+ serviceAccount:
+ create: true
+ annotations: {}
+ name: 'jan-server-service-account'
+
+ podDisruptionBudget:
+ create: false
+ minAvailable: 1
+
+ workloads:
+ - name: server
+ image:
+ repository: ghcr.io/janhq/jan-server
+ pullPolicy: Always
+
+ command: ['/bin/sh', '-c']
+ args: ['cd server && node build/main.js']
+
+ replicaCount: 1
+ ports:
+ containerPort: 1337
+
+ strategy:
+ canary:
+ steps:
+ - setWeight: 50
+ - pause: { duration: 1m }
+
+ ingress:
+ enabled: true
+ className: 'nginx'
+ annotations:
+ nginx.ingress.kubernetes.io/proxy-body-size: '100m'
+ nginx.ingress.kubernetes.io/proxy-read-timeout: '1800'
+ nginx.ingress.kubernetes.io/proxy-send-timeout: '1800'
+ # cert-manager.io/cluster-issuer: 'jan-ai-dns01-cluster-issuer'
+ # nginx.ingress.kubernetes.io/force-ssl-redirect: 'true'
+ nginx.ingress.kubernetes.io/backend-protocol: HTTP
+ hosts:
+ - host: server.local
+ paths:
+ - path: /
+ pathType: Prefix
+ tls:
+ []
+ # - hosts:
+ # - server-dev.jan.ai
+ # secretName: jan-server-prod-tls-v2
+
+ instrumentation:
+ enabled: false
+ podAnnotations: {}
+
+ podSecurityContext: {}
+
+ securityContext: {}
+
+ service:
+ extenalLabel: {}
+ type: ClusterIP
+ port: 1337
+ targetPort: 1337
+
+ # If you want to use GPU, please uncomment the following lines and change imageTag to the one with GPU support
+ resources:
+ # limits:
+ # nvidia.com/gpu: 1
+ requests:
+ cpu: 2000m
+ memory: 8192M
+
+ # If you want to use pv, please uncomment the following lines and enable pvc.enabled
+ volumes:
+ []
+ # - name: janroot
+ # persistentVolumeClaim:
+ # claimName: janroot
+
+ volumeMounts:
+ []
+ # - name: janroot
+ # mountPath: /app/server/build/jan
+
+ # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET_NAME, AWS_ENDPOINT, AWS_REGION should mount as a secret env instead of plain text here
+ # Change API_BASE_URL to your server's public domain
+ env:
+ - name: API_BASE_URL
+ value: 'http://server.local'
+
+ lifecycle: {}
+ autoscaling:
+ enabled: false
+ minReplicas: 2
+ maxReplicas: 3
+ targetCPUUtilizationPercentage: 95
+ targetMemoryUtilizationPercentage: 95
+
+ kedaScaling:
+ enabled: false # ignored if autoscaling.enabled = true
+ cooldownPeriod: 30
+ pollingInterval: 2
+ minReplicas: 1
+ maxReplicas: 5
+ metricName: celery_queue_length
+ query: celery_queue_length{queue_name="myqueue"} # change queue_name here
+ serverAddress: http://prometheus-prod-kube-prome-prometheus.monitoring.svc:9090
+ threshold: '3'
+
+ nodeSelector: {}
+
+ tolerations: []
+
+ podSecurityGroup:
+ enabled: false
+ securitygroupid: []
+
+ # Reloader Option
+ reloader: 'false'
+ vpa:
+ enabled: false
+
+ - name: web
+ image:
+ repository: ghcr.io/janhq/jan-server
+ pullPolicy: Always
+
+ command: ['/bin/sh', '-c']
+ args:
+ [
+ 'export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out',
+ ]
+
+ replicaCount: 1
+ ports:
+ containerPort: 3000
+
+ strategy:
+ canary:
+ steps:
+ - setWeight: 50
+ - pause: { duration: 1m }
+
+ ingress:
+ enabled: true
+ className: 'nginx'
+ annotations:
+ nginx.ingress.kubernetes.io/proxy-body-size: '100m'
+ nginx.ingress.kubernetes.io/proxy-read-timeout: '1800'
+ nginx.ingress.kubernetes.io/proxy-send-timeout: '1800'
+ # cert-manager.io/cluster-issuer: 'jan-ai-dns01-cluster-issuer'
+ # nginx.ingress.kubernetes.io/force-ssl-redirect: 'true'
+ nginx.ingress.kubernetes.io/backend-protocol: HTTP
+ hosts:
+ - host: web.local
+ paths:
+ - path: /
+ pathType: Prefix
+ tls:
+ []
+ # - hosts:
+ # - server-dev.jan.ai
+ # secretName: jan-server-prod-tls-v2
+
+ instrumentation:
+ enabled: false
+ podAnnotations: {}
+
+ podSecurityContext: {}
+
+ securityContext: {}
+
+ service:
+ extenalLabel: {}
+ type: ClusterIP
+ port: 3000
+ targetPort: 3000
+
+ resources:
+ limits:
+ cpu: 1000m
+ memory: 2048M
+ requests:
+ cpu: 50m
+ memory: 500M
+
+ volumes:
+ []
+ # - name: janroot
+ # persistentVolumeClaim:
+ # claimName: janroot
+
+ volumeMounts:
+ []
+ # - name: janroot
+ # mountPath: /app/server/build/jan
+
+ # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET_NAME, AWS_ENDPOINT, AWS_REGION should mount as a secret env instead of plain text here
+ # Change API_BASE_URL to your server's public domain
+ env:
+ - name: API_BASE_URL
+ value: 'http://server.local'
+
+ lifecycle: {}
+ autoscaling:
+ enabled: true
+ minReplicas: 1
+ maxReplicas: 3
+ targetCPUUtilizationPercentage: 95
+ targetMemoryUtilizationPercentage: 95
+
+ kedaScaling:
+ enabled: false # ignored if autoscaling.enabled = true
+ cooldownPeriod: 30
+ pollingInterval: 2
+ minReplicas: 1
+ maxReplicas: 5
+ metricName: celery_queue_length
+ query: celery_queue_length{queue_name="myqueue"} # change queue_name here
+ serverAddress: http://prometheus-prod-kube-prome-prometheus.monitoring.svc:9090
+ threshold: '3'
+
+ nodeSelector: {}
+
+ tolerations: []
+
+ podSecurityGroup:
+ enabled: false
+ securitygroupid: []
+
+ # Reloader Option
+ reloader: 'false'
+ vpa:
+ enabled: false
diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
index cb9b88bed..979b4cfac 100644
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@@ -154,7 +154,10 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
})
if (nitroInitResult?.error) {
- events.emit(ModelEvent.OnModelFail, model)
+ events.emit(ModelEvent.OnModelFail, {
+ ...model,
+ error: nitroInitResult.error,
+ })
return
}
diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
index 25f571c81..32a12cf8a 100644
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@@ -310,9 +310,15 @@ async function killSubprocess(): Promise{`Apologies, something’s amiss!`}
-
- Jan’s in beta. Access
- setModalTroubleShooting(true)}
+ <>
+ {loadModelError === PORT_NOT_AVAILABLE ? (
+
+ Port 3928 is currently unavailable. Check for conflicting apps,
+ or access
+ setModalTroubleShooting(true)}
+ >
+ troubleshooting assistance
+
+ for further support.
+
{`Apologies, something’s amiss!`}
++ Jan’s in beta. Access + setModalTroubleShooting(true)} + > + troubleshooting assistance + + now. +
+