diff --git a/.github/workflows/jan-server-build-nightly.yml b/.github/workflows/jan-server-build-nightly.yml
new file mode 100644
index 000000000..0d1bc3ca8
--- /dev/null
+++ b/.github/workflows/jan-server-build-nightly.yml
@@ -0,0 +1,40 @@
+name: Jan Build Docker Nightly or Manual
+
+on:
+  push:
+    branches:
+      - main
+      - feature/helmchart-and-ci-jan-server
+    paths-ignore:
+      - 'README.md'
+      - 'docs/**'
+  schedule:
+    - cron: '0 20 * * 1,2,3' # At 8 PM UTC on Monday, Tuesday, and Wednesday, i.e. 3 AM UTC+7 on Tuesday, Wednesday, and Thursday
+  workflow_dispatch:
+
+jobs:
+  # Computes the updated app version from the latest release tag and build number, and exposes it as an output
+  get-update-version:
+    uses: ./.github/workflows/template-get-update-version.yml
+
+  build-cpu:
+    uses: ./.github/workflows/template-build-jan-server.yml
+    permissions:
+      packages: write
+    secrets: inherit
+    needs: [get-update-version]
+    with:
+      dockerfile_path: ./Dockerfile
+      docker_image_tag: "ghcr.io/janhq/jan-server:dev-cpu-latest,ghcr.io/janhq/jan-server:dev-cpu-${{ needs.get-update-version.outputs.new_version }}"
+
+  build-gpu:
+    uses: ./.github/workflows/template-build-jan-server.yml
+    permissions:
+      packages: write
+    secrets: inherit
+    needs: [get-update-version]
+    with:
+      dockerfile_path: ./Dockerfile.gpu
+      docker_image_tag: "ghcr.io/janhq/jan-server:dev-cuda-12.2-latest,ghcr.io/janhq/jan-server:dev-cuda-12.2-${{ needs.get-update-version.outputs.new_version }}"
+
+
diff --git a/.github/workflows/jan-server-build.yml b/.github/workflows/jan-server-build.yml
new file mode 100644
index 000000000..0665838d6
--- /dev/null
+++ b/.github/workflows/jan-server-build.yml
@@ -0,0 +1,30 @@
+name: Jan Build Docker
+
+on:
+  push:
+    tags: ["v[0-9]+.[0-9]+.[0-9]+"]
+
+jobs:
+  # Computes the updated app version from the latest release tag and build number, and exposes it as an output
+  get-update-version:
+    uses: ./.github/workflows/template-get-update-version.yml
+
+  build-cpu:
+    permissions:
+      packages: write
+    uses: ./.github/workflows/template-build-jan-server.yml
+    secrets: inherit
+    needs: [get-update-version]
+    with:
+      dockerfile_path: ./Dockerfile
+      docker_image_tag: "ghcr.io/janhq/jan-server:cpu-latest,ghcr.io/janhq/jan-server:cpu-${{ needs.get-update-version.outputs.new_version }}"
+
+  build-gpu:
+    permissions:
+      packages: write
+    uses: ./.github/workflows/template-build-jan-server.yml
+    secrets: inherit
+    needs: [get-update-version]
+    with:
+      dockerfile_path: ./Dockerfile.gpu
+      docker_image_tag: "ghcr.io/janhq/jan-server:cuda-12.2-latest,ghcr.io/janhq/jan-server:cuda-12.2-${{ needs.get-update-version.outputs.new_version }}"
diff --git a/.github/workflows/template-build-jan-server.yml b/.github/workflows/template-build-jan-server.yml
new file mode 100644
index 000000000..9bb772605
--- /dev/null
+++ b/.github/workflows/template-build-jan-server.yml
@@ -0,0 +1,39 @@
+name: build-jan-server
+on:
+  workflow_call:
+    inputs:
+      dockerfile_path:
+        required: false
+        type: string
+        default: './Dockerfile'
+      docker_image_tag:
+        required: true
+        type: string
+        default: 'ghcr.io/janhq/jan-server:dev-latest'
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    env:
+      REGISTRY: ghcr.io
+      IMAGE_NAME: janhq/jan-server
+    permissions:
+      packages: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v3
+        with:
+          context: .
+          file: ${{ inputs.dockerfile_path }}
+          push: true
+          tags: ${{ inputs.docker_image_tag }}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 4540e5c7a..24748e08c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@
 error.log
 node_modules
 *.tgz
+!charts/server/charts/*.tgz
 yarn.lock
 dist
 build
diff --git a/README.md b/README.md
index ed3d18985..29a9e65d9 100644
--- a/README.md
+++ b/README.md
@@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
 (Updated the "Experimental (Nightly Build)" download links for jan.exe, Intel, M1/M2, jan.deb, and jan.AppImage; the HTML table markup is omitted here)
diff --git a/charts/server/Chart.lock b/charts/server/Chart.lock
new file mode 100644
index 000000000..915788d61
--- /dev/null
+++ b/charts/server/Chart.lock
@@ -0,0 +1,6 @@
+dependencies:
+- name: common
+  repository: oci://ghcr.io/janhq/charts
+  version: 0.1.2
+digest: sha256:35e98bde174130787755b0f8ea2359b7b6790d965a7157c2f7cabf1bc8c04471
+generated: "2024-02-20T16:20:37.6530108+07:00"
diff --git a/charts/server/Chart.yaml b/charts/server/Chart.yaml
new file mode 100644
index 000000000..fb2e1c91b
--- /dev/null
+++ b/charts/server/Chart.yaml
@@ -0,0 +1,10 @@
+apiVersion: v2
+name: jan-server
+description: A Helm chart for Kubernetes
+type: application
+version: 0.1.0
+appVersion: '1.0.0'
+dependencies:
+  - name: common
+    version: 0.1.2 # common-chart-version
+    repository: oci://ghcr.io/janhq/charts
diff --git a/charts/server/charts/common-0.1.2.tgz b/charts/server/charts/common-0.1.2.tgz
new file mode 100644
index 000000000..946617eab
Binary files /dev/null and b/charts/server/charts/common-0.1.2.tgz differ
diff --git a/charts/server/config.json b/charts/server/config.json
new file mode 100644
index 000000000..62e9682fa
--- /dev/null
+++ b/charts/server/config.json
@@ -0,0 +1,4 @@
+{
+  "image-list": "server=ghcr.io/janhq/jan-server",
+  "platforms": "linux/amd64"
+}
\ No newline at end of file
diff --git a/charts/server/values.yaml b/charts/server/values.yaml
new file mode 100644
index 000000000..70f463174
--- /dev/null
+++ b/charts/server/values.yaml
@@ -0,0 +1,256 @@
+common:
+  imageTag: v0.4.6-cpu
+  # DO NOT CHANGE THE LINE ABOVE. MAKE ALL CHANGES BELOW.
+
+  # Global PVC shared by all workloads
+  pvc:
+    enabled: false
+    name: 'janroot'
+    accessModes: 'ReadWriteOnce'
+    storageClassName: ''
+    capacity: '50Gi'
+
+  # Global image pull secrets
+  imagePullSecrets: []
+
+  externalSecret:
+    create: false
+    name: ''
+    annotations: {}
+
+  nameOverride: 'jan-server'
+  fullnameOverride: 'jan-server'
+
+  serviceAccount:
+    create: true
+    annotations: {}
+    name: 'jan-server-service-account'
+
+  podDisruptionBudget:
+    create: false
+    minAvailable: 1
+
+  workloads:
+    - name: server
+      image:
+        repository: ghcr.io/janhq/jan-server
+        pullPolicy: Always
+
+      command: ['/bin/sh', '-c']
+      args: ['cd server && node build/main.js']
+
+      replicaCount: 1
+      ports:
+        containerPort: 1337
+
+      strategy:
+        canary:
+          steps:
+            - setWeight: 50
+            - pause: { duration: 1m }
+
+      ingress:
+        enabled: true
+        className: 'nginx'
+        annotations:
+          nginx.ingress.kubernetes.io/proxy-body-size: '100m'
+          nginx.ingress.kubernetes.io/proxy-read-timeout: '1800'
+          nginx.ingress.kubernetes.io/proxy-send-timeout: '1800'
+          # cert-manager.io/cluster-issuer: 'jan-ai-dns01-cluster-issuer'
+          # nginx.ingress.kubernetes.io/force-ssl-redirect: 'true'
+          nginx.ingress.kubernetes.io/backend-protocol: HTTP
+        hosts:
+          - host: server.local
+            paths:
+              - path: /
+                pathType: Prefix
+        tls:
+          []
+          # - hosts:
+          #     - server-dev.jan.ai
+          #   secretName: jan-server-prod-tls-v2
+
+      instrumentation:
+        enabled: false
+      podAnnotations: {}
+
+      podSecurityContext: {}
+
+      securityContext: {}
+
+      service:
+        extenalLabel: {}
+        type: ClusterIP
+        port: 1337
+        targetPort: 1337
+
+      # To use a GPU, uncomment the lines below and change imageTag above to a GPU-enabled tag
+      resources:
+        # limits:
+        #   nvidia.com/gpu: 1
+        requests:
+          cpu: 2000m
+          memory: 8192M
+
+      # To use a persistent volume, uncomment the lines below and set pvc.enabled to true
+      volumes:
+        []
+        # - name: janroot
+        #   persistentVolumeClaim:
+        #     claimName: janroot
+
+      volumeMounts:
+        []
+        # - name: janroot
+        #   mountPath: /app/server/build/jan
+
+      # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET_NAME, AWS_ENDPOINT, and AWS_REGION should be mounted as secret env vars instead of plain text here
+      # Change API_BASE_URL to your server's public domain
+      env:
+        - name: API_BASE_URL
+          value: 'http://server.local'
+
+      lifecycle: {}
+      autoscaling:
+        enabled: false
+        minReplicas: 2
+        maxReplicas: 3
+        targetCPUUtilizationPercentage: 95
+        targetMemoryUtilizationPercentage: 95
+
+      kedaScaling:
+        enabled: false # ignored if autoscaling.enabled = true
+        cooldownPeriod: 30
+        pollingInterval: 2
+        minReplicas: 1
+        maxReplicas: 5
+        metricName: celery_queue_length
+        query: celery_queue_length{queue_name="myqueue"} # change queue_name here
+        serverAddress: http://prometheus-prod-kube-prome-prometheus.monitoring.svc:9090
+        threshold: '3'
+
+      nodeSelector: {}
+
+      tolerations: []
+
+      podSecurityGroup:
+        enabled: false
+        securitygroupid: []
+
+      # Reloader option
+      reloader: 'false'
+      vpa:
+        enabled: false
+
+    - name: web
+      image:
+        repository: ghcr.io/janhq/jan-server
+        pullPolicy: Always
+
+      command: ['/bin/sh', '-c']
+      args:
+        [
+          'export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out',
+        ]
+
+      replicaCount: 1
+      ports:
+        containerPort: 3000
+
+      strategy:
+        canary:
+          steps:
+            - setWeight: 50
+            - pause: { duration: 1m }
+
+      ingress:
+        enabled: true
+        className: 'nginx'
+        annotations:
+          nginx.ingress.kubernetes.io/proxy-body-size: '100m'
+          nginx.ingress.kubernetes.io/proxy-read-timeout: '1800'
+          nginx.ingress.kubernetes.io/proxy-send-timeout: '1800'
+          # cert-manager.io/cluster-issuer: 'jan-ai-dns01-cluster-issuer'
+          # nginx.ingress.kubernetes.io/force-ssl-redirect: 'true'
+          nginx.ingress.kubernetes.io/backend-protocol: HTTP
+        hosts:
+          - host: web.local
+            paths:
+              - path: /
+                pathType: Prefix
+        tls:
+          []
+          # - hosts:
+          #     - server-dev.jan.ai
+          #   secretName: jan-server-prod-tls-v2
+
+      instrumentation:
+        enabled: false
+      podAnnotations: {}
+
+      podSecurityContext: {}
+
+      securityContext: {}
+
+      service:
+        extenalLabel: {}
+        type: ClusterIP
+        port: 3000
+        targetPort: 3000
+
+      resources:
+        limits:
+          cpu: 1000m
+          memory: 2048M
+        requests:
+          cpu: 50m
+          memory: 500M
+
+      volumes:
+        []
+        # - name: janroot
+        #   persistentVolumeClaim:
+        #     claimName: janroot
+
+      volumeMounts:
+        []
+        # - name: janroot
+        #   mountPath: /app/server/build/jan
+
+      # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET_NAME, AWS_ENDPOINT, and AWS_REGION should be mounted as secret env vars instead of plain text here
+      # Change API_BASE_URL to your server's public domain
+      env:
+        - name: API_BASE_URL
+          value: 'http://server.local'
+
+      lifecycle: {}
+      autoscaling:
+        enabled: true
+        minReplicas: 1
+        maxReplicas: 3
+        targetCPUUtilizationPercentage: 95
+        targetMemoryUtilizationPercentage: 95
+
+      kedaScaling:
+        enabled: false # ignored if autoscaling.enabled = true
+        cooldownPeriod: 30
+        pollingInterval: 2
+        minReplicas: 1
+        maxReplicas: 5
+        metricName: celery_queue_length
+        query: celery_queue_length{queue_name="myqueue"} # change queue_name here
+        serverAddress: http://prometheus-prod-kube-prome-prometheus.monitoring.svc:9090
+        threshold: '3'
+
+      nodeSelector: {}
+
+      tolerations: []
+
+      podSecurityGroup:
+        enabled: false
+        securitygroupid: []
+
+      # Reloader option
+      reloader: 'false'
+      vpa:
+        enabled: false
diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
index cb9b88bed..979b4cfac 100644
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@@ -154,7 +154,10 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
     })
 
     if (nitroInitResult?.error) {
-      events.emit(ModelEvent.OnModelFail, model)
+      events.emit(ModelEvent.OnModelFail, {
+        ...model,
+        error: nitroInitResult.error,
+      })
       return
     }
 
diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
index 25f571c81..32a12cf8a 100644
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@@ -310,9 +310,15 @@ async function killSubprocess(): Promise<void> {
       subprocess?.kill()
       subprocess = undefined
     })
-    .catch(() => {})
+    .catch(() => {}) // Do nothing with this attempt
     .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
    .then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+    .catch((err) => {
+      log(
+        `[NITRO]::Debug: Could not kill the running process on port ${PORT}. Another process might be using the same port. ${err}`
+      )
+      throw 'PORT_NOT_AVAILABLE'
+    })
 }
 
 /**
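The two inference-nitro-extension changes above work together: src/index.ts now attaches the failure reason to the ModelEvent.OnModelFail payload, and src/node/index.ts throws the PORT_NOT_AVAILABLE marker when the Nitro port cannot be freed. Below is a minimal sketch of a subscriber that consumes the enriched payload; the payload type and handler are illustrative assumptions, not part of this patch.

```ts
import { events, Model, ModelEvent } from '@janhq/core'

// Assumed payload shape implied by the emit() call above: the model object
// spread together with an optional `error` field (e.g. 'PORT_NOT_AVAILABLE').
type ModelFailPayload = Model & { error?: string }

// Illustrative subscriber: surface the specific failure instead of a generic one.
events.on(ModelEvent.OnModelFail, (payload: ModelFailPayload) => {
  if (payload.error === 'PORT_NOT_AVAILABLE') {
    console.error(`Model ${payload.id} failed to start: the Nitro port is already in use`)
  } else {
    console.error(`Model ${payload.id} failed to start:`, payload.error)
  }
})
```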
diff --git a/web/containers/DropdownListSidebar/index.tsx b/web/containers/DropdownListSidebar/index.tsx
index 75bbe073c..191c7bcbe 100644
--- a/web/containers/DropdownListSidebar/index.tsx
+++ b/web/containers/DropdownListSidebar/index.tsx
@@ -195,7 +195,12 @@ const DropdownListSidebar = ({
-
+
         {engineOptions.map((name, i) => {
diff --git a/web/containers/Layout/BottomBar/index.tsx b/web/containers/Layout/BottomBar/index.tsx
index d6a7c05d2..5d6b7e79c 100644
--- a/web/containers/Layout/BottomBar/index.tsx
+++ b/web/containers/Layout/BottomBar/index.tsx
@@ -152,7 +152,7 @@ const BottomBar = () => {
         {gpus.length > 0 && (
-
+
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
-        const errorMessage = `${res.error}`
-        console.error('Failed to load model: ' + errorMessage)
+        const errorMessage = res?.error ?? res
+        console.error('Failed to load model: ', errorMessage)
         setStateModel(() => ({
           state: 'start',
           loading: false,
diff --git a/web/screens/Chat/ErrorMessage/index.tsx b/web/screens/Chat/ErrorMessage/index.tsx
index 56ea4847c..5aa0cd6ce 100644
--- a/web/screens/Chat/ErrorMessage/index.tsx
+++ b/web/screens/Chat/ErrorMessage/index.tsx
@@ -7,6 +7,7 @@ import ModalTroubleShooting, {
   modalTroubleShootingAtom,
 } from '@/containers/ModalTroubleShoot'
 
+import { loadModelErrorAtom } from '@/hooks/useActiveModel'
 import useSendChatMessage from '@/hooks/useSendChatMessage'
 
 import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
@@ -15,6 +16,8 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
   const messages = useAtomValue(getCurrentChatMessagesAtom)
   const { resendChatMessage } = useSendChatMessage()
   const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
+  const loadModelError = useAtomValue(loadModelErrorAtom)
+  const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'
 
   const regenerateMessage = async () => {
     const lastMessageIndex = messages.length - 1
@@ -23,9 +26,9 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
   }
 
   return (
-    <>
+
       {message.status === MessageStatus.Stopped && (
-
+
           Oops! The generation was interrupted. Let's give it another go!
@@ -41,25 +44,47 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
       )}
       {message.status === MessageStatus.Error && (
-          {`Apologies, something’s amiss!`}
-          Jan’s in beta. Access 
-           setModalTroubleShooting(true)}
-            troubleshooting assistance
-           now.
-
+        <>
+          {loadModelError === PORT_NOT_AVAILABLE ? (
+              Port 3928 is currently unavailable. Check for conflicting apps,
+              or access 
+               setModalTroubleShooting(true)}
+              >
+                troubleshooting assistance
+               for further support.
+
+          ) : (
+              {`Apologies, something’s amiss!`}
+              Jan’s in beta. Access 
+               setModalTroubleShooting(true)}
+              >
+                troubleshooting assistance
+               now.
+
+          )}
+
       )}
-
+
   )
 }
 
 export default ErrorMessage
diff --git a/web/screens/Chat/Sidebar/index.tsx b/web/screens/Chat/Sidebar/index.tsx
index 8d4f2bb4c..7187c84b3 100644
--- a/web/screens/Chat/Sidebar/index.tsx
+++ b/web/screens/Chat/Sidebar/index.tsx
@@ -197,7 +197,7 @@ const Sidebar: React.FC = () => {
         {activeThread?.assistants[0]?.tools &&
           componentDataAssistantSetting.length > 0 && (
-
+
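ErrorMessage above branches on loadModelError === PORT_NOT_AVAILABLE, where loadModelErrorAtom is imported from '@/hooks/useActiveModel' and the string originates from the throw 'PORT_NOT_AVAILABLE' added in the Nitro node process. The useActiveModel hunk is only partially visible in this patch, so the sketch below is an assumption about the shape that would satisfy ErrorMessage, not the actual file contents.

```ts
import { atom } from 'jotai'

// Assumed atom: remembers the last model-load error so the chat screen can
// render a port-specific hint instead of the generic beta message.
export const loadModelErrorAtom = atom<string | undefined>(undefined)

// Illustrative catch handler for the model-loading promise, mirroring the
// `res?.error ?? res` normalization shown in the useActiveModel hunk above.
export const recordLoadModelError = (
  setLoadModelError: (err: string | undefined) => void,
  res: unknown
) => {
  const errorMessage = (res as { error?: string })?.error ?? String(res)
  console.error('Failed to load model: ', errorMessage)
  setLoadModelError(errorMessage)
}
```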