diff --git a/.github/workflows/jan-server-build-nightly.yml b/.github/workflows/jan-server-build-nightly.yml new file mode 100644 index 000000000..0d1bc3ca8 --- /dev/null +++ b/.github/workflows/jan-server-build-nightly.yml @@ -0,0 +1,40 @@ +name: Jan Build Docker Nightly or Manual + +on: + push: + branches: + - main + - feature/helmchart-and-ci-jan-server + paths-ignore: + - 'README.md' + - 'docs/**' + schedule: + - cron: '0 20 * * 1,2,3' # At 8 PM UTC on Monday, Tuesday, and Wednesday (3 AM UTC+7 on Tuesday, Wednesday, and Thursday) + workflow_dispatch: + +jobs: + # Compute the updated app version from the latest release tag and build number, and save it to the job output + get-update-version: + uses: ./.github/workflows/template-get-update-version.yml + + build-cpu: + uses: ./.github/workflows/template-build-jan-server.yml + permissions: + packages: write + secrets: inherit + needs: [get-update-version] + with: + dockerfile_path: ./Dockerfile + docker_image_tag: "ghcr.io/janhq/jan-server:dev-cpu-latest,ghcr.io/janhq/jan-server:dev-cpu-${{ needs.get-update-version.outputs.new_version }}" + + build-gpu: + uses: ./.github/workflows/template-build-jan-server.yml + permissions: + packages: write + secrets: inherit + needs: [get-update-version] + with: + dockerfile_path: ./Dockerfile.gpu + docker_image_tag: "ghcr.io/janhq/jan-server:dev-cuda-12.2-latest,ghcr.io/janhq/jan-server:dev-cuda-12.2-${{ needs.get-update-version.outputs.new_version }}" + + diff --git a/.github/workflows/jan-server-build.yml b/.github/workflows/jan-server-build.yml new file mode 100644 index 000000000..0665838d6 --- /dev/null +++ b/.github/workflows/jan-server-build.yml @@ -0,0 +1,30 @@ +name: Jan Build Docker + +on: + push: + tags: ["v[0-9]+.[0-9]+.[0-9]+"] + +jobs: + # Compute the updated app version from the latest release tag and build number, and save it to the job output + get-update-version: + uses: ./.github/workflows/template-get-update-version.yml + + build-cpu: + permissions: + packages: write + uses: ./.github/workflows/template-build-jan-server.yml + secrets: inherit + needs: [get-update-version] + with: + dockerfile_path: ./Dockerfile + docker_image_tag: "ghcr.io/janhq/jan-server:cpu-latest,ghcr.io/janhq/jan-server:cpu-${{ needs.get-update-version.outputs.new_version }}" + + build-gpu: + permissions: + packages: write + uses: ./.github/workflows/template-build-jan-server.yml + secrets: inherit + needs: [get-update-version] + with: + dockerfile_path: ./Dockerfile.gpu + docker_image_tag: "ghcr.io/janhq/jan-server:cuda-12.2-latest,ghcr.io/janhq/jan-server:cuda-12.2-${{ needs.get-update-version.outputs.new_version }}" diff --git a/.github/workflows/template-build-jan-server.yml b/.github/workflows/template-build-jan-server.yml new file mode 100644 index 000000000..9bb772605 --- /dev/null +++ b/.github/workflows/template-build-jan-server.yml @@ -0,0 +1,39 @@ +name: build-jan-server +on: + workflow_call: + inputs: + dockerfile_path: + required: false + type: string + default: './Dockerfile' + docker_image_tag: + required: true + type: string + default: 'ghcr.io/janhq/jan-server:dev-latest' + +jobs: + build: + runs-on: ubuntu-latest + env: + REGISTRY: ghcr.io + IMAGE_NAME: janhq/jan-server + permissions: + packages: write + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push Docker image + uses: 
docker/build-push-action@v3 + with: + context: . + file: ${{ inputs.dockerfile_path }} + push: true + tags: ${{ inputs.docker_image_tag }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4540e5c7a..62878011e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ error.log node_modules *.tgz +!charts/server/charts/*.tgz yarn.lock dist build @@ -28,4 +29,5 @@ extensions/inference-nitro-extension/bin/*/*.exp extensions/inference-nitro-extension/bin/*/*.lib extensions/inference-nitro-extension/bin/saved-* extensions/inference-nitro-extension/bin/*.tar.gz - +extensions/inference-nitro-extension/bin/vulkaninfoSDK.exe +extensions/inference-nitro-extension/bin/vulkaninfo diff --git a/Dockerfile b/Dockerfile index 913a93a11..48b2d254f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,9 +14,6 @@ COPY . ./ RUN export NITRO_VERSION=$(cat extensions/inference-nitro-extension/bin/version.txt) && \ jq --arg nitroVersion $NITRO_VERSION '(.scripts."downloadnitro:linux" | gsub("\\${NITRO_VERSION}"; $nitroVersion)) | gsub("\r"; "")' extensions/inference-nitro-extension/package.json > /tmp/newcommand.txt && export NEW_COMMAND=$(sed 's/^"//;s/"$//' /tmp/newcommand.txt) && jq --arg newCommand "$NEW_COMMAND" '.scripts."downloadnitro:linux" = $newCommand' extensions/inference-nitro-extension/package.json > /tmp/package.json && mv /tmp/package.json extensions/inference-nitro-extension/package.json RUN make install-and-build -RUN yarn workspace jan-web install - -RUN export NODE_ENV=production && yarn workspace jan-web build # # 2. Rebuild the source code only when needed FROM base AS runner @@ -42,12 +39,13 @@ COPY --from=builder /app/docs/openapi ./docs/openapi/ COPY --from=builder /app/pre-install ./pre-install/ # Copy the package.json, yarn.lock, and output of web yarn space to leverage Docker cache -COPY --from=builder /app/web/out ./web/out/ -COPY --from=builder /app/web/.next ./web/.next/ -COPY --from=builder /app/web/package.json ./web/package.json -COPY --from=builder /app/web/yarn.lock ./web/yarn.lock +COPY --from=builder /app/uikit ./uikit/ +COPY --from=builder /app/web ./web/ COPY --from=builder /app/models ./models/ +RUN yarn workspace @janhq/uikit install && yarn workspace @janhq/uikit build +RUN yarn workspace jan-web install + RUN npm install -g serve@latest EXPOSE 1337 3000 3928 @@ -55,7 +53,9 @@ EXPOSE 1337 3000 3928 ENV JAN_API_HOST 0.0.0.0 ENV JAN_API_PORT 1337 -CMD ["sh", "-c", "cd server && node build/main.js & cd web && npx serve out"] +ENV API_BASE_URL http://localhost:1337 + +CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out & cd server && node build/main.js"] # docker build -t jan . # docker run -p 1337:1337 -p 3000:3000 -p 3928:3928 jan diff --git a/Dockerfile.gpu b/Dockerfile.gpu index d5ea70499..832e2c18c 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -28,9 +28,6 @@ COPY . 
./ RUN export NITRO_VERSION=$(cat extensions/inference-nitro-extension/bin/version.txt) && \ jq --arg nitroVersion $NITRO_VERSION '(.scripts."downloadnitro:linux" | gsub("\\${NITRO_VERSION}"; $nitroVersion)) | gsub("\r"; "")' extensions/inference-nitro-extension/package.json > /tmp/newcommand.txt && export NEW_COMMAND=$(sed 's/^"//;s/"$//' /tmp/newcommand.txt) && jq --arg newCommand "$NEW_COMMAND" '.scripts."downloadnitro:linux" = $newCommand' extensions/inference-nitro-extension/package.json > /tmp/package.json && mv /tmp/package.json extensions/inference-nitro-extension/package.json RUN make install-and-build -RUN yarn workspace jan-web install - -RUN export NODE_ENV=production && yarn workspace jan-web build # # 2. Rebuild the source code only when needed FROM base AS runner @@ -66,12 +63,13 @@ COPY --from=builder /app/docs/openapi ./docs/openapi/ COPY --from=builder /app/pre-install ./pre-install/ # Copy the package.json, yarn.lock, and output of web yarn space to leverage Docker cache -COPY --from=builder /app/web/out ./web/out/ -COPY --from=builder /app/web/.next ./web/.next/ -COPY --from=builder /app/web/package.json ./web/package.json -COPY --from=builder /app/web/yarn.lock ./web/yarn.lock +COPY --from=builder /app/uikit ./uikit/ +COPY --from=builder /app/web ./web/ COPY --from=builder /app/models ./models/ +RUN yarn workspace @janhq/uikit install && yarn workspace @janhq/uikit build +RUN yarn workspace jan-web install + RUN npm install -g serve@latest EXPOSE 1337 3000 3928 @@ -81,7 +79,9 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/targets/x86_64-linux/lib:/usr/local/cuda-12. ENV JAN_API_HOST 0.0.0.0 ENV JAN_API_PORT 1337 -CMD ["sh", "-c", "cd server && node build/main.js & cd web && npx serve out"] +ENV API_BASE_URL http://localhost:1337 + +CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out & cd server && node build/main.js"] # pre-requisites: nvidia-docker # docker build -t jan-gpu . -f Dockerfile.gpu diff --git a/Makefile b/Makefile index ffb1abee2..a45477b29 100644 --- a/Makefile +++ b/Makefile @@ -52,18 +52,28 @@ build: check-file-counts clean: ifeq ($(OS),Windows_NT) - powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist -Recurse -Directory | Remove-Item -Recurse -Force" + powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist, build, out -Recurse -Directory | Remove-Item -Recurse -Force" + powershell -Command "Remove-Item -Recurse -Force ./pre-install/*.tgz" + powershell -Command "Remove-Item -Recurse -Force ./electron/pre-install/*.tgz" rmdir /s /q "%USERPROFILE%\jan\extensions" else ifeq ($(shell uname -s),Linux) find . -name "node_modules" -type d -prune -exec rm -rf '{}' + find . -name ".next" -type d -exec rm -rf '{}' + find . -name "dist" -type d -exec rm -rf '{}' + + find . -name "build" -type d -exec rm -rf '{}' + + find . -name "out" -type d -exec rm -rf '{}' + + rm -rf ./pre-install/*.tgz + rm -rf ./electron/pre-install/*.tgz rm -rf "~/jan/extensions" rm -rf "~/.cache/jan*" else find . -name "node_modules" -type d -prune -exec rm -rf '{}' + find . -name ".next" -type d -exec rm -rf '{}' + find . -name "dist" -type d -exec rm -rf '{}' + + find . -name "build" -type d -exec rm -rf '{}' + + find . 
-name "out" -type d -exec rm -rf '{}' + + rm -rf ./pre-install/*.tgz + rm -rf ./electron/pre-install/*.tgz rm -rf ~/jan/extensions rm -rf ~/Library/Caches/jan* endif diff --git a/README.md b/README.md index 994723788..1b7ad119d 100644 --- a/README.md +++ b/README.md @@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute Experimental (Nightly Build) - + jan.exe - + Intel - + M1/M2 - + jan.deb - + jan.AppImage @@ -235,61 +235,84 @@ This will build the app MacOS m1/m2 for production (with code signing already do - Run Jan in Docker mode - - **Option 1**: Run Jan in CPU mode +| Docker compose Profile | Description | +| ---------------------- | -------------------------------------------- | +| `cpu-fs` | Run Jan in CPU mode with default file system | +| `cpu-s3fs` | Run Jan in CPU mode with S3 file system | +| `gpu-fs` | Run Jan in GPU mode with default file system | +| `gpu-s3fs` | Run Jan in GPU mode with S3 file system | + +| Environment Variable | Description | +| ----------------------- | ------------------------------------------------------------------------------------------------------- | +| `S3_BUCKET_NAME` | S3 bucket name - leave blank for default file system | +| `AWS_ACCESS_KEY_ID` | AWS access key ID - leave blank for default file system | +| `AWS_SECRET_ACCESS_KEY` | AWS secret access key - leave blank for default file system | +| `AWS_ENDPOINT` | AWS endpoint URL - leave blank for default file system | +| `AWS_REGION` | AWS region - leave blank for default file system | +| `API_BASE_URL` | Jan Server URL, please modify it as your public ip address or domain name default http://localhost:1377 | + +- **Option 1**: Run Jan in CPU mode + + ```bash + # cpu mode with default file system + docker compose --profile cpu-fs up -d + + # cpu mode with S3 file system + docker compose --profile cpu-s3fs up -d + ``` + +- **Option 2**: Run Jan in GPU mode + + - **Step 1**: Check CUDA compatibility with your NVIDIA driver by running `nvidia-smi` and check the CUDA version in the output ```bash - docker compose --profile cpu up -d + nvidia-smi + + # Output + +---------------------------------------------------------------------------------------+ + | NVIDIA-SMI 531.18 Driver Version: 531.18 CUDA Version: 12.1 | + |-----------------------------------------+----------------------+----------------------+ + | GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC | + | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | + | | | MIG M. 
| + |=========================================+======================+======================| + | 0 NVIDIA GeForce RTX 4070 Ti WDDM | 00000000:01:00.0 On | N/A | + | 0% 44C P8 16W / 285W| 1481MiB / 12282MiB | 2% Default | + | | | N/A | + +-----------------------------------------+----------------------+----------------------+ + | 1 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:02:00.0 Off | N/A | + | 0% 49C P8 14W / 120W| 0MiB / 6144MiB | 0% Default | + | | | N/A | + +-----------------------------------------+----------------------+----------------------+ + | 2 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:05:00.0 Off | N/A | + | 29% 38C P8 11W / 120W| 0MiB / 6144MiB | 0% Default | + | | | N/A | + +-----------------------------------------+----------------------+----------------------+ + + +---------------------------------------------------------------------------------------+ + | Processes: | + | GPU GI CI PID Type Process name GPU Memory | + | ID ID Usage | + |=======================================================================================| ``` - - **Option 2**: Run Jan in GPU mode + - **Step 2**: Visit [NVIDIA NGC Catalog ](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags) and find the smallest minor version of image tag that matches your CUDA version (e.g., 12.1 -> 12.1.0) - - **Step 1**: Check CUDA compatibility with your NVIDIA driver by running `nvidia-smi` and check the CUDA version in the output + - **Step 3**: Update the `Dockerfile.gpu` line number 5 with the latest minor version of the image tag from step 2 (e.g. change `FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`) - ```bash - nvidia-smi + - **Step 4**: Run command to start Jan in GPU mode - # Output - +---------------------------------------------------------------------------------------+ - | NVIDIA-SMI 531.18 Driver Version: 531.18 CUDA Version: 12.1 | - |-----------------------------------------+----------------------+----------------------+ - | GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC | - | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | - | | | MIG M. 
| - |=========================================+======================+======================| - | 0 NVIDIA GeForce RTX 4070 Ti WDDM | 00000000:01:00.0 On | N/A | - | 0% 44C P8 16W / 285W| 1481MiB / 12282MiB | 2% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - | 1 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:02:00.0 Off | N/A | - | 0% 49C P8 14W / 120W| 0MiB / 6144MiB | 0% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - | 2 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:05:00.0 Off | N/A | - | 29% 38C P8 11W / 120W| 0MiB / 6144MiB | 0% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ + ```bash + # GPU mode with default file system + docker compose --profile gpu up -d - +---------------------------------------------------------------------------------------+ - | Processes: | - | GPU GI CI PID Type Process name GPU Memory | - | ID ID Usage | - |=======================================================================================| - ``` + # GPU mode with S3 file system + docker compose --profile gpu-s3fs up -d + ``` - - **Step 2**: Visit [NVIDIA NGC Catalog ](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags) and find the smallest minor version of image tag that matches your CUDA version (e.g., 12.1 -> 12.1.0) +This will start the web server and you can access Jan at `http://localhost:3000`. - - **Step 3**: Update the `Dockerfile.gpu` line number 5 with the latest minor version of the image tag from step 2 (e.g. change `FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`) - - - **Step 4**: Run command to start Jan in GPU mode - - ```bash - # GPU mode - docker compose --profile gpu up -d - ``` - - This will start the web server and you can access Jan at `http://localhost:3000`. - - > Note: Currently, Docker mode is only work for development and localhost, production is not supported yet. RAG feature is not supported in Docker mode yet. +> Note: RAG feature is not supported in Docker mode with s3fs yet. 
## Acknowledgements diff --git a/charts/server/Chart.lock b/charts/server/Chart.lock new file mode 100644 index 000000000..915788d61 --- /dev/null +++ b/charts/server/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: common + repository: oci://ghcr.io/janhq/charts + version: 0.1.2 +digest: sha256:35e98bde174130787755b0f8ea2359b7b6790d965a7157c2f7cabf1bc8c04471 +generated: "2024-02-20T16:20:37.6530108+07:00" diff --git a/charts/server/Chart.yaml b/charts/server/Chart.yaml new file mode 100644 index 000000000..fb2e1c91b --- /dev/null +++ b/charts/server/Chart.yaml @@ -0,0 +1,10 @@ +apiVersion: v2 +name: jan-server +description: A Helm chart for Kubernetes +type: application +version: 0.1.0 +appVersion: '1.0.0' +dependencies: + - name: common + version: 0.1.2 # common-chart-version + repository: oci://ghcr.io/janhq/charts diff --git a/charts/server/charts/common-0.1.2.tgz b/charts/server/charts/common-0.1.2.tgz new file mode 100644 index 000000000..946617eab Binary files /dev/null and b/charts/server/charts/common-0.1.2.tgz differ diff --git a/charts/server/config.json b/charts/server/config.json new file mode 100644 index 000000000..62e9682fa --- /dev/null +++ b/charts/server/config.json @@ -0,0 +1,4 @@ +{ + "image-list": "server=ghcr.io/janhq/jan-server", + "platforms": "linux/amd64" +} \ No newline at end of file diff --git a/charts/server/values.yaml b/charts/server/values.yaml new file mode 100644 index 000000000..70f463174 --- /dev/null +++ b/charts/server/values.yaml @@ -0,0 +1,256 @@ +common: + imageTag: v0.4.6-cpu + # DO NOT CHANGE THE LINE ABOVE. MAKE ALL CHANGES BELOW + + # Global pvc for all workload + pvc: + enabled: false + name: 'janroot' + accessModes: 'ReadWriteOnce' + storageClassName: '' + capacity: '50Gi' + + # Global image pull secret + imagePullSecrets: [] + + externalSecret: + create: false + name: '' + annotations: {} + + nameOverride: 'jan-server' + fullnameOverride: 'jan-server' + + serviceAccount: + create: true + annotations: {} + name: 'jan-server-service-account' + + podDisruptionBudget: + create: false + minAvailable: 1 + + workloads: + - name: server + image: + repository: ghcr.io/janhq/jan-server + pullPolicy: Always + + command: ['/bin/sh', '-c'] + args: ['cd server && node build/main.js'] + + replicaCount: 1 + ports: + containerPort: 1337 + + strategy: + canary: + steps: + - setWeight: 50 + - pause: { duration: 1m } + + ingress: + enabled: true + className: 'nginx' + annotations: + nginx.ingress.kubernetes.io/proxy-body-size: '100m' + nginx.ingress.kubernetes.io/proxy-read-timeout: '1800' + nginx.ingress.kubernetes.io/proxy-send-timeout: '1800' + # cert-manager.io/cluster-issuer: 'jan-ai-dns01-cluster-issuer' + # nginx.ingress.kubernetes.io/force-ssl-redirect: 'true' + nginx.ingress.kubernetes.io/backend-protocol: HTTP + hosts: + - host: server.local + paths: + - path: / + pathType: Prefix + tls: + [] + # - hosts: + # - server-dev.jan.ai + # secretName: jan-server-prod-tls-v2 + + instrumentation: + enabled: false + podAnnotations: {} + + podSecurityContext: {} + + securityContext: {} + + service: + extenalLabel: {} + type: ClusterIP + port: 1337 + targetPort: 1337 + + # If you want to use GPU, please uncomment the following lines and change imageTag to the one with GPU support + resources: + # limits: + # nvidia.com/gpu: 1 + requests: + cpu: 2000m + memory: 8192M + + # If you want to use pv, please uncomment the following lines and enable pvc.enabled + volumes: + [] + # - name: janroot + # persistentVolumeClaim: + # claimName: janroot + + volumeMounts: + [] 
+ # - name: janroot + # mountPath: /app/server/build/jan + + # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET_NAME, AWS_ENDPOINT, AWS_REGION should mount as a secret env instead of plain text here + # Change API_BASE_URL to your server's public domain + env: + - name: API_BASE_URL + value: 'http://server.local' + + lifecycle: {} + autoscaling: + enabled: false + minReplicas: 2 + maxReplicas: 3 + targetCPUUtilizationPercentage: 95 + targetMemoryUtilizationPercentage: 95 + + kedaScaling: + enabled: false # ignore if autoscaling.enable = true + cooldownPeriod: 30 + pollingInterval: 2 + minReplicas: 1 + maxReplicas: 5 + metricName: celery_queue_length + query: celery_queue_length{queue_name="myqueue"} # change queue_name here + serverAddress: http://prometheus-prod-kube-prome-prometheus.monitoring.svc:9090 + threshold: '3' + + nodeSelector: {} + + tolerations: [] + + podSecurityGroup: + enabled: false + securitygroupid: [] + + # Reloader Option + reloader: 'false' + vpa: + enabled: false + + - name: web + image: + repository: ghcr.io/janhq/jan-server + pullPolicy: Always + + command: ['/bin/sh', '-c'] + args: + [ + 'export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out', + ] + + replicaCount: 1 + ports: + containerPort: 3000 + + strategy: + canary: + steps: + - setWeight: 50 + - pause: { duration: 1m } + + ingress: + enabled: true + className: 'nginx' + annotations: + nginx.ingress.kubernetes.io/proxy-body-size: '100m' + nginx.ingress.kubernetes.io/proxy-read-timeout: '1800' + nginx.ingress.kubernetes.io/proxy-send-timeout: '1800' + # cert-manager.io/cluster-issuer: 'jan-ai-dns01-cluster-issuer' + # nginx.ingress.kubernetes.io/force-ssl-redirect: 'true' + nginx.ingress.kubernetes.io/backend-protocol: HTTP + hosts: + - host: web.local + paths: + - path: / + pathType: Prefix + tls: + [] + # - hosts: + # - server-dev.jan.ai + # secretName: jan-server-prod-tls-v2 + + instrumentation: + enabled: false + podAnnotations: {} + + podSecurityContext: {} + + securityContext: {} + + service: + extenalLabel: {} + type: ClusterIP + port: 3000 + targetPort: 3000 + + resources: + limits: + cpu: 1000m + memory: 2048M + requests: + cpu: 50m + memory: 500M + + volumes: + [] + # - name: janroot + # persistentVolumeClaim: + # claimName: janroot + + volumeMounts: + [] + # - name: janroot + # mountPath: /app/server/build/jan + + # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET_NAME, AWS_ENDPOINT, AWS_REGION should mount as a secret env instead of plain text here + # Change API_BASE_URL to your server's public domain + env: + - name: API_BASE_URL + value: 'http://server.local' + + lifecycle: {} + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 3 + targetCPUUtilizationPercentage: 95 + targetMemoryUtilizationPercentage: 95 + + kedaScaling: + enabled: false # ignore if autoscaling.enable = true + cooldownPeriod: 30 + pollingInterval: 2 + minReplicas: 1 + maxReplicas: 5 + metricName: celery_queue_length + query: celery_queue_length{queue_name="myqueue"} # change queue_name here + serverAddress: http://prometheus-prod-kube-prome-prometheus.monitoring.svc:9090 + threshold: '3' + + nodeSelector: {} + + tolerations: [] + + podSecurityGroup: + enabled: false + securitygroupid: [] + + # Reloader Option + reloader: 'false' + vpa: + enabled: false diff --git a/core/src/node/api/processors/app.ts b/core/src/node/api/processors/app.ts index a4b1a5a06..c62b5011d 100644 --- a/core/src/node/api/processors/app.ts +++ b/core/src/node/api/processors/app.ts @@ -1,6 +1,5 @@ import { 
basename, isAbsolute, join, relative } from 'path' -import { AppRoute } from '../../../api' import { Processor } from './Processor' import { getAppConfigurations as appConfiguration, updateAppConfiguration } from '../../helper' import { log as writeLog, logServer as writeServerLog } from '../../helper/log' diff --git a/core/src/node/api/restful/common.ts b/core/src/node/api/restful/common.ts index b87bc946d..433632989 100644 --- a/core/src/node/api/restful/common.ts +++ b/core/src/node/api/restful/common.ts @@ -8,7 +8,7 @@ import { createMessage, createThread, getMessages, - retrieveMesasge, + retrieveMessage, updateThread, } from './helper/builder' @@ -17,10 +17,18 @@ import { startModel, stopModel } from './helper/startStopModel' import { ModelSettingParams } from '../../../types' export const commonRouter = async (app: HttpServer) => { + const normalizeData = (data: any) => { + return { + object: 'list', + data, + } + } // Common Routes // Read & Delete :: Threads | Models | Assistants Object.keys(JanApiRouteConfiguration).forEach((key) => { - app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key])) + app.get(`/${key}`, async (_request) => + getBuilder(JanApiRouteConfiguration[key]).then(normalizeData) + ) app.get(`/${key}/:id`, async (request: any) => retrieveBuilder(JanApiRouteConfiguration[key], request.params.id) @@ -34,10 +42,12 @@ export const commonRouter = async (app: HttpServer) => { // Threads app.post(`/threads/`, async (req, res) => createThread(req.body)) - app.get(`/threads/:threadId/messages`, async (req, res) => getMessages(req.params.threadId)) + app.get(`/threads/:threadId/messages`, async (req, res) => + getMessages(req.params.threadId).then(normalizeData) + ) app.get(`/threads/:threadId/messages/:messageId`, async (req, res) => - retrieveMesasge(req.params.threadId, req.params.messageId) + retrieveMessage(req.params.threadId, req.params.messageId) ) app.post(`/threads/:threadId/messages`, async (req, res) => diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts index a8124a74a..7001c0c76 100644 --- a/core/src/node/api/restful/helper/builder.ts +++ b/core/src/node/api/restful/helper/builder.ts @@ -125,7 +125,7 @@ export const getMessages = async (threadId: string): Promise => } } -export const retrieveMesasge = async (threadId: string, messageId: string) => { +export const retrieveMessage = async (threadId: string, messageId: string) => { const messages = await getMessages(threadId) const filteredMessages = messages.filter((m) => m.id === messageId) if (!filteredMessages || filteredMessages.length === 0) { @@ -318,13 +318,6 @@ export const chatCompletions = async (request: any, reply: any) => { apiUrl = engineConfiguration.full_url } - reply.raw.writeHead(200, { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - 'Access-Control-Allow-Origin': '*', - }) - const headers: Record = { 'Content-Type': 'application/json', } @@ -343,8 +336,14 @@ export const chatCompletions = async (request: any, reply: any) => { }) if (response.status !== 200) { console.error(response) - return + reply.code(400).send(response) } else { + reply.raw.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Access-Control-Allow-Origin': '*', + }) response.body.pipe(reply.raw) } } diff --git a/core/src/node/helper/config.ts b/core/src/node/helper/config.ts index a47875e68..71e721578 100644 --- 
a/core/src/node/helper/config.ts +++ b/core/src/node/helper/config.ts @@ -1,8 +1,7 @@ -import { AppConfiguration, SystemResourceInfo } from '../../types' +import { AppConfiguration } from '../../types' import { join } from 'path' import fs from 'fs' import os from 'os' -import { log, logServer } from './log' import childProcess from 'child_process' // TODO: move this to core diff --git a/docker-compose.yml b/docker-compose.yml index 4195a3294..1691a841a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -42,10 +42,10 @@ services: vpcbr: # app_cpu service for running the CPU version of the application - app_cpu: + app_cpu_s3fs: image: jan:latest volumes: - - app_data:/app/server/build/jan + - app_data_cpu_s3fs:/app/server/build/jan build: context: . dockerfile: Dockerfile @@ -56,9 +56,10 @@ services: S3_BUCKET_NAME: mybucket AWS_ENDPOINT: http://10.5.0.2:9000 AWS_REGION: us-east-1 + API_BASE_URL: http://localhost:1337 restart: always profiles: - - cpu + - cpu-s3fs ports: - "3000:3000" - "1337:1337" @@ -68,7 +69,7 @@ services: ipv4_address: 10.5.0.3 # app_gpu service for running the GPU version of the application - app_gpu: + app_gpu_s3fs: deploy: resources: reservations: @@ -78,7 +79,7 @@ services: capabilities: [gpu] image: jan-gpu:latest volumes: - - app_data:/app/server/build/jan + - app_data_gpu_s3fs:/app/server/build/jan build: context: . dockerfile: Dockerfile.gpu @@ -90,8 +91,9 @@ services: S3_BUCKET_NAME: mybucket AWS_ENDPOINT: http://10.5.0.2:9000 AWS_REGION: us-east-1 + API_BASE_URL: http://localhost:1337 profiles: - - gpu + - gpu-s3fs ports: - "3000:3000" - "1337:1337" @@ -100,9 +102,60 @@ services: vpcbr: ipv4_address: 10.5.0.4 + app_cpu_fs: + image: jan:latest + volumes: + - app_data_cpu_fs:/app/server/build/jan + build: + context: . + dockerfile: Dockerfile + environment: + API_BASE_URL: http://localhost:1337 + restart: always + profiles: + - cpu-fs + ports: + - "3000:3000" + - "1337:1337" + - "3928:3928" + networks: + vpcbr: + ipv4_address: 10.5.0.5 + + # app_gpu service for running the GPU version of the application + app_gpu_fs: + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + image: jan-gpu:latest + volumes: + - app_data_gpu_fs:/app/server/build/jan + build: + context: . 
+ dockerfile: Dockerfile.gpu + restart: always + environment: + API_BASE_URL: http://localhost:1337 + profiles: + - gpu-fs + ports: + - "3000:3000" + - "1337:1337" + - "3928:3928" + networks: + vpcbr: + ipv4_address: 10.5.0.6 + volumes: minio_data: - app_data: + app_data_cpu_s3fs: + app_data_gpu_s3fs: + app_data_cpu_fs: + app_data_gpu_fs: networks: vpcbr: @@ -113,5 +166,7 @@ networks: gateway: 10.5.0.1 # Usage: -# - Run 'docker-compose --profile cpu up -d' to start the app_cpu service -# - Run 'docker-compose --profile gpu up -d' to start the app_gpu service +# - Run 'docker compose --profile cpu-s3fs up -d' to start the app_cpu_s3fs service +# - Run 'docker compose --profile gpu-s3fs up -d' to start the app_gpu_s3fs service +# - Run 'docker compose --profile cpu-fs up -d' to start the app_cpu_fs service +# - Run 'docker compose --profile gpu-fs up -d' to start the app_gpu_fs service diff --git a/docs/docs/guides/04-using-models/02-import-manually.mdx b/docs/docs/guides/04-using-models/02-import-manually.mdx index 68142a8af..7c446ea1c 100644 --- a/docs/docs/guides/04-using-models/02-import-manually.mdx +++ b/docs/docs/guides/04-using-models/02-import-manually.mdx @@ -29,6 +29,10 @@ In this section, we will show you how to import a GGUF model from [HuggingFace]( > We are fast shipping a UI to make this easier, but it's a bit manual for now. Apologies. +## Import Models Using Absolute Filepath (version 0.4.7) + +Starting from version 0.4.7, Jan can import models using an absolute file path. This allows you to import models from any directory on your computer. Please check the [import models using absolute filepath](../import-models-using-absolute-filepath) guide for more information. + ## Manually Importing a Downloaded Model (nightly versions and v0.4.4+) ### 1. Create a Model Folder @@ -186,7 +190,6 @@ This means that you can easily reconfigure your models, export them, and share y Edit `model.json` and include the following configurations: -- Ensure the filename must be `model.json`. - Ensure the `id` property matches the folder name you created. - Ensure the GGUF filename should match the `id` property exactly. - Ensure the `source.url` property is the direct binary download link ending in `.gguf`. In HuggingFace, you can find the direct links in the `Files and versions` tab. diff --git a/docs/docs/guides/04-using-models/03-import-models-using-absolute-filepath.mdx b/docs/docs/guides/04-using-models/03-import-models-using-absolute-filepath.mdx new file mode 100644 index 000000000..490f68cd6 --- /dev/null +++ b/docs/docs/guides/04-using-models/03-import-models-using-absolute-filepath.mdx @@ -0,0 +1,84 @@ +--- +title: Import Models Using Absolute Filepath +slug: /guides/using-models/import-models-using-absolute-filepath +description: Guide to importing a model using an absolute filepath in Jan. +keywords: + [ + Jan AI, + Jan, + ChatGPT alternative, + local AI, + private AI, + conversational AI, + no-subscription fee, + large language model, + import-models-manually, + absolute-filepath, + ] +--- + +In this guide, we will walk you through the process of importing a model using an absolute filepath in Jan, using our latest model, [Trinity](https://huggingface.co/janhq/trinity-v1-GGUF), as an example. + +### 1. Get the Absolute Filepath of the Model + +After downloading a `.gguf` model, copy the absolute filepath of the model file. + +### 2. Configure the Model JSON + +1. Navigate to the `~/jan/models` folder. +2. Create a folder named ``, for example, `tinyllama`. +3. 
Create a `model.json` file inside the folder, including the following configurations: + +- Ensure the `id` property matches the folder name you created. +- Ensure the `url` property is the direct binary download link ending in `.gguf`. Now, you can use the absolute filepath of the model file. +- Ensure the `engine` property is set to `nitro`. + +```json +{ + "sources": [ + { + "filename": "tinyllama.gguf", + // highlight-next-line + "url": "" + } + ], + "id": "tinyllama-1.1b", + "object": "model", + "name": "(Absolute Path) TinyLlama Chat 1.1B Q4", + "version": "1.0", + "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>", + "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 2048, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "TinyLlama", + "tags": ["Tiny", "Foundation Model"], + "size": 669000000 + }, + "engine": "nitro" +} +``` + +:::warning + +- If you are using Windows, you need to use double backslashes in the url property, for example: `C:\\Users\\username\\filename.gguf`. + +::: + +### 3. Start the Model + +Restart Jan and navigate to the Hub. Locate your model and click the Use button. + +![Demo](assets/03-demo-absolute-filepath.gif) \ No newline at end of file diff --git a/docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx b/docs/docs/guides/04-using-models/04-integrate-with-remote-server.mdx similarity index 97% rename from docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx rename to docs/docs/guides/04-using-models/04-integrate-with-remote-server.mdx index f0db1bd55..3632a40b0 100644 --- a/docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx +++ b/docs/docs/guides/04-using-models/04-integrate-with-remote-server.mdx @@ -88,7 +88,7 @@ You can find your API keys in the [OpenAI Platform](https://platform.openai.com/ Restart Jan and navigate to the Hub. Then, select your configured model and start the model. -![image-01](assets/03-openai-platform-configuration.png) +![image-01](assets/04-openai-platform-configuration.png) ## Engines with OAI Compatible Configuration @@ -159,7 +159,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4` Restart Jan and navigate to the Hub. Locate your model and click the Use button. 
-![image-02](assets/03-oai-compatible-configuration.png) +![image-02](assets/04-oai-compatible-configuration.png) ## Assistance and Support diff --git a/docs/docs/guides/04-using-models/04-customize-engine-settings.mdx b/docs/docs/guides/04-using-models/05-customize-engine-settings.mdx similarity index 100% rename from docs/docs/guides/04-using-models/04-customize-engine-settings.mdx rename to docs/docs/guides/04-using-models/05-customize-engine-settings.mdx diff --git a/docs/docs/guides/04-using-models/assets/03-demo-absolute-filepath.gif b/docs/docs/guides/04-using-models/assets/03-demo-absolute-filepath.gif new file mode 100644 index 000000000..24dcc251a Binary files /dev/null and b/docs/docs/guides/04-using-models/assets/03-demo-absolute-filepath.gif differ diff --git a/docs/docs/guides/04-using-models/assets/03-oai-compatible-configuration.png b/docs/docs/guides/04-using-models/assets/04-oai-compatible-configuration.png similarity index 100% rename from docs/docs/guides/04-using-models/assets/03-oai-compatible-configuration.png rename to docs/docs/guides/04-using-models/assets/04-oai-compatible-configuration.png diff --git a/docs/docs/guides/04-using-models/assets/03-openai-platform-configuration.png b/docs/docs/guides/04-using-models/assets/04-openai-platform-configuration.png similarity index 100% rename from docs/docs/guides/04-using-models/assets/03-openai-platform-configuration.png rename to docs/docs/guides/04-using-models/assets/04-openai-platform-configuration.png diff --git a/docs/docs/guides/05-using-server/01-start-server.md b/docs/docs/guides/05-using-server/01-start-server.md index c8e5cdba3..2433fd80a 100644 --- a/docs/docs/guides/05-using-server/01-start-server.md +++ b/docs/docs/guides/05-using-server/01-start-server.md @@ -1,6 +1,6 @@ --- title: Start Local Server -slug: /guides/using-server/server +slug: /guides/using-server/start-server description: How to run Jan's built-in API server. keywords: [ diff --git a/docs/docs/guides/07-integrations/01-integrate-continue.mdx b/docs/docs/guides/07-integrations/01-integrate-continue.mdx index b3722874f..1fa0397e2 100644 --- a/docs/docs/guides/07-integrations/01-integrate-continue.mdx +++ b/docs/docs/guides/07-integrations/01-integrate-continue.mdx @@ -35,7 +35,7 @@ To get started with Continue in VS Code, please follow this [guide to install Co ### 2. Enable Jan API Server -To configure the Continue to use Jan's Local Server, you need to enable Jan API Server with your preferred model, please follow this [guide to enable Jan API Server](../05-using-server/01-server.md) +To configure the Continue to use Jan's Local Server, you need to enable Jan API Server with your preferred model, please follow this [guide to enable Jan API Server](/guides/using-server/start-server). ### 3. 
Configure Continue to Use Jan's Local Server diff --git a/docs/docs/guides/07-integrations/04-integrate-mistral-ai.mdx b/docs/docs/guides/07-integrations/04-integrate-mistral-ai.mdx new file mode 100644 index 000000000..14ddeaa75 --- /dev/null +++ b/docs/docs/guides/07-integrations/04-integrate-mistral-ai.mdx @@ -0,0 +1,89 @@ +--- +title: Integrate Mistral AI with Jan +slug: /guides/integrations/mistral-ai +description: Guide to integrate Mistral AI with Jan +keywords: + [ + Jan AI, + Jan, + ChatGPT alternative, + local AI, + private AI, + conversational AI, + no-subscription fee, + large language model, + Mistral integration, + ] +--- + +## Quick Introduction + +[Mistral AI](https://docs.mistral.ai/) currently provides two ways of accessing their Large Language Models (LLM) - via their API or via open source models available on Hugging Face. In this guide, we will show you how to integrate Mistral AI with Jan using the API method. + +## Steps to Integrate Mistral AI with Jan + +### 1. Configure Mistral API key + +You can find your API keys in the [Mistral API Key](https://console.mistral.ai/user/api-keys/) and set the Mistral AI API key in `~/jan/engines/openai.json` file. + +```json title="~/jan/engines/openai.json" +{ + // highlight-start + "full_url": "https://api.mistral.ai/v1/chat/completions", + "api_key": "" + // highlight-end +} +``` + +### 2. Modify a Model JSON + +Navigate to the `~/jan/models` folder. Create a folder named ``, for example, `mistral-tiny` and create a `model.json` file inside the folder including the following configurations: + +- Ensure the filename must be `model.json`. +- Ensure the `id` property is set to the model id from Mistral AI. +- Ensure the `format` property is set to `api`. +- Ensure the `engine` property is set to `openai`. +- Ensure the `state` property is set to `ready`. + +```json title="~/jan/models/mistral-tiny/model.json" +{ + "sources": [ + { + "filename": "mistral-tiny", + "url": "https://mistral.ai/" + } + ], + "id": "mistral-tiny", + "object": "model", + "name": "Mistral-7B-v0.2 (Tiny Endpoint)", + "version": "1.0", + "description": "Currently powered by Mistral-7B-v0.2, a better fine-tuning of the initial Mistral-7B released, inspired by the fantastic work of the community.", + // highlight-next-line + "format": "api", + "settings": {}, + "parameters": {}, + "metadata": { + "author": "Mistral AI", + "tags": ["General", "Big Context Length"] + }, + // highlight-start + "engine": "openai" + // highlight-end +} +``` + +:::tip + +Mistral AI provides different endpoints. Please check out their [endpoint documentation](https://docs.mistral.ai/platform/endpoints/) to find the one that suits your needs. In this example, we will use the `mistral-tiny` model. + +::: + +### 3. Start the Model + +Restart Jan and navigate to the Hub. Locate your model and click the Use button. + +![Mitral AI Tiny Model](assets/04-mistral-ai-tiny-hub.png) + +### 4. 
Try Out the Integration of Jan and Mistral AI + +![Mistral AI Integration Demo](assets/04-mistral-ai-integration-demo.gif) diff --git a/docs/docs/guides/07-integrations/05-integrate-lmstudio.mdx b/docs/docs/guides/07-integrations/05-integrate-lmstudio.mdx new file mode 100644 index 000000000..58e2f0be9 --- /dev/null +++ b/docs/docs/guides/07-integrations/05-integrate-lmstudio.mdx @@ -0,0 +1,184 @@ +--- +title: Integrate LM Studio with Jan +slug: /guides/integrations/lmstudio +description: Guide to integrate LM Studio with Jan +keywords: + [ + Jan AI, + Jan, + ChatGPT alternative, + local AI, + private AI, + conversational AI, + no-subscription fee, + large language model, + LM Studio integration, + ] +--- + +## Quick Introduction + +With [LM Studio](https://lmstudio.ai/), you can discover, download, and run local Large Language Models (LLMs). In this guide, we will show you how to integrate and use your current models on LM Studio with Jan using 2 methods. The first method is integrating LM Studio server with Jan UI. The second method is migrating your downloaded model from LM Studio to Jan. We will use the [Phi 2 - GGUF](https://huggingface.co/TheBloke/phi-2-GGUF) model on Hugging Face as an example. + +## Steps to Integrate LM Studio Server with Jan UI + +### 1. Start the LM Studio Server + +1. Navigate to the `Local Inference Server` on the LM Studio application. +2. Select the model you want to use. +3. Start the server after configuring the server port and options. + +![LM Studio Server](assets/05-setting-lmstudio-server.gif) + +
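Before pointing Jan at the server, it can help to confirm that the endpoint responds. The sketch below is an assumption-laden example: it presumes LM Studio is serving on its default port `1234` and exposes the OpenAI-compatible `/v1/chat/completions` route referenced later in this guide; adjust the port if you configured a different one.

```bash
# Quick smoke test of the local LM Studio server.
# Port 1234 is an assumption (LM Studio's default) -- change it if you picked another port.
curl http://localhost:1234/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "messages": [{ "role": "user", "content": "Say hello in one sentence." }],
        "temperature": 0.7
      }'
```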

+ +Modify the `openai.json` file in the `~/jan/engines` folder to include the full URL of the LM Studio server. + +```json title="~/jan/engines/openai.json" +{ + "full_url": "http://localhost:/v1/chat/completions" +} +``` + +:::tip + +- Replace `` with the port number you set in the LM Studio server. The default port is `1234`. + +::: + +### 2. Modify a Model JSON + +Navigate to the `~/jan/models` folder. Create a folder named ``, for example, `lmstudio-phi-2` and create a `model.json` file inside the folder including the following configurations: + +- Set the `format` property to `api`. +- Set the `engine` property to `openai`. +- Set the `state` property to `ready`. + +```json title="~/jan/models/lmstudio-phi-2/model.json" +{ + "sources": [ + { + "filename": "phi-2-GGUF", + "url": "https://huggingface.co/TheBloke/phi-2-GGUF" + } + ], + "id": "lmstudio-phi-2", + "object": "model", + "name": "LM Studio - Phi 2 - GGUF", + "version": "1.0", + "description": "TheBloke/phi-2-GGUF", + // highlight-next-line + "format": "api", + "settings": {}, + "parameters": {}, + "metadata": { + "author": "Microsoft", + "tags": ["General", "Big Context Length"] + }, + // highlight-start + "engine": "openai" + // highlight-end +} +``` + +### 3. Start the Model + +1. Restart Jan and navigate to the **Hub**. +2. Locate your model and click the **Use** button. + +![LM Studio Model](assets/05-lmstudio-run.png) + +### 4. Try Out the Integration of Jan and LM Studio + +![LM Studio Integration Demo](assets/05-lmstudio-integration-demo.gif) + +## Steps to Migrate Your Downloaded Model from LM Studio to Jan (version 0.4.6 and older) + +### 1. Migrate Your Downloaded Model + +1. Navigate to `My Models` in the LM Studio application and reveal the model folder. + +![Reveal-model-folder-lmstudio](assets/05-reveal-model-folder-lmstudio.gif) + +2. Copy the model folder that you want to migrate to `~/jan/models` folder. + +3. Ensure the folder name property is the same as the model name of `.gguf` filename by changing the folder name if necessary. For example, in this case, we changed foldername from `TheBloke` to `phi-2.Q4_K_S`. + +### 2. Start the Model + +1. Restart Jan and navigate to the **Hub**. Jan will automatically detect the model and display it in the **Hub**. +2. Locate your model and click the **Use** button to try the migrating model. + +![Demo](assets/05-demo-migrating-model.gif) + +## Steps to Pointing to the Downloaded Model of LM Studio from Jan (version 0.4.7+) + +Starting from version 0.4.7, Jan supports importing models using an absolute filepath, so you can directly use the model from the LM Studio folder. + +### 1. Reveal the Model Absolute Path + +Navigate to `My Models` in the LM Studio application and reveal the model folder. Then, you can get the absolute path of your model. + +![Reveal-model-folder-lmstudio](assets/05-reveal-model-folder-lmstudio.gif) + +### 2. Modify a Model JSON + +Navigate to the `~/jan/models` folder. Create a folder named ``, for example, `phi-2.Q4_K_S` and create a `model.json` file inside the folder including the following configurations: + +- Ensure the `id` property matches the folder name you created. +- Ensure the `url` property is the direct binary download link ending in `.gguf`. Now, you can use the absolute filepath of the model file. In this example, the absolute filepath is `/Users//.cache/lm-studio/models/TheBloke/phi-2-GGUF/phi-2.Q4_K_S.gguf`. +- Ensure the `engine` property is set to `nitro`. 
+ +```json +{ + "object": "model", + "version": 1, + "format": "gguf", + "sources": [ + { + "filename": "phi-2.Q4_K_S.gguf", + "url": "" + } + ], + "id": "phi-2.Q4_K_S", + "name": "phi-2.Q4_K_S", + "created": 1708308111506, + "description": "phi-2.Q4_K_S - user self import model", + "settings": { + "ctx_len": 4096, + "embedding": false, + "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:", + "llama_model_path": "phi-2.Q4_K_S.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 2048, + "stop": [""], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "size": 1615568736, + "author": "User", + "tags": [] + }, + "engine": "nitro" +} +``` + +:::warning + +- If you are using Windows, you need to use double backslashes in the url property, for example: `C:\\Users\\username\\filename.gguf`. + +::: + + +### 3. Start the Model + +1. Restart Jan and navigate to the **Hub**. +2. Jan will automatically detect the model and display it in the **Hub**. +3. Locate your model and click the **Use** button to try the migrating model. + +![Demo](assets/05-demo-pointing-model.gif) diff --git a/docs/docs/guides/07-integrations/06-integrate-ollama.mdx b/docs/docs/guides/07-integrations/06-integrate-ollama.mdx new file mode 100644 index 000000000..e55c3e49f --- /dev/null +++ b/docs/docs/guides/07-integrations/06-integrate-ollama.mdx @@ -0,0 +1,90 @@ +--- +title: Integrate Ollama with Jan +slug: /guides/integrations/ollama +description: Guide to integrate Ollama with Jan +keywords: + [ + Jan AI, + Jan, + ChatGPT alternative, + local AI, + private AI, + conversational AI, + no-subscription fee, + large language model, + Ollama integration, + ] +--- + +## Quick Introduction + +With [Ollama](https://ollama.com/), you can run large language models locally. In this guide, we will show you how to integrate and use your current models on Ollama with Jan using 2 methods. The first method is integrating Ollama server with Jan UI. The second method is migrating your downloaded model from Ollama to Jan. We will use the [llama2](https://ollama.com/library/llama2) model as an example. + +## Steps to Integrate Ollama Server with Jan UI + +### 1. Start the Ollama Server + +1. Select the model you want to use from the [Ollama library](https://ollama.com/library). +2. Run your model by using the following command: + +```bash +ollama run +``` + +3. According to the [Ollama documentation on OpenAI compatibility](https://github.com/ollama/ollama/blob/main/docs/openai.md), you can use the `http://localhost:11434/v1/chat/completions` endpoint to interact with the Ollama server. Thus, modify the `openai.json` file in the `~/jan/engines` folder to include the full URL of the Ollama server. + +```json title="~/jan/engines/openai.json" +{ + "full_url": "http://localhost:11434/v1/chat/completions" +} +``` + +### 2. Modify a Model JSON + +1. Navigate to the `~/jan/models` folder. +2. Create a folder named ``, for example, `lmstudio-phi-2`. +3. Create a `model.json` file inside the folder including the following configurations: + +- Set the `id` property to the model name as Ollama model name. +- Set the `format` property to `api`. +- Set the `engine` property to `openai`. +- Set the `state` property to `ready`. 
+ +```json title="~/jan/models/llama2/model.json" +{ + "sources": [ + { + "filename": "llama2", + "url": "https://ollama.com/library/llama2" + } + ], + // highlight-next-line + "id": "llama2", + "object": "model", + "name": "Ollama - Llama2", + "version": "1.0", + "description": "Llama 2 is a collection of foundation language models ranging from 7B to 70B parameters.", + // highlight-next-line + "format": "api", + "settings": {}, + "parameters": {}, + "metadata": { + "author": "Meta", + "tags": ["General", "Big Context Length"] + }, + // highlight-next-line + "engine": "openai" +} +``` + +### 3. Start the Model + +1. Restart Jan and navigate to the **Hub**. +2. Locate your model and click the **Use** button. + +![Ollama Model](assets/06-ollama-run.png) + +### 4. Try Out the Integration of Jan and Ollama + +![Ollama Integration Demo](assets/06-ollama-integration-demo.gif) + diff --git a/docs/docs/guides/07-integrations/assets/04-mistral-ai-integration-demo.gif b/docs/docs/guides/07-integrations/assets/04-mistral-ai-integration-demo.gif new file mode 100644 index 000000000..015167e2a Binary files /dev/null and b/docs/docs/guides/07-integrations/assets/04-mistral-ai-integration-demo.gif differ diff --git a/docs/docs/guides/07-integrations/assets/04-mistral-ai-tiny-hub.png b/docs/docs/guides/07-integrations/assets/04-mistral-ai-tiny-hub.png new file mode 100644 index 000000000..1ae377d70 Binary files /dev/null and b/docs/docs/guides/07-integrations/assets/04-mistral-ai-tiny-hub.png differ diff --git a/docs/docs/guides/07-integrations/assets/05-demo-migrating-model.gif b/docs/docs/guides/07-integrations/assets/05-demo-migrating-model.gif new file mode 100644 index 000000000..985755e47 Binary files /dev/null and b/docs/docs/guides/07-integrations/assets/05-demo-migrating-model.gif differ diff --git a/docs/docs/guides/07-integrations/assets/05-demo-pointing-model.gif b/docs/docs/guides/07-integrations/assets/05-demo-pointing-model.gif new file mode 100644 index 000000000..137fb955a Binary files /dev/null and b/docs/docs/guides/07-integrations/assets/05-demo-pointing-model.gif differ diff --git a/docs/docs/guides/07-integrations/assets/05-lmstudio-integration-demo.gif b/docs/docs/guides/07-integrations/assets/05-lmstudio-integration-demo.gif new file mode 100644 index 000000000..445ea3416 Binary files /dev/null and b/docs/docs/guides/07-integrations/assets/05-lmstudio-integration-demo.gif differ diff --git a/docs/docs/guides/07-integrations/assets/05-lmstudio-run.png b/docs/docs/guides/07-integrations/assets/05-lmstudio-run.png new file mode 100644 index 000000000..721581f72 Binary files /dev/null and b/docs/docs/guides/07-integrations/assets/05-lmstudio-run.png differ diff --git a/docs/docs/guides/07-integrations/assets/05-reveal-model-folder-lmstudio.gif b/docs/docs/guides/07-integrations/assets/05-reveal-model-folder-lmstudio.gif new file mode 100644 index 000000000..4c1ee85fc Binary files /dev/null and b/docs/docs/guides/07-integrations/assets/05-reveal-model-folder-lmstudio.gif differ diff --git a/docs/docs/guides/07-integrations/assets/05-setting-lmstudio-server.gif b/docs/docs/guides/07-integrations/assets/05-setting-lmstudio-server.gif new file mode 100644 index 000000000..63084be01 Binary files /dev/null and b/docs/docs/guides/07-integrations/assets/05-setting-lmstudio-server.gif differ diff --git a/docs/docs/guides/07-integrations/assets/06-ollama-integration-demo.gif b/docs/docs/guides/07-integrations/assets/06-ollama-integration-demo.gif new file mode 100644 index 000000000..708f2058a 
Binary files /dev/null and b/docs/docs/guides/07-integrations/assets/06-ollama-integration-demo.gif differ diff --git a/docs/docs/guides/07-integrations/assets/06-ollama-run.png b/docs/docs/guides/07-integrations/assets/06-ollama-run.png new file mode 100644 index 000000000..7f18e1b15 Binary files /dev/null and b/docs/docs/guides/07-integrations/assets/06-ollama-run.png differ diff --git a/docs/docs/guides/09-advanced-settings/README.mdx b/docs/docs/guides/09-advanced-settings/README.mdx index a02e44e1d..ba3da9bb1 100644 --- a/docs/docs/guides/09-advanced-settings/README.mdx +++ b/docs/docs/guides/09-advanced-settings/README.mdx @@ -16,6 +16,50 @@ keywords: ] --- -import DocCardList from "@theme/DocCardList"; +This guide will show you how to use the advanced settings in Jan. - +## Keyboard Shortcuts + +Keyboard shortcuts are a great way to speed up your workflow. Here are some of the keyboard shortcuts that you can use in Jan. + +| Combination | Description | +| --------------- | -------------------------------------------- | +| `⌘ E` | Show a list of your models | +| `⌘ K` | Show a list of navigation pages | +| `⌘ B` | Toggle the collapsible left panel | +| `⌘ ,` | Navigate to the settings page | +| `Enter` | Send a message | +| `Shift + Enter` | Insert a new line in the input box | +| `Arrow Up` | Navigate to the previous option (within search dialog) | +| `Arrow Down` | Navigate to the next option (within search dialog) | + +

:::note +`⌘` is the command key on macOS, and `Ctrl` on Windows. +::: + +## Experimental Mode + +Experimental mode allows you to enable experimental features that may be unstable or not fully tested. + +## Jan Data Folder + +The Jan data folder is the location where messages, model configurations, and other user data are placed. You can move the data folder to a different location. + +![00-changing-folder](./assets/00-changing-folder.gif) + +## HTTPS Proxy & Ignore SSL Certificate + +HTTPS Proxy allows you to use a proxy server to connect to the internet. You can also ignore SSL certificates if you are using a self-signed certificate. +Please check out the guide on [how to set up your own HTTPS proxy server and configure Jan to use it](../advanced-settings/https-proxy) for more information. + +## Clear Logs + +Clear logs will remove all logs from the Jan application. + +## Reset To Factory Default + +Reset the application to its original state, deleting all your usage data, including model customizations and conversation history. This action is irreversible and recommended only if the application is in a corrupted state. + +![00-reset-factory-settings](./assets/00-reset-factory-settings.gif) diff --git a/docs/docs/guides/09-advanced-settings/assets/00-changing-folder.gif b/docs/docs/guides/09-advanced-settings/assets/00-changing-folder.gif new file mode 100644 index 000000000..ac280a5c3 Binary files /dev/null and b/docs/docs/guides/09-advanced-settings/assets/00-changing-folder.gif differ diff --git a/docs/docs/guides/09-advanced-settings/assets/00-reset-factory-settings.gif b/docs/docs/guides/09-advanced-settings/assets/00-reset-factory-settings.gif new file mode 100644 index 000000000..81760848d Binary files /dev/null and b/docs/docs/guides/09-advanced-settings/assets/00-reset-factory-settings.gif differ diff --git a/docs/docs/guides/09-advanced-settings/assets/01-https-proxy-jan-configure.png b/docs/docs/guides/09-advanced-settings/assets/01-https-proxy-jan-configure.png index e22dfb2cd..25e0f7660 100644 Binary files a/docs/docs/guides/09-advanced-settings/assets/01-https-proxy-jan-configure.png and b/docs/docs/guides/09-advanced-settings/assets/01-https-proxy-jan-configure.png differ diff --git a/electron/handlers/update.ts b/electron/handlers/update.ts index cfd738f78..0d8cc4cc0 100644 --- a/electron/handlers/update.ts +++ b/electron/handlers/update.ts @@ -11,7 +11,8 @@ export function handleAppUpdates() { /* New Update Available */ autoUpdater.on('update-available', async (_info: any) => { const action = await dialog.showMessageBox({ - message: `Update available. Do you want to download the latest update?`, + title: 'Update Available', + message: 'Would you like to download and install it now?', buttons: ['Download', 'Later'], }) if (action.response === 0) await autoUpdater.downloadUpdate() diff --git a/electron/main.ts b/electron/main.ts index 13e181cdf..de18b8f9d 100644 --- a/electron/main.ts +++ b/electron/main.ts @@ -83,6 +83,7 @@ function createMainWindow() { /* Enable dev tools for development */ if (!app.isPackaged) mainWindow.webContents.openDevTools() + log(`Version: ${app.getVersion()}`) } /** diff --git a/electron/package.json b/electron/package.json index deff3826a..a89803077 100644 --- a/electron/package.json +++ b/electron/package.json @@ -4,6 +4,7 @@ "main": "./build/main.js", "author": "Jan ", "license": "MIT", + "productName": "Jan", "homepage": "https://github.com/janhq/jan/tree/main/electron", "description": "Use offline LLMs with your own data. 
Run open source models like Llama2 or Falcon on your internal computers/servers.", "build": { diff --git a/electron/utils/menu.ts b/electron/utils/menu.ts index 4825991ee..893907c48 100644 --- a/electron/utils/menu.ts +++ b/electron/utils/menu.ts @@ -1,18 +1,41 @@ // @ts-nocheck -import { app, Menu, shell } from 'electron' -const isMac = process.platform === 'darwin' +import { app, Menu, shell, dialog } from 'electron' import { autoUpdater } from 'electron-updater' +import { log } from '@janhq/core/node' +const isMac = process.platform === 'darwin' const template: (Electron.MenuItemConstructorOptions | Electron.MenuItem)[] = [ { label: app.name, submenu: [ - { role: 'about' }, + { + label: `About ${app.name}`, + click: () => + dialog.showMessageBox({ + title: `Jan`, + message: `Jan Version v${app.getVersion()}\n\nCopyright © 2024 Jan`, + }), + }, { label: 'Check for Updates...', click: () => // Check for updates and notify user if there are any - autoUpdater.checkForUpdatesAndNotify(), + autoUpdater + .checkForUpdatesAndNotify() + .then((updateCheckResult) => { + if ( + !updateCheckResult?.updateInfo || + updateCheckResult?.updateInfo.version === app.getVersion() + ) { + dialog.showMessageBox({ + message: `No updates available.`, + }) + return + } + }) + .catch((error) => { + log('Error checking for updates:' + JSON.stringify(error)) + }), }, { type: 'separator' }, { role: 'services' }, diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat index 22e1c85b3..2ef3165c1 100644 --- a/extensions/inference-nitro-extension/download.bat +++ b/extensions/inference-nitro-extension/download.bat @@ -1,3 +1,3 @@ @echo off set /p NITRO_VERSION=<./bin/version.txt -.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu +.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index b65cf445f..ba6b473eb 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -8,7 +8,7 @@ "license": "AGPL-3.0", "scripts": { "build": "tsc --module commonjs && rollup -c rollup.config.ts", - "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download 
https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro", + "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro && download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo", "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro", "downloadnitro:win32": "download.bat", "downloadnitro": "run-script-os", diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index b88501936..979b4cfac 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -88,7 +88,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension { const electronApi = window?.electronAPI this.inferenceUrl = INFERENCE_URL if (!electronApi) { - this.inferenceUrl = JAN_SERVER_INFERENCE_URL + this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions` } console.debug('Inference url: ', this.inferenceUrl) @@ -154,7 +154,10 @@ export default class JanInferenceNitroExtension extends InferenceExtension { }) if (nitroInitResult?.error) { - events.emit(ModelEvent.OnModelFail, model) + events.emit(ModelEvent.OnModelFail, { + ...model, + error: nitroInitResult.error, + }) return } diff --git a/extensions/inference-nitro-extension/src/node/nvidia.ts b/extensions/inference-nitro-extension/src/node/accelerator.ts similarity index 55% rename from extensions/inference-nitro-extension/src/node/nvidia.ts rename to extensions/inference-nitro-extension/src/node/accelerator.ts index 60468f0c9..972f88681 100644 --- a/extensions/inference-nitro-extension/src/node/nvidia.ts +++ b/extensions/inference-nitro-extension/src/node/accelerator.ts @@ -1,10 +1,11 @@ import { writeFileSync, existsSync, readFileSync } from 'fs' -import { exec } from 'child_process' +import { exec, spawn } 
from 'child_process' import path from 'path' -import { getJanDataFolderPath } from '@janhq/core/node' +import { getJanDataFolderPath, log } from '@janhq/core/node' /** * Default GPU settings + * TODO: This needs to be refactored to support multiple accelerators **/ const DEFALT_SETTINGS = { notify: true, @@ -21,12 +22,17 @@ const DEFALT_SETTINGS = { gpu_highest_vram: '', gpus_in_use: [], is_initial: true, + // TODO: This needs to be set based on user toggle in settings + vulkan: { + enabled: true, + gpu_in_use: '1', + }, } /** * Path to the settings file **/ -export const NVIDIA_INFO_FILE = path.join( +export const GPU_INFO_FILE = path.join( getJanDataFolderPath(), 'settings', 'settings.json' @@ -52,10 +58,10 @@ export async function updateNvidiaInfo() { if (process.platform !== 'darwin') { let data try { - data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8')) + data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8')) } catch (error) { data = DEFALT_SETTINGS - writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)) + writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2)) } updateNvidiaDriverInfo() updateGpuInfo() @@ -79,7 +85,7 @@ export async function updateNvidiaDriverInfo(): Promise { exec( 'nvidia-smi --query-gpu=driver_version --format=csv,noheader', (error, stdout) => { - let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8')) + let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8')) if (!error) { const firstLine = stdout.split('\n')[0].trim() @@ -89,7 +95,7 @@ export async function updateNvidiaDriverInfo(): Promise { data['nvidia_driver'].exist = false } - writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)) + writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2)) Promise.resolve() } ) @@ -158,42 +164,77 @@ export function updateCudaExistence( * Get GPU information */ export async function updateGpuInfo(): Promise { - exec( - 'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits', - (error, stdout) => { - let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8')) + let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8')) - if (!error) { - // Get GPU info and gpu has higher memory first - let highestVram = 0 - let highestVramId = '0' - let gpus = stdout - .trim() - .split('\n') - .map((line) => { - let [id, vram, name] = line.split(', ') - vram = vram.replace(/\r/g, '') - if (parseFloat(vram) > highestVram) { - highestVram = parseFloat(vram) - highestVramId = id - } - return { id, vram, name } - }) + // Cuda + if (data['vulkan'] === true) { + // Vulkan + exec( + process.platform === 'win32' + ? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary` + : `${__dirname}/../bin/vulkaninfo --summary`, + (error, stdout) => { + if (!error) { + const output = stdout.toString() + log(output) + const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g - data.gpus = gpus - data.gpu_highest_vram = highestVramId - } else { - data.gpus = [] - data.gpu_highest_vram = '' + let gpus = [] + let match + while ((match = gpuRegex.exec(output)) !== null) { + const id = match[1] + const name = match[2] + gpus.push({ id, vram: 0, name }) + } + data.gpus = gpus + + if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) { + data.gpus_in_use = [data.gpus.length > 1 ? 
'1' : '0'] + } + + data = updateCudaExistence(data) + writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2)) + } + Promise.resolve() } + ) + } else { + exec( + 'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits', + (error, stdout) => { + if (!error) { + log(stdout) + // Get GPU info and gpu has higher memory first + let highestVram = 0 + let highestVramId = '0' + let gpus = stdout + .trim() + .split('\n') + .map((line) => { + let [id, vram, name] = line.split(', ') + vram = vram.replace(/\r/g, '') + if (parseFloat(vram) > highestVram) { + highestVram = parseFloat(vram) + highestVramId = id + } + return { id, vram, name } + }) - if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) { - data.gpus_in_use = [data['gpu_highest_vram']] + data.gpus = gpus + data.gpu_highest_vram = highestVramId + } else { + data.gpus = [] + data.gpu_highest_vram = '' + } + + if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) { + data.gpus_in_use = [data['gpu_highest_vram']] + } + + data = updateCudaExistence(data) + writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2)) + Promise.resolve() } - - data = updateCudaExistence(data) - writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)) - Promise.resolve() - } - ) + ) + } } diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-nitro-extension/src/node/execute.ts index 795c83ded..08baba0d5 100644 --- a/extensions/inference-nitro-extension/src/node/execute.ts +++ b/extensions/inference-nitro-extension/src/node/execute.ts @@ -1,10 +1,11 @@ import { readFileSync } from 'fs' import * as path from 'path' -import { NVIDIA_INFO_FILE } from './nvidia' +import { GPU_INFO_FILE } from './accelerator' export interface NitroExecutableOptions { executablePath: string cudaVisibleDevices: string + vkVisibleDevices: string } /** * Find which executable file to run based on the current platform. @@ -13,24 +14,30 @@ export interface NitroExecutableOptions { export const executableNitroFile = (): NitroExecutableOptions => { let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default let cudaVisibleDevices = '' + let vkVisibleDevices = '' let binaryName = 'nitro' /** * The binary folder is different for each platform. 
*/ if (process.platform === 'win32') { /** - * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0 + * For Windows: win-cpu, win-vulkan, win-cuda-11-7, win-cuda-12-0 */ - let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8')) - if (nvidiaInfo['run_mode'] === 'cpu') { + let gpuInfo = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8')) + if (gpuInfo['run_mode'] === 'cpu') { binaryFolder = path.join(binaryFolder, 'win-cpu') } else { - if (nvidiaInfo['cuda'].version === '11') { + if (gpuInfo['cuda']?.version === '11') { binaryFolder = path.join(binaryFolder, 'win-cuda-11-7') } else { binaryFolder = path.join(binaryFolder, 'win-cuda-12-0') } - cudaVisibleDevices = nvidiaInfo['gpus_in_use'].join(',') + cudaVisibleDevices = gpuInfo['gpus_in_use'].join(',') + } + if (gpuInfo['vulkan'] === true) { + binaryFolder = path.join(__dirname, '..', 'bin') + binaryFolder = path.join(binaryFolder, 'win-vulkan') + vkVisibleDevices = gpuInfo['gpus_in_use'].toString() } binaryName = 'nitro.exe' } else if (process.platform === 'darwin') { @@ -44,22 +51,29 @@ export const executableNitroFile = (): NitroExecutableOptions => { } } else { /** - * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0 + * For Linux: linux-cpu, linux-vulkan, linux-cuda-11-7, linux-cuda-12-0 */ - let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8')) - if (nvidiaInfo['run_mode'] === 'cpu') { + let gpuInfo = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8')) + if (gpuInfo['run_mode'] === 'cpu') { binaryFolder = path.join(binaryFolder, 'linux-cpu') } else { - if (nvidiaInfo['cuda'].version === '11') { + if (gpuInfo['cuda']?.version === '11') { binaryFolder = path.join(binaryFolder, 'linux-cuda-11-7') } else { binaryFolder = path.join(binaryFolder, 'linux-cuda-12-0') } - cudaVisibleDevices = nvidiaInfo['gpus_in_use'].join(',') + cudaVisibleDevices = gpuInfo['gpus_in_use'].join(',') + } + + if (gpuInfo['vulkan'] === true) { + binaryFolder = path.join(__dirname, '..', 'bin') + binaryFolder = path.join(binaryFolder, 'linux-vulkan') + vkVisibleDevices = gpuInfo['gpus_in_use'].toString() + } } return { executablePath: path.join(binaryFolder, binaryName), cudaVisibleDevices, + vkVisibleDevices, } } diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts index 25f571c81..9b2684a6c 100644 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ b/extensions/inference-nitro-extension/src/node/index.ts @@ -4,7 +4,7 @@ import { ChildProcessWithoutNullStreams, spawn } from 'child_process' import tcpPortUsed from 'tcp-port-used' import fetchRT from 'fetch-retry' import { log, getSystemResourceInfo } from '@janhq/core/node' -import { getNitroProcessInfo, updateNvidiaInfo } from './nvidia' +import { getNitroProcessInfo, updateNvidiaInfo } from './accelerator' import { Model, InferenceEngine, @@ -310,9 +310,15 @@ async function killSubprocess(): Promise { subprocess?.kill() subprocess = undefined }) - .catch(() => {}) + .catch(() => {}) // Do nothing with this attempt .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000)) .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) + .catch((err) => { + log( + `[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? 
${err}` + ) + throw 'PORT_NOT_AVAILABLE' + }) } /** @@ -339,6 +345,10 @@ function spawnNitroProcess(): Promise { env: { ...process.env, CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, + // Vulkan - Support 1 device at a time for now + ...(executableOptions.vkVisibleDevices?.length > 0 && { + GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], + }), }, } ) diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json index 5d1674007..e99122bcf 100644 --- a/extensions/model-extension/package.json +++ b/extensions/model-extension/package.json @@ -1,6 +1,6 @@ { "name": "@janhq/model-extension", - "version": "1.0.23", + "version": "1.0.25", "description": "Model Management Extension provides model exploration and seamless downloads", "main": "dist/index.js", "module": "dist/module.js", diff --git a/extensions/monitoring-extension/src/module.ts b/extensions/monitoring-extension/src/module.ts index ea7319b47..27781a5d6 100644 --- a/extensions/monitoring-extension/src/module.ts +++ b/extensions/monitoring-extension/src/module.ts @@ -37,10 +37,10 @@ const getCurrentLoad = () => } if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) { const gpuIds = data['gpus_in_use'].join(',') - if (gpuIds !== '') { + if (gpuIds !== '' && data['vulkan'] !== true) { exec( `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`, - (error, stdout, stderr) => { + (error, stdout, _) => { if (error) { console.error(`exec error: ${error}`) reject(error) diff --git a/models/dolphin-phi-2/model.json b/models/dolphin-phi-2/model.json new file mode 100644 index 000000000..c25ff8f69 --- /dev/null +++ b/models/dolphin-phi-2/model.json @@ -0,0 +1,32 @@ +{ + "sources": [ + { + "url": "https://huggingface.co/TheBloke/dolphin-2_6-phi-2-GGUF/resolve/main/dolphin-2_6-phi-2.Q8_0.gguf", + "filename": "dolphin-2_6-phi-2.Q8_0.gguf" + } + ], + "id": "dolphin-phi-2", + "object": "model", + "name": "Dolphin Phi-2 2.7B Q8", + "version": "1.0", + "description": "Dolphin Phi-2 is a 2.7B model, fine-tuned for chat, excelling in common sense and logical reasoning benchmarks.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "dolphin-2_6-phi-2.Q8_0.gguf" + }, + "parameters": { + "max_tokens": 4096, + "stop": ["<|im_end|>"] + }, + "metadata": { + "author": "Cognitive Computations, Microsoft", + "tags": [ + "3B", + "Finetuned" + ], + "size": 2960000000 + }, + "engine": "nitro" + } diff --git a/models/llamacorn-1.1b/model.json b/models/llamacorn-1.1b/model.json new file mode 100644 index 000000000..056fb9050 --- /dev/null +++ b/models/llamacorn-1.1b/model.json @@ -0,0 +1,37 @@ +{ + "sources": [ + { + "url":"https://huggingface.co/janhq/llamacorn-1.1b-chat-GGUF/resolve/main/llamacorn-1.1b-chat.Q8_0.gguf", + "filename": "llamacorn-1.1b-chat.Q8_0.gguf" + } + ], + "id": "llamacorn-1.1b", + "object": "model", + "name": "LlamaCorn 1.1B Q8", + "version": "1.0", + "description": "LlamaCorn is designed to improve chat functionality from TinyLlama.", + "format": "gguf", + "settings": { + "ctx_len": 2048, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 
0.95, + "stream": true, + "max_tokens": 2048, + "stop": [], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "Jan", + "tags": [ + "Tiny", + "Finetuned" + ], + "size": 1170000000 + }, + "engine": "nitro" + } \ No newline at end of file diff --git a/models/mistral-ins-7b-q4/model.json b/models/mistral-ins-7b-q4/model.json index bfdaffa90..75e0cbf9f 100644 --- a/models/mistral-ins-7b-q4/model.json +++ b/models/mistral-ins-7b-q4/model.json @@ -29,7 +29,7 @@ "author": "MistralAI, The Bloke", "tags": ["Featured", "7B", "Foundational Model"], "size": 4370000000, - "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png" + "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png" }, "engine": "nitro" } diff --git a/models/openchat-3.5-7b/model.json b/models/openchat-3.5-7b/model.json index 294f7d269..18db33f8e 100644 --- a/models/openchat-3.5-7b/model.json +++ b/models/openchat-3.5-7b/model.json @@ -1,8 +1,8 @@ { "sources": [ { - "filename": "openchat-3.5-1210.Q4_K_M.gguf", - "url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf" + "filename": "openchat-3.5-0106.Q4_K_M.gguf", + "url": "https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf" } ], "id": "openchat-3.5-7b", @@ -14,7 +14,7 @@ "settings": { "ctx_len": 4096, "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:", - "llama_model_path": "openchat-3.5-1210.Q4_K_M.gguf" + "llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf" }, "parameters": { "temperature": 0.7, diff --git a/models/openhermes-neural-7b/model.json b/models/openhermes-neural-7b/model.json index 87e1df143..a13a0f2b8 100644 --- a/models/openhermes-neural-7b/model.json +++ b/models/openhermes-neural-7b/model.json @@ -28,7 +28,7 @@ "author": "Intel, Jan", "tags": ["7B", "Merged", "Featured"], "size": 4370000000, - "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png" + "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/openhermes-neural-7b/cover.png" }, "engine": "nitro" } diff --git a/models/stable-zephyr-3b/model.json b/models/stable-zephyr-3b/model.json new file mode 100644 index 000000000..724299ea5 --- /dev/null +++ b/models/stable-zephyr-3b/model.json @@ -0,0 +1,34 @@ +{ + "sources": [ + { + "url": "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q8_0.gguf", + "filename": "stablelm-zephyr-3b.Q8_0.gguf" + } + ], + "id": "stable-zephyr-3b", + "object": "model", + "name": "Stable Zephyr 3B Q8", + "version": "1.0", + "description": "StableLM Zephyr 3B is trained for safe and reliable chatting.", + "format": "gguf", + "settings": { + "ctx_len": 4096, + "prompt_template": "<|user|>\n{prompt}<|endoftext|>\n<|assistant|>", + "llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf" + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 4096, + "stop": ["<|endoftext|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "StabilityAI", + "tags": ["3B", "Finetuned"], + "size": 2970000000 + }, + "engine": "nitro" + } \ No newline at end of file diff --git a/models/trinity-v1.2-7b/model.json b/models/trinity-v1.2-7b/model.json index 2dda120e6..947629642 100644 --- a/models/trinity-v1.2-7b/model.json +++ b/models/trinity-v1.2-7b/model.json @@ -28,7 +28,7 @@ "author": "Jan", "tags": ["7B", 
"Merged", "Featured"], "size": 4370000000, - "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png" + "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png" }, "engine": "nitro" } diff --git a/models/yarn-mistral-7b/model.json b/models/yarn-mistral-7b/model.json deleted file mode 100644 index ee6de1319..000000000 --- a/models/yarn-mistral-7b/model.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "sources": [ - { - "url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf" - } - ], - "id": "yarn-mistral-7b", - "object": "model", - "name": "Yarn Mistral 7B Q4", - "version": "1.0", - "description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "{prompt}" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "NousResearch, The Bloke", - "tags": ["7B", "Finetuned"], - "size": 4370000000 - }, - "engine": "nitro" -} diff --git a/server/helpers/setup.ts b/server/helpers/setup.ts index e6fab7a25..7d8f8914a 100644 --- a/server/helpers/setup.ts +++ b/server/helpers/setup.ts @@ -31,6 +31,32 @@ export async function setup() { 'utf-8' ) + if (!existsSync(join(appDir, 'settings'))) { + console.debug('Writing nvidia config file...') + mkdirSync(join(appDir, 'settings')) + writeFileSync( + join(appDir, 'settings', 'settings.json'), + JSON.stringify( + { + notify: true, + run_mode: 'cpu', + nvidia_driver: { + exist: false, + version: '', + }, + cuda: { + exist: false, + version: '', + }, + gpus: [], + gpu_highest_vram: '', + gpus_in_use: [], + is_initial: true, + }), + 'utf-8' + ) + } + /** * Install extensions */ diff --git a/server/index.ts b/server/index.ts index 8e63eb46a..98cc8385d 100644 --- a/server/index.ts +++ b/server/index.ts @@ -7,6 +7,7 @@ import { getJanExtensionsPath, } from '@janhq/core/node' import { join } from 'path' +import tcpPortUsed from 'tcp-port-used' // Load environment variables dotenv.config() @@ -46,6 +47,15 @@ export interface ServerConfig { * @param configs - Server configurations */ export const startServer = async (configs?: ServerConfig): Promise => { + if (configs?.port && configs?.host) { + const inUse = await tcpPortUsed.check(Number(configs.port), configs.host) + if (inUse) { + const errorMessage = `Port ${configs.port} is already in use.` + logServer(errorMessage) + throw new Error(errorMessage) + } + } + // Update server settings isVerbose = configs?.isVerboseEnabled ?? true hostSetting = configs?.host ?? 
JAN_API_HOST diff --git a/server/package.json b/server/package.json index a7cc09b4f..d9a2bbc9a 100644 --- a/server/package.json +++ b/server/package.json @@ -27,19 +27,20 @@ "@npmcli/arborist": "^7.3.1", "dotenv": "^16.3.1", "fastify": "^4.24.3", - "request": "^2.88.2", "fetch-retry": "^5.0.6", - "tcp-port-used": "^1.0.2", - "request-progress": "^3.0.0" + "node-fetch": "2", + "request": "^2.88.2", + "request-progress": "^3.0.0", + "tcp-port-used": "^1.0.2" }, "devDependencies": { "@types/body-parser": "^1.19.5", "@types/npmcli__arborist": "^5.6.4", + "@types/tcp-port-used": "^1.0.4", "@typescript-eslint/eslint-plugin": "^6.7.3", "@typescript-eslint/parser": "^6.7.3", "eslint-plugin-react": "^7.33.2", "run-script-os": "^1.1.6", - "@types/tcp-port-used": "^1.0.4", "typescript": "^5.2.2" } } diff --git a/web/containers/CardSidebar/index.tsx b/web/containers/CardSidebar/index.tsx index f500141f0..89ff60e66 100644 --- a/web/containers/CardSidebar/index.tsx +++ b/web/containers/CardSidebar/index.tsx @@ -22,6 +22,7 @@ interface Props { rightAction?: ReactNode title: string asChild?: boolean + isShow?: boolean hideMoreVerticalAction?: boolean } export default function CardSidebar({ @@ -30,8 +31,9 @@ export default function CardSidebar({ asChild, rightAction, hideMoreVerticalAction, + isShow, }: Props) { - const [show, setShow] = useState(false) + const [show, setShow] = useState(isShow ?? false) const [more, setMore] = useState(false) const [menu, setMenu] = useState(null) const [toggle, setToggle] = useState(null) @@ -67,8 +69,8 @@ export default function CardSidebar({ show && 'rotate-180' )} /> + {title} - {title}
{rightAction && rightAction} diff --git a/web/containers/DropdownListSidebar/index.tsx b/web/containers/DropdownListSidebar/index.tsx index 75bbe073c..191c7bcbe 100644 --- a/web/containers/DropdownListSidebar/index.tsx +++ b/web/containers/DropdownListSidebar/index.tsx @@ -195,7 +195,12 @@ const DropdownListSidebar = ({ - +
    {engineOptions.map((name, i) => { diff --git a/web/containers/Layout/BottomBar/index.tsx b/web/containers/Layout/BottomBar/index.tsx index d6a7c05d2..5d6b7e79c 100644 --- a/web/containers/Layout/BottomBar/index.tsx +++ b/web/containers/Layout/BottomBar/index.tsx @@ -152,7 +152,7 @@ const BottomBar = () => { {gpus.length > 0 && ( -
    +
    { if (mainViewState === state) return if (serverEnabled && state === MainViewState.Thread) return setMainViewState(state) + setEditMessage('') } const primaryMenus = [ diff --git a/web/containers/OpenAiKeyInput/index.tsx b/web/containers/OpenAiKeyInput/index.tsx index 444c8074f..7ef97cf38 100644 --- a/web/containers/OpenAiKeyInput/index.tsx +++ b/web/containers/OpenAiKeyInput/index.tsx @@ -30,7 +30,7 @@ const OpenAiKeyInput: React.FC = () => { } return ( -
    +