diff --git a/.github/scripts/rename-app-beta.sh b/.github/scripts/rename-app-beta.sh deleted file mode 100644 index a12d1d635..000000000 --- a/.github/scripts/rename-app-beta.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -# Check if the correct number of arguments is provided -if [ "$#" -ne 1 ]; then - echo "Usage: $0 <input_json_file>" - exit 1 -fi - -INPUT_JSON_FILE="$1" - -# Check if the input file exists -if [ ! -f "$INPUT_JSON_FILE" ]; then - echo "Input file not found: $INPUT_JSON_FILE" - exit 1 -fi - -# Use jq to transform the content -jq ' - .name = "jan-beta" | - .productName = "Jan-beta" | - .build.appId = "jan-beta.ai.app" | - .build.productName = "Jan-beta" | - .build.appId = "jan-beta.ai.app" | - .build.protocols[0].name = "Jan-beta" | - .build.protocols[0].schemes = ["jan-beta"] | - .build.artifactName = "jan-beta-${os}-${arch}-${version}.${ext}" | - .build.publish[0].channel = "beta" -' "$INPUT_JSON_FILE" > ./package.json.tmp - -cat ./package.json.tmp - -rm $INPUT_JSON_FILE -mv ./package.json.tmp $INPUT_JSON_FILE - -# Update the layout file -LAYOUT_FILE_PATH="web/app/layout.tsx" - -if [ ! -f "$LAYOUT_FILE_PATH" ]; then - echo "File does not exist: $LAYOUT_FILE_PATH" - exit 1 -fi - -# Perform the replacements -sed -i -e "s#Jan#Jan-beta#g" "$LAYOUT_FILE_PATH" - -# Notify completion -echo "File has been updated: $LAYOUT_FILE_PATH" \ No newline at end of file diff --git a/.github/scripts/rename-app.sh b/.github/scripts/rename-app.sh new file mode 100644 index 000000000..7c2ad6ef3 --- /dev/null +++ b/.github/scripts/rename-app.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# Check if the correct number of arguments is provided +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <input_json_file> <channel>" + exit 1 +fi + +INPUT_JSON_FILE="$1" + +CHANNEL="$2" + +if [ "$CHANNEL" == "nightly" ]; then + UPDATER="latest" +else + UPDATER="beta" +fi + +# Check if the input file exists +if [ ! -f "$INPUT_JSON_FILE" ]; then + echo "Input file not found: $INPUT_JSON_FILE" + exit 1 +fi + +# Use jq to transform the content +jq --arg channel "$CHANNEL" --arg updater "$UPDATER" ' + .name = "jan-\($channel)" | + .productName = "Jan-\($channel)" | + .build.appId = "jan-\($channel).ai.app" | + .build.productName = "Jan-\($channel)" | + .build.appId = "jan-\($channel).ai.app" | + .build.protocols[0].name = "Jan-\($channel)" | + .build.protocols[0].schemes = ["jan-\($channel)"] | + .build.artifactName = "jan-\($channel)-${os}-${arch}-${version}.${ext}" | + .build.publish[0].channel = $updater +' "$INPUT_JSON_FILE" > ./package.json.tmp + +cat ./package.json.tmp + +rm $INPUT_JSON_FILE +mv ./package.json.tmp $INPUT_JSON_FILE + +# Update the layout file +LAYOUT_FILE_PATH="web/app/layout.tsx" + +if [ ! -f "$LAYOUT_FILE_PATH" ]; then + echo "File does not exist: $LAYOUT_FILE_PATH" + exit 1 +fi + +# Perform the replacements +sed -i -e "s#Jan#Jan-$CHANNEL#g" "$LAYOUT_FILE_PATH" + +# Notify completion
echo "File has been updated: $LAYOUT_FILE_PATH" diff --git a/.github/scripts/rename-uninstaller-beta.sh b/.github/scripts/rename-uninstaller.sh similarity index 61% rename from .github/scripts/rename-uninstaller-beta.sh rename to .github/scripts/rename-uninstaller.sh index c322825da..7d3992fd0 100644 --- a/.github/scripts/rename-uninstaller-beta.sh +++ b/.github/scripts/rename-uninstaller.sh @@ -3,6 +3,14 @@ # File path to be modified FILE_PATH="electron/scripts/uninstaller.nsh" +# Check if the correct number of arguments is provided +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <channel>" + exit 1 +fi + +CHANNEL="$1" + # Check if the file exists if [ !
-f "$FILE_PATH" ]; then echo "File does not exist: $FILE_PATH" @@ -10,7 +18,7 @@ if [ ! -f "$FILE_PATH" ]; then fi # Perform the replacements -sed -i -e "s#jan#jan-beta#g" "$FILE_PATH" +sed -i -e "s#jan#jan-$CHANNEL#g" "$FILE_PATH" # Notify completion echo "File has been updated: $FILE_PATH" \ No newline at end of file diff --git a/.github/scripts/rename-workspace-beta.sh b/.github/scripts/rename-workspace.sh similarity index 74% rename from .github/scripts/rename-workspace-beta.sh rename to .github/scripts/rename-workspace.sh index 6286d1889..420042e2c 100644 --- a/.github/scripts/rename-workspace-beta.sh +++ b/.github/scripts/rename-workspace.sh @@ -3,6 +3,8 @@ # File path to be modified FILE_PATH="$1" +CHANNEL="$2" + # Check if the file exists if [ ! -f "$FILE_PATH" ]; then echo "File does not exist: $FILE_PATH" @@ -10,7 +12,7 @@ if [ ! -f "$FILE_PATH" ]; then fi # Perform the replacements -sed -i -e 's/yarn workspace jan/yarn workspace jan-beta/g' "$FILE_PATH" +sed -i -e "s/yarn workspace jan/yarn workspace jan-$CHANNEL/g" "$FILE_PATH" # Notify completion echo "File has been updated: $FILE_PATH" \ No newline at end of file diff --git a/.github/workflows/jan-docs-new-release.yaml b/.github/workflows/jan-docs-new-release.yaml index 2acca92de..a8e94b6d7 100644 --- a/.github/workflows/jan-docs-new-release.yaml +++ b/.github/workflows/jan-docs-new-release.yaml @@ -58,6 +58,6 @@ jobs: accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }} directory: ./docs/out - branch: dev + branch: main # Optional: Enable this if you want to have GitHub Deployments triggered gitHubToken: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/jan-docs.yml b/.github/workflows/jan-docs.yml index ada038e83..9b5fd97f1 100644 --- a/.github/workflows/jan-docs.yml +++ b/.github/workflows/jan-docs.yml @@ -83,6 +83,6 @@ jobs: accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }} directory: ./docs/out - branch: dev + branch: main # Optional: Enable this if you want to have GitHub Deployments triggered gitHubToken: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/jan-electron-build-beta.yml b/.github/workflows/jan-electron-build-beta.yml index 4f2625266..67496b355 100644 --- a/.github/workflows/jan-electron-build-beta.yml +++ b/.github/workflows/jan-electron-build-beta.yml @@ -133,4 +133,26 @@ jobs: run: | gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + noti-discord-and-update-url-readme: + needs: [build-macos-x64, build-macos-arm64, create-draft-release, build-windows-x64, build-linux-x64, combine-beta-mac-yml] + runs-on: ubuntu-latest + steps: + - name: Set version to environment variable + run: | + echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV + + - name: Notify Discord + uses: Ilshidur/action-discord@master + with: + args: | + Jan-beta App version {{ VERSION }}, has been released, use the following links to download the app with faster speed or visit the Github release page for more information: + - Windows: https://delta.jan.ai/beta/jan-beta-win-x64-{{ VERSION }}.exe + - macOS Intel: https://delta.jan.ai/beta/jan-beta-mac-x64-{{ VERSION }}.dmg + - macOS Apple Silicon: https://delta.jan.ai/beta/jan-beta-mac-arm64-{{ VERSION }}.dmg + - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb + - 
Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage + - GitHub Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }} + env: + DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }} \ No newline at end of file diff --git a/.github/workflows/jan-electron-linter-and-test.yml b/.github/workflows/jan-electron-linter-and-test.yml index 4e20d6c5f..b2105acb4 100644 --- a/.github/workflows/jan-electron-linter-and-test.yml +++ b/.github/workflows/jan-electron-linter-and-test.yml @@ -319,6 +319,13 @@ jobs: # TURBO_TEAM: 'linux' # TURBO_TOKEN: '${{ secrets.TURBO_TOKEN }}' + - uses: actions/upload-artifact@v4 + if: always() + with: + name: playwright-report + path: electron/playwright-report/ + retention-days: 2 + coverage-check: runs-on: [self-hosted, Linux, ubuntu-desktop] needs: base_branch_cov diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml index 496d153ae..92188c364 100644 --- a/.github/workflows/template-build-linux-x64.yml +++ b/.github/workflows/template-build-linux-x64.yml @@ -60,18 +60,25 @@ jobs: mv /tmp/package.json electron/package.json jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json mv /tmp/package.json web/package.json - jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json cat electron/package.json + # chmod +x .github/scripts/rename-app.sh + # .github/scripts/rename-app.sh ./electron/package.json nightly + # chmod +x .github/scripts/rename-workspace.sh + # .github/scripts/rename-workspace.sh ./package.json nightly + # echo "------------------------" + # cat ./electron/package.json + # echo "------------------------" - name: Change App Name for beta version if: inputs.beta == true shell: bash run: | - chmod +x .github/scripts/rename-app-beta.sh - .github/scripts/rename-app-beta.sh ./electron/package.json - chmod +x .github/scripts/rename-workspace-beta.sh - .github/scripts/rename-workspace-beta.sh ./package.json + chmod +x .github/scripts/rename-app.sh + .github/scripts/rename-app.sh ./electron/package.json beta + chmod +x .github/scripts/rename-workspace.sh + .github/scripts/rename-workspace.sh ./package.json beta echo "------------------------" cat ./electron/package.json echo "------------------------" diff --git a/.github/workflows/template-build-macos-arm64.yml b/.github/workflows/template-build-macos-arm64.yml index 40cdda627..a23e34cf9 100644 --- a/.github/workflows/template-build-macos-arm64.yml +++ b/.github/workflows/template-build-macos-arm64.yml @@ -72,22 +72,29 @@ jobs: jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json mv /tmp/package.json web/package.json - jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": 
"${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json - cat electron/package.json + # cat electron/package.json + # chmod +x .github/scripts/rename-app.sh + # .github/scripts/rename-app.sh ./electron/package.json nightly + # chmod +x .github/scripts/rename-workspace.sh + # .github/scripts/rename-workspace.sh ./package.json nightly + # echo "------------------------" + # cat ./electron/package.json + # echo "------------------------" - name: Change App Name for beta version if: inputs.beta == true shell: bash run: | - chmod +x .github/scripts/rename-app-beta.sh - .github/scripts/rename-app-beta.sh ./electron/package.json - chmod +x .github/scripts/rename-workspace-beta.sh - .github/scripts/rename-workspace-beta.sh ./package.json + chmod +x .github/scripts/rename-app.sh + .github/scripts/rename-app.sh ./electron/package.json beta + chmod +x .github/scripts/rename-workspace.sh + .github/scripts/rename-workspace.sh ./package.json beta echo "------------------------" cat ./electron/package.json echo "------------------------" @@ -186,7 +193,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: jan-mac-arm64-${{ inputs.new_version }} - path: ./electron/dist/jan-mac-arm64-${{ inputs.new_version }}.dmg + path: ./electron/dist/*.dmg - name: Upload Artifact if: inputs.beta == false diff --git a/.github/workflows/template-build-macos-x64.yml b/.github/workflows/template-build-macos-x64.yml index f139797af..18309fca0 100644 --- a/.github/workflows/template-build-macos-x64.yml +++ b/.github/workflows/template-build-macos-x64.yml @@ -72,22 +72,29 @@ jobs: jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json mv /tmp/package.json web/package.json - jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json - cat electron/package.json + # cat electron/package.json + # chmod +x .github/scripts/rename-app.sh + # .github/scripts/rename-app.sh ./electron/package.json nightly + # chmod +x .github/scripts/rename-workspace.sh + # .github/scripts/rename-workspace.sh ./package.json nightly + # echo 
"------------------------" + # cat ./electron/package.json + # echo "------------------------" - name: Change App Name for beta version if: inputs.beta == true shell: bash run: | - chmod +x .github/scripts/rename-app-beta.sh - .github/scripts/rename-app-beta.sh ./electron/package.json - chmod +x .github/scripts/rename-workspace-beta.sh - .github/scripts/rename-workspace-beta.sh ./package.json + chmod +x .github/scripts/rename-app.sh + .github/scripts/rename-app.sh ./electron/package.json beta + chmod +x .github/scripts/rename-workspace.sh + .github/scripts/rename-workspace.sh ./package.json beta echo "------------------------" cat ./electron/package.json echo "------------------------" @@ -186,7 +193,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: jan-mac-x64-${{ inputs.new_version }} - path: ./electron/dist/jan-mac-x64-${{ inputs.new_version }}.dmg + path: ./electron/dist/*.dmg - name: Upload Artifact if: inputs.beta == false diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml index ffe94fecc..2a1d3f15b 100644 --- a/.github/workflows/template-build-windows-x64.yml +++ b/.github/workflows/template-build-windows-x64.yml @@ -73,23 +73,35 @@ jobs: jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json mv /tmp/package.json web/package.json - jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json + jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json jq '.build.win.sign = "./sign.js"' electron/package.json > /tmp/package.json mv /tmp/package.json electron/package.json cat electron/package.json + # chmod +x .github/scripts/rename-app.sh + # .github/scripts/rename-app.sh ./electron/package.json nightly + # chmod +x .github/scripts/rename-workspace.sh + # .github/scripts/rename-workspace.sh ./package.json nightly + # chmod +x .github/scripts/rename-uninstaller.sh + # .github/scripts/rename-uninstaller.sh nightly + # echo "------------------------" + # cat ./electron/package.json + # echo "------------------------" + # cat ./package.json + # echo "------------------------" + - name: Change App Name for beta version if: inputs.beta == true shell: bash run: | - chmod +x .github/scripts/rename-app-beta.sh - .github/scripts/rename-app-beta.sh ./electron/package.json - chmod +x .github/scripts/rename-workspace-beta.sh - .github/scripts/rename-workspace-beta.sh ./package.json - chmod +x .github/scripts/rename-uninstaller-beta.sh - .github/scripts/rename-uninstaller-beta.sh + chmod +x .github/scripts/rename-app.sh + .github/scripts/rename-app.sh ./electron/package.json beta + chmod +x .github/scripts/rename-workspace.sh + .github/scripts/rename-workspace.sh ./package.json beta + chmod +x .github/scripts/rename-uninstaller.sh + .github/scripts/rename-uninstaller.sh beta echo "------------------------" cat ./electron/package.json echo "------------------------" diff --git a/.husky/pre-commit 
b/.husky/pre-commit index a4aa5add4..53c4e577e 100644 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1 +1 @@ -npm run lint --fix \ No newline at end of file +npx oxlint@latest --fix \ No newline at end of file diff --git a/JanBanner.png b/JanBanner.png new file mode 100644 index 000000000..165831871 Binary files /dev/null and b/JanBanner.png differ diff --git a/README.md b/README.md index ecede5bd9..043960537 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Jan - Turn your computer into an AI computer +# Jan - Local AI Assistant -![Jan banner](https://github.com/janhq/jan/assets/89722390/35daac7d-b895-487c-a6ac-6663daaad78e) +![Jan banner](./JanBanner.png)

@@ -12,18 +12,22 @@

- Getting Started + Getting Started - Docs - Changelog - Bug reports - Discord

-> [!Warning] >**Jan is currently in Development**: Expect breaking changes and bugs! +

+⚠️ Jan is currently in Development: Expect breaking changes and bugs! +

-Jan is an open-source ChatGPT alternative that runs 100% offline on your computer. -**Jan runs on any hardware.** From PCs to multi-GPU clusters, Jan supports universal architectures: +Jan is a ChatGPT alternative that runs 100% offline on your device. Our goal is to make it easy for a layperson to download and run LLMs and use AI with **full control** and **privacy**. + +Jan is powered by [Cortex](https://github.com/janhq/cortex.cpp), our embeddable local AI engine that runs on any hardware. +From PCs to multi-GPU clusters, Jan & Cortex support universal architectures: - [x] NVIDIA GPUs (fast) - [x] Apple M-series (fast) @@ -31,6 +35,12 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute - [x] Linux Debian - [x] Windows x64 +#### Features: +- [Model Library](https://jan.ai/docs/models/manage-models#add-models) with popular LLMs like Llama, Gemma, Mistral, or Qwen +- Connect to [Remote AI APIs](https://jan.ai/docs/remote-models/openai) like Groq and OpenRouter +- Local API Server with OpenAI-equivalent API +- [Extensions](https://jan.ai/docs/extensions) for customizing Jan + ## Download @@ -74,7 +84,40 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute - + + + + + + + +
Experimental (Nightly Build)Beta (Preview) + + + jan.exe + + + + + Intel + + + + + M1/M2/M3/M4 + + + + + jan.deb + + + + + jan.AppImage + +
Nightly Build (Experimental) @@ -108,79 +151,64 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
-Download the latest version of Jan at https://jan.ai/ or visit the **[GitHub Releases](https://github.com/janhq/jan/releases)** to download any previous release. +Download the latest version of Jan at https://jan.ai/ or visit the [GitHub Releases](https://github.com/janhq/jan/releases) to download any previous release. ## Demo -![Demo](/demo.gif) +https://github.com/user-attachments/assets/c3592fa2-c504-4d9d-a885-7e00122a50f3 -_Realtime Video: Jan v0.4.3-nightly on a Mac M1, 16GB Sonoma 14_ +*Real-time Video: Jan v0.5.7 on a Mac M2, 16GB Sonoma 14.2* ## Quicklinks -#### Jan +### Jan -- [Jan website](https://jan.ai/) +- [Jan Website](https://jan.ai/) - [Jan GitHub](https://github.com/janhq/jan) -- [User Guides](https://jan.ai/guides/) -- [Developer docs](https://jan.ai/developer/) -- [API reference](https://jan.ai/api-reference/) -- [Specs](https://jan.ai/docs/) +- [Documentation](https://jan.ai/docs) +- [Jan Changelog](https://jan.ai/changelog) +- [Jan Blog](https://jan.ai/blog) -#### Nitro +### Cortex.cpp +Jan is powered by **Cortex.cpp**. It is a C++ command-line interface (CLI) designed as an alternative to [Ollama](https://ollama.com/). By default, it runs on the llama.cpp engine but also supports other engines, including ONNX and TensorRT-LLM, making it a multi-engine platform. -Nitro is a high-efficiency C++ inference engine for edge computing. It is lightweight and embeddable, and can be used on its own within your own projects. - -- [Nitro Website](https://nitro.jan.ai) -- [Nitro GitHub](https://github.com/janhq/nitro) -- [Documentation](https://nitro.jan.ai/docs) -- [API Reference](https://nitro.jan.ai/api-reference) - -## Troubleshooting - -As Jan is in development mode, you might get stuck on a broken build. - -To reset your installation: - -1. Use the following commands to remove any dangling backend processes: - - ```sh - ps aux | grep nitro - ``` - - Look for processes like "nitro" and "nitro_arm_64," and kill them one by one with: - - ```sh - kill -9 <PID> - ``` - -2. 
**Remove Jan from your Applications folder and Cache folder** - - ```bash - make clean - ``` - - This will remove all build artifacts and cached files: - - - Delete Jan extension from your `~/jan/extensions` folder - - Delete all `node_modules` in current folder - - Clear Application cache in `~/Library/Caches/jan` +- [Cortex Website](https://cortex.so/) +- [Cortex GitHub](https://github.com/janhq/cortex.cpp) +- [Documentation](https://cortex.so/docs/) +- [Models Library](https://cortex.so/models) +- API Reference: *Under development* + ## Requirements for running Jan -- MacOS: 13 or higher -- Windows: +- **MacOS**: 13 or higher +- **Windows**: - Windows 10 or higher - To enable GPU support: - Nvidia GPU with CUDA Toolkit 11.7 or higher - Nvidia driver 470.63.01 or higher -- Linux: +- **Linux**: - glibc 2.27 or higher (check with `ldd --version`) - gcc 11, g++ 11, cpp 11 or higher, refer to this [link](https://jan.ai/guides/troubleshooting/gpu-not-used/#specific-requirements-for-linux) for more information - To enable GPU support: - Nvidia GPU with CUDA Toolkit 11.7 or higher - Nvidia driver 470.63.01 or higher +## Troubleshooting + +As Jan is in development mode, you might get stuck on some common issues: +- [Troubleshooting a broken build](https://jan.ai/docs/troubleshooting#broken-build) +- [Troubleshooting NVIDIA GPU](https://jan.ai/docs/troubleshooting#troubleshooting-nvidia-gpu) +- [Troubleshooting Something's Amiss](https://jan.ai/docs/troubleshooting#somethings-amiss) + + +If you can't find what you need in our troubleshooting guide, feel free to reach out to us for extra help: +1. Copy your [error logs & device specifications](https://jan.ai/docs/troubleshooting#how-to-get-error-logs). +2. Go to our [Discord](https://discord.com/invite/FTk2MvZwJH) & send it to **#🆘|get-help** channel for further support. + +*Check the logs to ensure the information is what you intend to send. Note that we retain your logs for only 24 hours, so report any issues promptly.* + + ## Contributing Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) file @@ -209,11 +237,7 @@ Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) fi This will start the development server and open the desktop app. -3. (Optional) **Run the API server without frontend** - ```bash - yarn dev:server - ``` ### For production build @@ -225,102 +249,6 @@ make build This will build the app MacOS m1/m2 for production (with code signing already done) and put the result in `dist` folder. -### Docker mode - -- Supported OS: Linux, WSL2 Docker -- Pre-requisites: - - - Docker Engine and Docker Compose are required to run Jan in Docker mode. Follow the [instructions](https://docs.docker.com/engine/install/ubuntu/) below to get started with Docker Engine on Ubuntu. - - ```bash - curl -fsSL https://get.docker.com -o get-docker.sh - sudo sh ./get-docker.sh --dry-run - ``` - - - If you intend to run Jan in GPU mode, you need to install `nvidia-driver` and `nvidia-docker2`. Follow the instruction [here](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) for installation. 
- -- Run Jan in Docker mode - > User can choose between `docker-compose.yml` with latest prebuilt docker image or `docker-compose-dev.yml` with local docker build - -| Docker compose Profile | Description | -| ---------------------- | -------------------------------------------- | -| `cpu-fs` | Run Jan in CPU mode with default file system | -| `cpu-s3fs` | Run Jan in CPU mode with S3 file system | -| `gpu-fs` | Run Jan in GPU mode with default file system | -| `gpu-s3fs` | Run Jan in GPU mode with S3 file system | - -| Environment Variable | Description | -| ----------------------- | ------------------------------------------------------------------------------------------------------- | -| `S3_BUCKET_NAME` | S3 bucket name - leave blank for default file system | -| `AWS_ACCESS_KEY_ID` | AWS access key ID - leave blank for default file system | -| `AWS_SECRET_ACCESS_KEY` | AWS secret access key - leave blank for default file system | -| `AWS_ENDPOINT` | AWS endpoint URL - leave blank for default file system | -| `AWS_REGION` | AWS region - leave blank for default file system | -| `API_BASE_URL` | Jan Server URL, please modify it as your public ip address or domain name default http://localhost:1377 | - -- **Option 1**: Run Jan in CPU mode - - ```bash - # cpu mode with default file system - docker compose --profile cpu-fs up -d - - # cpu mode with S3 file system - docker compose --profile cpu-s3fs up -d - ``` - -- **Option 2**: Run Jan in GPU mode - - - **Step 1**: Check CUDA compatibility with your NVIDIA driver by running `nvidia-smi` and check the CUDA version in the output - - ```bash - nvidia-smi - - # Output - +---------------------------------------------------------------------------------------+ - | NVIDIA-SMI 531.18 Driver Version: 531.18 CUDA Version: 12.1 | - |-----------------------------------------+----------------------+----------------------+ - | GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC | - | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | - | | | MIG M. | - |=========================================+======================+======================| - | 0 NVIDIA GeForce RTX 4070 Ti WDDM | 00000000:01:00.0 On | N/A | - | 0% 44C P8 16W / 285W| 1481MiB / 12282MiB | 2% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - | 1 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:02:00.0 Off | N/A | - | 0% 49C P8 14W / 120W| 0MiB / 6144MiB | 0% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - | 2 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:05:00.0 Off | N/A | - | 29% 38C P8 11W / 120W| 0MiB / 6144MiB | 0% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - - +---------------------------------------------------------------------------------------+ - | Processes: | - | GPU GI CI PID Type Process name GPU Memory | - | ID ID Usage | - |=======================================================================================| - ``` - - - **Step 2**: Visit [NVIDIA NGC Catalog ](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags) and find the smallest minor version of image tag that matches your CUDA version (e.g., 12.1 -> 12.1.0) - - - **Step 3**: Update the `Dockerfile.gpu` line number 5 with the latest minor version of the image tag from step 2 (e.g. 
change `FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`) - - - **Step 4**: Run command to start Jan in GPU mode - - ```bash - # GPU mode with default file system - docker compose --profile gpu-fs up -d - - # GPU mode with S3 file system - docker compose --profile gpu-s3fs up -d - ``` - -This will start the web server and you can access Jan at `http://localhost:3000`. - -> Note: RAG feature is not supported in Docker mode with s3fs yet. - ## Acknowledgements Jan builds on top of other open-source projects: @@ -334,18 +262,18 @@ Jan builds on top of other open-source projects: - Bugs & requests: file a GitHub ticket - For discussion: join our Discord [here](https://discord.gg/FTk2MvZwJH) -- For business inquiries: email hello@jan.ai +- For business inquiries: email hello@jan.ai - For jobs: please email hr@jan.ai ## Trust & Safety -Beware of scams. +Beware of scams! -- We will never ask you for personal info -- We are a free product; there's no paid version -- We don't have a token or ICO -- We are not actively fundraising or seeking donations +- We will never request your personal information. +- Our product is completely free; no paid version exists. +- We do not have a token or ICO. +- We are a [bootstrapped company](https://en.wikipedia.org/wiki/Bootstrapping), and don't have any external investors (*yet*). We're open to exploring opportunities with strategic partners who want to tackle [our mission](https://jan.ai/about#mission) together. ## License -Jan is free and open source, under the AGPLv3 license. +Jan is free and open source, under the **AGPLv3** license. diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts index 603445745..d934e1c06 100644 --- a/core/src/browser/extension.ts +++ b/core/src/browser/extension.ts @@ -1,6 +1,8 @@ -import { SettingComponentProps } from '../types' +import { Model, ModelEvent, SettingComponentProps } from '../types' import { getJanDataFolderPath, joinPath } from './core' +import { events } from './events' import { fs } from './fs' +import { ModelManager } from './models' export enum ExtensionTypeEnum { Assistant = 'assistant', @@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType { return undefined } + /** + * Registers models - persists them in the shared in-memory ModelManager instance's data map. + * @param models + */ + async registerModels(models: Model[]): Promise<void> { + for (const model of models) { + ModelManager.instance().register(model) + } + events.emit(ModelEvent.OnModelsUpdate, {}) + } +
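For orientation, a minimal sketch of how a provider extension might use the new in-memory registration path above. The extension class and the model fields are hypothetical; only `BaseExtension`, `Model`, and `registerModels` come from this changeset:

```typescript
import { BaseExtension, Model } from '@janhq/core'

// Hypothetical provider extension: models are now pushed into the shared
// in-memory ModelManager via registerModels instead of being written out
// as model.json files on disk.
class ExampleModelProvider extends BaseExtension {
  async onLoad() {
    const models = [
      // Partial object, cast for illustration only.
      { id: 'example-model', name: 'Example Model', engine: 'cortex' } as Model,
    ]
    // Stores each model in the shared ModelManager and notifies listeners
    // via ModelEvent.OnModelsUpdate.
    await this.registerModels(models)
  }

  onUnload() {}
}
```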
 + /** + * Register settings for the extension. + * @param settings + * @returns + */ async registerSettings(settings: SettingComponentProps[]): Promise<void> { if (!this.name) { console.error('Extension name is not defined') @@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType { } } + /** + * Get the setting value for the key. + * @param key + * @param defaultValue + * @returns + */ async getSetting<T>(key: string, defaultValue: T) { const keySetting = (await this.getSettings()).find((setting) => setting.key === key) @@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType { return } + /** + * Get the settings for the extension. + * @returns + */ async getSettings(): Promise<SettingComponentProps[]> { if (!this.name) return [] @@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType { } } + /** + * Update the settings for the extension. + * @param componentProps + * @returns + */ async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> { if (!this.name) return diff --git a/core/src/browser/extensions/engines/AIEngine.test.ts b/core/src/browser/extensions/engines/AIEngine.test.ts index 59dad280f..ab3280e1c 100644 --- a/core/src/browser/extensions/engines/AIEngine.test.ts +++ b/core/src/browser/extensions/engines/AIEngine.test.ts @@ -1,8 +1,6 @@ import { AIEngine } from './AIEngine' import { events } from '../../events' -import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types' -import { EngineManager } from './EngineManager' -import { fs } from '../../fs' +import { ModelEvent, Model } from '../../../types' jest.mock('../../events') jest.mock('./EngineManager') @@ -26,7 +24,7 @@ describe('AIEngine', () => { }) it('should load model if provider matches', async () => { - const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any + const model: any = { id: 'model1', engine: 'test-provider' } as any await engine.loadModel(model) @@ -34,7 +32,7 @@ describe('AIEngine', () => { }) it('should not load model if provider does not match', async () => { - const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any + const model: any = { id: 'model1', engine: 'other-provider' } as any await engine.loadModel(model) diff --git a/core/src/browser/extensions/engines/AIEngine.ts b/core/src/browser/extensions/engines/AIEngine.ts index 75354de88..d0528b0ab 100644 --- a/core/src/browser/extensions/engines/AIEngine.ts +++ b/core/src/browser/extensions/engines/AIEngine.ts @@ -1,17 +1,14 @@ -import { getJanDataFolderPath, joinPath } from '../../core' import { events } from '../../events' import { BaseExtension } from '../../extension' -import { fs } from '../../fs' -import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types' +import { MessageRequest, Model, ModelEvent } from '../../../types' import { EngineManager } from './EngineManager' +import { ModelManager } from '../../models/manager' /** * Base AIEngine * Applicable to all AI Engines */ export abstract class AIEngine extends BaseExtension { - private static modelsFolder = 'models' - // The inference engine abstract provider: string @@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension { override onLoad() { this.registerEngine() - events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model)) + events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model)) events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model)) } @@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension { registerEngine() { EngineManager.instance().register(this) } - async registerModels(models: Model[]): Promise<void> { - const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder]) - - let shouldNotifyModelUpdate = false - for (const model of models) { - const modelPath = await joinPath([modelFolderPath, model.id]) - const isExist = await fs.existsSync(modelPath) - - if (isExist) { - await this.migrateModelIfNeeded(model, modelPath) - continue - } - - await fs.mkdir(modelPath) - await fs.writeFileSync( - await joinPath([modelPath, 'model.json']), - JSON.stringify(model, null, 2) - ) - shouldNotifyModelUpdate = true - } - - if (shouldNotifyModelUpdate) { - events.emit(ModelEvent.OnModelsUpdate, {}) - } - } - - async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> { - try { - const modelJson = await fs.readFileSync(await joinPath([modelPath, 
'model.json']), 'utf-8') - const currentModel: Model = JSON.parse(modelJson) - if (currentModel.version !== model.version) { - await fs.writeFileSync( - await joinPath([modelPath, 'model.json']), - JSON.stringify(model, null, 2) - ) - - events.emit(ModelEvent.OnModelsUpdate, {}) - } - } catch (error) { - console.warn('Error while try to migrating model', error) - } - } - /** * Loads the model. */ - async loadModel(model: ModelFile): Promise<void> { + async loadModel(model: Model): Promise<void> { if (model.engine.toString() !== this.provider) return Promise.resolve() events.emit(ModelEvent.OnModelReady, model) return Promise.resolve() diff --git a/core/src/browser/extensions/engines/EngineManager.ts b/core/src/browser/extensions/engines/EngineManager.ts index 2980c5c65..90ce75ac5 100644 --- a/core/src/browser/extensions/engines/EngineManager.ts +++ b/core/src/browser/extensions/engines/EngineManager.ts @@ -1,3 +1,4 @@ +import { InferenceEngine } from '../../../types' import { AIEngine } from './AIEngine' /** @@ -20,6 +21,22 @@ export class EngineManager { * @returns The engine, if found. */ get<T extends AIEngine>(provider: string): T | undefined { + // Backward compatible provider + // nitro is migrated to cortex + if ( + [ + InferenceEngine.nitro, + InferenceEngine.cortex, + InferenceEngine.cortex_llamacpp, + InferenceEngine.cortex_onnx, + InferenceEngine.cortex_tensorrtllm, + InferenceEngine.cortex_onnx, + ] + .map((e) => e.toString()) + .includes(provider) + ) + provider = InferenceEngine.cortex + return this.engines.get(provider) as T | undefined } @@ -27,6 +44,6 @@ export class EngineManager { * The instance of the engine manager. */ static instance(): EngineManager { - return window.core?.engineManager as EngineManager ?? new EngineManager() + return (window.core?.engineManager as EngineManager) ?? new EngineManager() } }
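A short illustration of the backward-compatibility shim above: any of the legacy `nitro`/`cortex_*` provider names now resolve to the single registered `cortex` engine. The lookup itself is from this changeset; the snippet around it is illustrative and assumes a cortex engine has been registered:

```typescript
import { EngineManager, InferenceEngine } from '@janhq/core'

const manager = EngineManager.instance()

// 'nitro' is rewritten to InferenceEngine.cortex before the map lookup,
// so both calls return the same engine instance.
const viaLegacyName = manager.get(InferenceEngine.nitro)
const viaCurrentName = manager.get(InferenceEngine.cortex)
console.log(viaLegacyName === viaCurrentName) // true once the cortex engine is registered
```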
diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts index 4ae81496f..08fd947da 100644 --- a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts +++ b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts @@ -3,7 +3,7 @@ */ import { LocalOAIEngine } from './LocalOAIEngine' import { events } from '../../events' -import { ModelEvent, ModelFile, Model } from '../../../types' +import { ModelEvent, Model } from '../../../types' import { executeOnMain, systemInformation, dirName } from '../../core' jest.mock('../../core', () => ({ @@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => { }) it('should load model correctly', async () => { - const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any + const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any const modelFolder = 'path/to' const systemInfo = { os: 'testOS' } const res = { error: null } @@ -66,7 +66,7 @@ describe('LocalOAIEngine', () => { }) it('should handle load model error', async () => { - const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any + const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any const modelFolder = 'path/to' const systemInfo = { os: 'testOS' } const res = { error: 'load error' } @@ -91,9 +91,7 @@ describe('LocalOAIEngine', () => { it('should not unload model if engine does not match', async () => { const model: Model = { engine: 'otherProvider' } as any - await engine.unloadModel(model) - expect(executeOnMain).not.toHaveBeenCalled() expect(events.emit).not.toHaveBeenCalledWith(ModelEvent.OnModelStopped, {}) }) diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.ts b/core/src/browser/extensions/engines/LocalOAIEngine.ts index 123b9a593..b54f8fbde 100644 --- a/core/src/browser/extensions/engines/LocalOAIEngine.ts +++ b/core/src/browser/extensions/engines/LocalOAIEngine.ts @@ -1,6 +1,6 @@ -import { executeOnMain, systemInformation, dirName } from '../../core' +import { executeOnMain, systemInformation, dirName, joinPath, getJanDataFolderPath } from '../../core' import { events } from '../../events' -import { Model, ModelEvent, ModelFile } from '../../../types' +import { Model, ModelEvent } from '../../../types' import { OAIEngine } from './OAIEngine' /** @@ -22,16 +22,16 @@ export abstract class LocalOAIEngine extends OAIEngine { override onLoad() { super.onLoad() // These events are applicable to local inference providers - events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model)) + events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model)) events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model)) } /** * Load the model. */ - override async loadModel(model: ModelFile): Promise<void> { + override async loadModel(model: Model & { file_path?: string }): Promise<void> { if (model.engine.toString() !== this.provider) return - const modelFolder = await dirName(model.file_path) + const modelFolder = 'file_path' in model && model.file_path ? 
await dirName(model.file_path) : await this.getModelFilePath(model.id) const systemInfo = await systemInformation() const res = await executeOnMain( this.nodeModule, @@ -63,4 +63,12 @@ export abstract class LocalOAIEngine extends OAIEngine { events.emit(ModelEvent.OnModelStopped, {}) }) } + + /// Legacy + private getModelFilePath = async ( + id: string, + ): Promise<string> => { + return joinPath([await getJanDataFolderPath(), 'models', id]) + } + /// }
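The `loadModel` change above boils down to a single folder-resolution rule; here is a sketch of that rule in isolation (the helper name is ours, the core utilities are real):

```typescript
import { dirName, getJanDataFolderPath, joinPath, Model } from '@janhq/core'

// Models imported from disk still carry a legacy file_path and use its
// directory; models registered in-memory fall back to the legacy
// <jan data folder>/models/<model id> location.
async function resolveModelFolder(model: Model & { file_path?: string }): Promise<string> {
  return model.file_path
    ? dirName(model.file_path)
    : joinPath([await getJanDataFolderPath(), 'models', model.id])
}
```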
diff --git a/core/src/browser/extensions/engines/OAIEngine.ts b/core/src/browser/extensions/engines/OAIEngine.ts index a8dde4677..694a87264 100644 --- a/core/src/browser/extensions/engines/OAIEngine.ts +++ b/core/src/browser/extensions/engines/OAIEngine.ts @@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine { * Inference request */ override async inference(data: MessageRequest) { - if (data.model?.engine?.toString() !== this.provider) return + if (!data.model?.id) { + events.emit(MessageEvent.OnMessageResponse, { + status: MessageStatus.Error, + content: [ + { + type: ContentType.Text, + text: { + value: 'No model ID provided', + annotations: [], + }, + }, + ], + }) + return + } const timestamp = Date.now() const message: ThreadMessage = { @@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine { model: model.id, stream: true, ...model.parameters, - ...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}), } if (this.transformPayload) { requestBody = this.transformPayload(requestBody) diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts index 024ced470..9713256b3 100644 --- a/core/src/browser/extensions/engines/helpers/sse.ts +++ b/core/src/browser/extensions/engines/helpers/sse.ts @@ -10,7 +10,7 @@ export function requestInference( requestBody: any, model: { id: string - parameters: ModelRuntimeParams + parameters?: ModelRuntimeParams }, controller?: AbortController, headers?: HeadersInit, @@ -22,7 +22,7 @@ export function requestInference( headers: { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*', - 'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json', + 'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json', ...headers, }, body: JSON.stringify(requestBody), @@ -45,7 +45,7 @@ export function requestInference( subscriber.complete() return } - if (model.parameters.stream === false) { + if (model.parameters?.stream === false) { const data = await response.json() if (transformResponse) { subscriber.next(transformResponse(data)) diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts index 040542927..e224ec5cc 100644 --- a/core/src/browser/extensions/model.ts +++ b/core/src/browser/extensions/model.ts @@ -1,13 +1,5 @@ import { BaseExtension, ExtensionTypeEnum } from '../extension' -import { - GpuSetting, - HuggingFaceRepoData, - ImportingModel, - Model, - ModelFile, - ModelInterface, - OptionType, -} from '../../types' +import { Model, ModelInterface, OptionType } from '../../types' /** * Model extension for managing models. */ @@ -20,17 +12,16 @@ export abstract class ModelExtension extends BaseExtension implements ModelInterface return ExtensionTypeEnum.Model } - abstract downloadModel( - model: Model, - gpuSettings?: GpuSetting, - network?: { proxy: string; ignoreSSL?: boolean } + abstract getModels(): Promise<Model[]> + abstract pullModel(model: string, id?: string, name?: string): Promise<void> + abstract cancelModelPull(modelId: string): Promise<void> + abstract importModel( + model: string, + modePath: string, + name?: string, + optionType?: OptionType ): Promise<void> - abstract cancelModelDownload(modelId: string): Promise<void> - abstract deleteModel(model: ModelFile): Promise<void> - abstract getDownloadedModels(): Promise<ModelFile[]> - abstract getConfiguredModels(): Promise<ModelFile[]> - abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void> - abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile> - abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData> - abstract getDefaultModel(): Promise<Model> + abstract updateModel(modelInfo: Partial<Model>): Promise<Model> + abstract deleteModel(model: string): Promise<void> + abstract isModelLoaded(model: string): Promise<boolean> } diff --git a/core/src/browser/index.test.ts b/core/src/browser/index.test.ts index 339cd9046..c8cabbb0b 100644 --- a/core/src/browser/index.test.ts +++ b/core/src/browser/index.test.ts @@ -1,32 +1,37 @@ -import * as Core from './core'; -import * as Events from './events'; -import * as FileSystem from './fs'; -import * as Extension from './extension'; -import * as Extensions from './extensions'; -import * as Tools from './tools'; +import * as Core from './core' +import * as Events from './events' +import * as FileSystem from './fs' +import * as Extension from './extension' +import * as Extensions from './extensions' +import * as Tools from './tools' +import * as Models from './models' describe('Module Tests', () => { - it('should export Core module', () => { - expect(Core).toBeDefined(); - }); + it('should export Core module', () => { + expect(Core).toBeDefined() + }) - it('should export Event module', () => { - expect(Events).toBeDefined(); - }); + it('should export Event module', () => { + expect(Events).toBeDefined() + }) - it('should export Filesystem module', () => { - expect(FileSystem).toBeDefined(); - }); + it('should export Filesystem module', () => { + expect(FileSystem).toBeDefined() + }) - it('should export Extension module', () => { - expect(Extension).toBeDefined(); - }); + it('should export Extension module', () => { + expect(Extension).toBeDefined() + }) - it('should export all base extensions', () => { - expect(Extensions).toBeDefined(); - }); + it('should export all base extensions', () => { + expect(Extensions).toBeDefined() + }) - it('should export all base tools', () => { - expect(Tools).toBeDefined(); - }); -}); \ No newline at end of file + it('should export all base tools', () => { + expect(Tools).toBeDefined() + }) + + it('should export all base models', () => { + expect(Models).toBeDefined() + }) +}) diff --git a/core/src/browser/index.ts b/core/src/browser/index.ts index a7803c7e0..a6ce187ca 100644 --- a/core/src/browser/index.ts +++ b/core/src/browser/index.ts @@ -33,3 +33,9 @@ export * from './extensions' * @module */ export * from './tools' + +/** + * Export all base models. 
+ * @module + */ +export * from './models' diff --git a/core/src/browser/models/index.ts b/core/src/browser/models/index.ts new file mode 100644 index 000000000..81d37e501 --- /dev/null +++ b/core/src/browser/models/index.ts @@ -0,0 +1,10 @@ +/** + * Export ModelManager + * @module + */ +export { ModelManager } from './manager' + +/** + * Export all utils + */ +export * from './utils' diff --git a/core/src/browser/models/manager.ts b/core/src/browser/models/manager.ts new file mode 100644 index 000000000..d5afe83d5 --- /dev/null +++ b/core/src/browser/models/manager.ts @@ -0,0 +1,47 @@ +import { Model, ModelEvent } from '../../types' +import { events } from '../events' + +/** + * Manages the registered models across extensions. + */ +export class ModelManager { + public models = new Map<string, Model>() + + constructor() { + if (window) { + window.core.modelManager = this + } + } + + /** + * Registers a model. + * @param model - The model to register. + */ + register<T extends Model>(model: T) { + if (this.models.has(model.id)) { + this.models.set(model.id, { + ...model, + ...this.models.get(model.id), + }) + } else { + this.models.set(model.id, model) + } + events.emit(ModelEvent.OnModelsUpdate, {}) + } + + /** + * Retrieves a model by its id. + * @param id - The id of the model to retrieve. + * @returns The model, if found. + */ + get<T extends Model>(id: string): T | undefined { + return this.models.get(id) as T | undefined + } + + /** + * The instance of the model manager. + */ + static instance(): ModelManager { + return (window.core?.modelManager as ModelManager) ?? new ModelManager() + } +}
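Note the merge order in `register` above: the existing entry is spread last, so fields from an earlier registration win over a re-registration with the same id. A small sketch of that behavior (it runs inside Jan's renderer, since the manager hangs off `window.core`; the model values are made up):

```typescript
import { Model, ModelManager } from '@janhq/core'

const manager = ModelManager.instance()

manager.register({ id: 'example-model', name: 'First Name' } as Model)
// Same id again: existing fields take precedence, so the first name survives;
// only keys the first registration lacked would be picked up here.
manager.register({ id: 'example-model', name: 'Second Name' } as Model)

console.log(manager.get('example-model')?.name) // 'First Name'
```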
diff --git a/web/utils/modelParam.test.ts b/core/src/browser/models/utils.test.ts similarity index 87% rename from web/utils/modelParam.test.ts rename to core/src/browser/models/utils.test.ts index 97325d277..ac876c3dc 100644 --- a/web/utils/modelParam.test.ts +++ b/core/src/browser/models/utils.test.ts @@ -1,7 +1,10 @@ // web/utils/modelParam.test.ts -import { normalizeValue, validationRules } from './modelParam' -import { extractModelLoadParams } from './modelParam'; -import { extractInferenceParams } from './modelParam'; +import { + normalizeValue, + validationRules, + extractModelLoadParams, + extractInferenceParams, +} from './utils' describe('validationRules', () => { it('should validate temperature correctly', () => { @@ -151,13 +154,12 @@ describe('validationRules', () => { }) }) - - it('should normalize invalid values for keys not listed in validationRules', () => { - expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid') - expect(normalizeValue('invalid_key', 123)).toBe(123) - expect(normalizeValue('invalid_key', true)).toBe(true) - expect(normalizeValue('invalid_key', false)).toBe(false) - }) +it('should normalize invalid values for keys not listed in validationRules', () => { + expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid') + expect(normalizeValue('invalid_key', 123)).toBe(123) + expect(normalizeValue('invalid_key', true)).toBe(true) + expect(normalizeValue('invalid_key', false)).toBe(false) +}) describe('normalizeValue', () => { it('should normalize ctx_len correctly', () => { @@ -192,19 +194,16 @@ describe('normalizeValue', () => { }) }) +it('should handle invalid values correctly by falling back to originParams', () => { + const modelParams = { temperature: 'invalid', token_limit: -1 } + const originParams = { temperature: 0.5, token_limit: 100 } + expect(extractInferenceParams(modelParams as any, originParams)).toEqual(originParams) +}) - it('should handle invalid values correctly by falling back to originParams', () => { - const modelParams = { temperature: 'invalid', token_limit: -1 }; - const originParams = { temperature: 0.5, token_limit: 100 }; - expect(extractInferenceParams(modelParams, originParams)).toEqual(originParams); - }); +it('should return an empty object when no modelParams are provided', () => { + expect(extractModelLoadParams()).toEqual({}) +}) - - it('should return an empty object when no modelParams are provided', () => { - expect(extractModelLoadParams()).toEqual({}); - }); - - - it('should return an empty object when no modelParams are provided', () => { - expect(extractInferenceParams()).toEqual({}); - }); +it('should return an empty object when no modelParams are provided', () => { + expect(extractInferenceParams()).toEqual({}) +}) diff --git a/web/utils/modelParam.ts b/core/src/browser/models/utils.ts similarity index 86% rename from web/utils/modelParam.ts rename to core/src/browser/models/utils.ts index 315aeaeb3..0e52441b2 100644 --- a/web/utils/modelParam.ts +++ b/core/src/browser/models/utils.ts @@ -1,26 +1,20 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/naming-convention */ -import { ModelRuntimeParams, ModelSettingParams } from '@janhq/core' - -import { ModelParams } from '@/types/model' +import { ModelParams, ModelRuntimeParams, ModelSettingParams } from '../../types' /** * Validation rules for model parameters */ export const validationRules: { [key: string]: (value: any) => boolean } = { - temperature: (value: any) => - typeof value === 'number' && value >= 0 && value <= 2, + temperature: (value: any) => typeof value === 'number' && value >= 0 && value <= 2, token_limit: (value: any) => Number.isInteger(value) && value >= 0, top_k: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, top_p: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, stream: (value: any) => typeof value === 'boolean', max_tokens: (value: any) => Number.isInteger(value) && value >= 0, - stop: (value: any) => - Array.isArray(value) && value.every((v) => typeof v === 'string'), - frequency_penalty: (value: any) => - typeof value === 'number' && value >= 0 && value <= 1, - presence_penalty: (value: any) => - typeof value === 'number' && value >= 0 && value <= 1, + stop: (value: any) => Array.isArray(value) && value.every((v) => typeof v === 'string'), + frequency_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, + presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1, ctx_len: (value: any) => Number.isInteger(value) && value >= 0, ngl: (value: any) => Number.isInteger(value) && value >= 0, @@ -76,6 +70,7 @@ export const extractInferenceParams = ( stop: undefined, frequency_penalty: undefined, presence_penalty: undefined, + engine: undefined, } const runtimeParams: ModelRuntimeParams = {} @@ -119,11 +114,18 @@ export const extractModelLoadParams = ( embedding: undefined, n_parallel: undefined, cpu_threads: undefined, + pre_prompt: undefined, + system_prompt: undefined, + ai_prompt: undefined, + user_prompt: undefined, prompt_template: undefined, + model_path: undefined, llama_model_path: undefined, mmproj: undefined, + cont_batching: undefined, vision_model: undefined, text_model: undefined, + engine: undefined, } const settingParams: ModelSettingParams = {}
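With these helpers now exported from core, a usage sketch of the split they perform — the values are made up; the fallback-to-`originParams` behavior matches the tests above:

```typescript
import { extractInferenceParams, extractModelLoadParams } from '@janhq/core'

// temperature is out of range (> 2), so the original value is kept;
// ctx_len is a load-time setting and is excluded from inference params.
const inferenceParams = extractInferenceParams(
  { temperature: 9, ctx_len: 2048, stream: true } as any,
  { temperature: 0.7 }
)
// => { temperature: 0.7, stream: true }

// The complementary helper keeps only load-time settings such as ctx_len.
const loadParams = extractModelLoadParams({ ctx_len: 2048, stream: true } as any)
// => { ctx_len: 2048 }
```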
diff --git a/core/src/node/api/processors/download.test.ts b/core/src/node/api/processors/download.test.ts index 370f1746f..21d94165d 100644 --- a/core/src/node/api/processors/download.test.ts +++ b/core/src/node/api/processors/download.test.ts @@ -8,7 +8,8 @@ jest.mock('../../helper', () => ({ jest.mock('../../helper/path', () => ({ validatePath: jest.fn().mockReturnValue('path/to/folder'), - normalizeFilePath: () => process.platform === 'win32' ? 'C:\\Users\path\\to\\file.gguf' : '/Users/path/to/file.gguf', + normalizeFilePath: () => + process.platform === 'win32' ? 'C:\\Users\\path\\to\\file.gguf' : '/Users/path/to/file.gguf', })) jest.mock( diff --git a/core/src/node/api/processors/download.ts b/core/src/node/api/processors/download.ts index 5db18a53a..ebeb7c299 100644 --- a/core/src/node/api/processors/download.ts +++ b/core/src/node/api/processors/download.ts @@ -1,6 +1,6 @@ import { resolve, sep } from 'path' import { DownloadEvent } from '../../../types/api' -import { normalizeFilePath, validatePath } from '../../helper/path' +import { normalizeFilePath } from '../../helper/path' import { getJanDataFolderPath } from '../../helper' import { DownloadManager } from '../../helper/download' import { createWriteStream, renameSync } from 'fs' @@ -37,7 +37,6 @@ export class Downloader implements Processor { const modelId = downloadRequest.modelId ?? array.pop() ?? '' const destination = resolve(getJanDataFolderPath(), normalizedPath) - validatePath(destination) const rq = request({ url, strictSSL, proxy }) // Put request to download manager instance @@ -50,11 +49,6 @@ export class Downloader implements Processor { const initialDownloadState: DownloadState = { modelId, fileName, - time: { - elapsed: 0, - remaining: 0, - }, - speed: 0, percent: 0, size: { total: 0, diff --git a/core/src/node/api/processors/fs.ts b/core/src/node/api/processors/fs.ts index 0557d2187..ada744d53 100644 --- a/core/src/node/api/processors/fs.ts +++ b/core/src/node/api/processors/fs.ts @@ -1,5 +1,5 @@ import { join, resolve } from 'path' -import { normalizeFilePath, validatePath } from '../../helper/path' +import { normalizeFilePath } from '../../helper/path' import { getJanDataFolderPath } from '../../helper' import { Processor } from './Processor' import fs from 'fs' @@ -36,7 +36,6 @@ export class FileSystem implements Processor { return path } const absolutePath = resolve(path) - validatePath(absolutePath) return absolutePath }) ) @@ -55,7 +54,6 @@ export class FileSystem implements Processor { } const absolutePath = resolve(path) - validatePath(absolutePath) return new Promise((resolve, reject) => { fs.rm(absolutePath, { recursive: true, force: true }, (err) => { @@ -79,7 +77,6 @@ export class FileSystem implements Processor { } const absolutePath = resolve(path) - validatePath(absolutePath) return new Promise((resolve, reject) => { fs.mkdir(absolutePath, { recursive: true }, (err) => { diff --git a/core/src/node/api/processors/fsExt.ts b/core/src/node/api/processors/fsExt.ts index 4d113e1ee..846d0c26a 100644 --- a/core/src/node/api/processors/fsExt.ts +++ b/core/src/node/api/processors/fsExt.ts @@ -1,6 +1,6 @@ import { basename, join } from 'path' import fs, { readdirSync } from 'fs' -import { appResourcePath, normalizeFilePath, validatePath } from '../../helper/path' +import { appResourcePath, normalizeFilePath } from '../../helper/path' import { defaultAppConfig, getJanDataFolderPath, getJanDataFolderPath as getPath } from '../../helper' import { Processor } from './Processor' import { FileStat } from '../../../types' @@ -61,7 +61,6 @@ export class FSExt implements Processor { const dataBuffer = Buffer.from(data, 'base64') const writePath = 
join(getJanDataFolderPath(), normalizedPath) - validatePath(writePath) fs.writeFileSync(writePath, dataBuffer) } catch (err) { console.error(`writeFile ${path} result: ${err}`) @@ -69,7 +68,6 @@ export class FSExt implements Processor { } copyFile(src: string, dest: string): Promise { - validatePath(dest) return new Promise((resolve, reject) => { fs.copyFile(src, dest, (err) => { if (err) { diff --git a/core/src/node/api/restful/common.ts b/core/src/node/api/restful/common.ts index c8061c34a..39f7b8d8b 100644 --- a/core/src/node/api/restful/common.ts +++ b/core/src/node/api/restful/common.ts @@ -10,6 +10,7 @@ import { getMessages, retrieveMessage, updateThread, + models, } from './helper/builder' import { JanApiRouteConfiguration } from './helper/configuration' @@ -26,9 +27,12 @@ export const commonRouter = async (app: HttpServer) => { // Common Routes // Read & Delete :: Threads | Models | Assistants Object.keys(JanApiRouteConfiguration).forEach((key) => { - app.get(`/${key}`, async (_request) => - getBuilder(JanApiRouteConfiguration[key]).then(normalizeData) - ) + app.get(`/${key}`, async (_req, _res) => { + if (key === 'models') { + return models(_req, _res) + } + return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData) + }) app.get(`/${key}/:id`, async (request: any) => retrieveBuilder(JanApiRouteConfiguration[key], request.params.id) diff --git a/core/src/node/api/restful/helper/builder.test.ts b/core/src/node/api/restful/helper/builder.test.ts index eb21e9401..f21257098 100644 --- a/core/src/node/api/restful/helper/builder.test.ts +++ b/core/src/node/api/restful/helper/builder.test.ts @@ -220,22 +220,6 @@ describe('builder helper functions', () => { }) describe('chatCompletions', () => { - it('should return an error if model is not found', async () => { - const request = { body: { model: 'nonexistentModel' } } - const reply = { code: jest.fn().mockReturnThis(), send: jest.fn() } - - await chatCompletions(request, reply) - expect(reply.code).toHaveBeenCalledWith(404) - expect(reply.send).toHaveBeenCalledWith({ - error: { - message: 'The model nonexistentModel does not exist', - type: 'invalid_request_error', - param: null, - code: 'model_not_found', - }, - }) - }) - it('should return the error on status not ok', async () => { const request = { body: { model: 'model1' } } const mockSend = jest.fn() diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts index db2000d69..c3493a8be 100644 --- a/core/src/node/api/restful/helper/builder.ts +++ b/core/src/node/api/restful/helper/builder.ts @@ -10,9 +10,9 @@ import { } from 'fs' import { JanApiRouteConfiguration, RouteConfiguration } from './configuration' import { join } from 'path' -import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types' -import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper' -import { DEFAULT_CHAT_COMPLETION_URL } from './consts' +import { ContentType, InferenceEngine, MessageStatus, ThreadMessage } from '../../../../types' +import { getJanDataFolderPath } from '../../../helper' +import { CORTEX_API_URL } from './consts' // TODO: Refactor these export const getBuilder = async (configuration: RouteConfiguration) => { @@ -297,57 +297,56 @@ export const downloadModel = async ( } } -export const chatCompletions = async (request: any, reply: any) => { - const modelList = await getBuilder(JanApiRouteConfiguration.models) - const modelId = request.body.model - - const matchedModels = modelList.filter((model: Model) => 
model.id === modelId) - if (matchedModels.length === 0) { - const error = { - error: { - message: `The model ${request.body.model} does not exist`, - type: 'invalid_request_error', - param: null, - code: 'model_not_found', - }, - } - reply.code(404).send(error) - return - } - - const requestedModel = matchedModels[0] - - const engineConfiguration = await getEngineConfiguration(requestedModel.engine) - - let apiKey: string | undefined = undefined - let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL - - if (engineConfiguration) { - apiKey = engineConfiguration.api_key - apiUrl = engineConfiguration.full_url ?? DEFAULT_CHAT_COMPLETION_URL - } - +/** + * Proxy /models to cortex + * @param request + * @param reply + */ +export const models = async (request: any, reply: any) => { + const fetch = require('node-fetch') const headers: Record = { 'Content-Type': 'application/json', } - if (apiKey) { - headers['Authorization'] = `Bearer ${apiKey}` - headers['api-key'] = apiKey - } + const response = await fetch(`${CORTEX_API_URL}/models`, { + method: request.method, + headers: headers, + body: JSON.stringify(request.body), + }) - if (requestedModel.engine === 'openai' && request.body.stop) { - // openai only allows max 4 stop words - request.body.stop = request.body.stop.slice(0, 4) + if (response.status !== 200) { + // Forward the error response to client via reply + const responseBody = await response.text() + const responseHeaders = Object.fromEntries(response.headers) + reply.code(response.status).headers(responseHeaders).send(responseBody) + } else { + reply.raw.writeHead(200, { + 'Content-Type': 'application/json', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Access-Control-Allow-Origin': '*', + }) + response.body.pipe(reply.raw) + } +} + +/** + * Proxy chat completions + * @param request + * @param reply + */ +export const chatCompletions = async (request: any, reply: any) => { + const headers: Record = { + 'Content-Type': 'application/json', } // add engine for new cortex cpp engine - if (requestedModel.engine === 'nitro') { - request.body.engine = 'cortex.llamacpp' + if (request.body.engine === InferenceEngine.nitro) { + request.body.engine = InferenceEngine.cortex_llamacpp } const fetch = require('node-fetch') - const response = await fetch(apiUrl, { + const response = await fetch(`${CORTEX_API_URL}/chat/completions`, { method: 'POST', headers: headers, body: JSON.stringify(request.body), diff --git a/core/src/node/api/restful/helper/consts.test.ts b/core/src/node/api/restful/helper/consts.test.ts index 34d42dcf0..524f0cbeb 100644 --- a/core/src/node/api/restful/helper/consts.test.ts +++ b/core/src/node/api/restful/helper/consts.test.ts @@ -1,6 +1,5 @@ +import { CORTEX_DEFAULT_PORT } from './consts' -import { NITRO_DEFAULT_PORT } from './consts'; - -it('should test NITRO_DEFAULT_PORT', () => { - expect(NITRO_DEFAULT_PORT).toBe(3928); -}); +it('should test CORTEX_DEFAULT_PORT', () => { + expect(CORTEX_DEFAULT_PORT).toBe(39291) +}) diff --git a/core/src/node/api/restful/helper/consts.ts b/core/src/node/api/restful/helper/consts.ts index 8d8f8e341..412d304ee 100644 --- a/core/src/node/api/restful/helper/consts.ts +++ b/core/src/node/api/restful/helper/consts.ts @@ -1,19 +1,7 @@ -// The PORT to use for the Nitro subprocess -export const NITRO_DEFAULT_PORT = 3928 +export const CORTEX_DEFAULT_PORT = 39291 -// The HOST address to use for the Nitro subprocess export const LOCAL_HOST = '127.0.0.1' export const SUPPORTED_MODEL_FORMAT = '.gguf' -// The URL for the Nitro subprocess 
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}` -// The URL for the Nitro subprocess to load a model -export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel` -// The URL for the Nitro subprocess to validate a model -export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus` - -// The URL for the Nitro subprocess to kill itself -export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` - -export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url +export const CORTEX_API_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1` diff --git a/core/src/node/api/restful/helper/startStopModel.test.ts b/core/src/node/api/restful/helper/startStopModel.test.ts index a5475cc28..7c1a56cf1 100644 --- a/core/src/node/api/restful/helper/startStopModel.test.ts +++ b/core/src/node/api/restful/helper/startStopModel.test.ts @@ -1,16 +1,10 @@ +import { startModel } from './startStopModel' +describe('startModel', () => { + it('test_startModel_error', async () => { + const modelId = 'testModelId' + const settingParams = undefined - import { startModel } from './startStopModel' - - describe('startModel', () => { - it('test_startModel_error', async () => { - const modelId = 'testModelId' - const settingParams = undefined - - const result = await startModel(modelId, settingParams) - - expect(result).toEqual({ - error: expect.any(Error), - }) - }) + await expect(startModel(modelId, settingParams)).rejects.toThrow() }) +}) diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts index d1a23dca9..2e9db6d15 100644 --- a/core/src/node/api/restful/helper/startStopModel.ts +++ b/core/src/node/api/restful/helper/startStopModel.ts @@ -1,6 +1,5 @@ -import { join } from 'path' -import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper' import { ModelSettingParams } from '../../../../types' +import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts' /** * Start a model @@ -9,70 +8,18 @@ import { ModelSettingParams } from '../../../../types' * @param * @returns */ export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => { - try { - await runModel(modelId, settingParams) - - return { - message: `Model ${modelId} started`, - } - } catch (e) { - return { - error: e, - } - } + return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, { + method: 'POST', + body: JSON.stringify({ model: modelId, ...settingParams }), + }) } -/** - * Run a model using installed cortex extension - * @param model - * @param settingParams - */ -const runModel = async (model: string, settingParams?: ModelSettingParams): Promise => { - const janDataFolderPath = getJanDataFolderPath() - const modelFolder = join(janDataFolderPath, 'models', model) - let module = join( - getJanExtensionsPath(), - '@janhq', - 'inference-cortex-extension', - 'dist', - 'node', - 'index.cjs' - ) - // Just reuse the cortex extension implementation, don't duplicate then lost of sync - return import(module).then((extension) => - extension - .loadModel( - { - modelFolder, - model, - }, - settingParams - ) - .then(() => log(`[SERVER]::Debug: Model is loaded`)) - .then({ - message: 'Model started', - }) - ) -} /* - * Stop model and kill nitro process.
*/ -export const stopModel = async (_modelId: string) => { - let module = join( - getJanExtensionsPath(), - '@janhq', - 'inference-cortex-extension', - 'dist', - 'node', - 'index.cjs' - ) - // Just reuse the cortex extension implementation, don't duplicate then lost of sync - return import(module).then((extension) => - extension - .unloadModel() - .then(() => log(`[SERVER]::Debug: Model is unloaded`)) - .then({ - message: 'Model stopped', - }) - ) +export const stopModel = async (modelId: string) => { + return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, { + method: 'POST', + body: JSON.stringify({ model: modelId }), + }) } diff --git a/core/src/node/helper/path.ts b/core/src/node/helper/path.ts index 8115383bb..4efbea463 100644 --- a/core/src/node/helper/path.ts +++ b/core/src/node/helper/path.ts @@ -34,18 +34,4 @@ export function appResourcePath() { // server return join(global.core.appPath(), '../../..') -} - -export function validatePath(path: string) { - const appDataFolderPath = getJanDataFolderPath() - const resourcePath = appResourcePath() - const applicationSupportPath = global.core?.appPath() ?? resourcePath - const absolutePath = resolve(__dirname, path) - if ( - ![appDataFolderPath, resourcePath, applicationSupportPath].some((whiteListedPath) => - absolutePath.startsWith(whiteListedPath) - ) - ) { - throw new Error(`Invalid path: ${absolutePath}`) - } -} +} \ No newline at end of file diff --git a/core/src/types/api/index.ts b/core/src/types/api/index.ts index 8f1ff70bf..093314a15 100644 --- a/core/src/types/api/index.ts +++ b/core/src/types/api/index.ts @@ -72,6 +72,8 @@ export enum DownloadEvent { onFileDownloadUpdate = 'onFileDownloadUpdate', onFileDownloadError = 'onFileDownloadError', onFileDownloadSuccess = 'onFileDownloadSuccess', + onFileDownloadStopped = 'onFileDownloadStopped', + onFileDownloadStarted = 'onFileDownloadStarted', onFileUnzipSuccess = 'onFileUnzipSuccess', } diff --git a/core/src/types/file/index.ts b/core/src/types/file/index.ts index 9f3e32b3e..87d83c51d 100644 --- a/core/src/types/file/index.ts +++ b/core/src/types/file/index.ts @@ -6,8 +6,8 @@ export type FileStat = { export type DownloadState = { modelId: string // TODO: change to download id fileName: string - time: DownloadTime - speed: number + time?: DownloadTime + speed?: number percent: number size: DownloadSize diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts index 933c698c3..7b67a8e94 100644 --- a/core/src/types/model/modelEntity.ts +++ b/core/src/types/model/modelEntity.ts @@ -6,8 +6,8 @@ import { FileMetadata } from '../file' */ export type ModelInfo = { id: string - settings: ModelSettingParams - parameters: ModelRuntimeParams + settings?: ModelSettingParams + parameters?: ModelRuntimeParams engine?: InferenceEngine } @@ -15,7 +15,6 @@ export type ModelInfo = { * Represents the inference engine. 
* @stored */ - export enum InferenceEngine { anthropic = 'anthropic', mistral = 'mistral', @@ -28,11 +27,13 @@ nitro_tensorrt_llm = 'nitro-tensorrt-llm', cohere = 'cohere', nvidia = 'nvidia', - cortex_llamacpp = 'cortex.llamacpp', - cortex_onnx = 'cortex.onnx', - cortex_tensorrtllm = 'cortex.tensorrt-llm', + cortex = 'cortex', + cortex_llamacpp = 'llama-cpp', + cortex_onnx = 'onnxruntime', + cortex_tensorrtllm = 'tensorrt-llm', } +// Represents an artifact of a model, including its filename and URL export type ModelArtifact = { filename: string url: string @@ -104,6 +105,7 @@ export type Model = { engine: InferenceEngine } +// Represents metadata associated with a model export type ModelMetadata = { author: string tags: string[] @@ -124,14 +126,20 @@ export type ModelSettingParams = { n_parallel?: number cpu_threads?: number prompt_template?: string + pre_prompt?: string system_prompt?: string ai_prompt?: string user_prompt?: string + // path param + model_path?: string + // legacy path param llama_model_path?: string + // clip model path mmproj?: string cont_batching?: boolean vision_model?: boolean text_model?: boolean + engine?: InferenceEngine } /** @@ -150,11 +158,12 @@ export type ModelRuntimeParams = { engine?: string } +// Represents a model that failed to initialize, including the error export type ModelInitFailed = Model & { error: Error } /** - * ModelFile is the model.json entity and it's file metadata + * ModelParams types */ -export type ModelFile = Model & FileMetadata +export type ModelParams = ModelRuntimeParams | ModelSettingParams diff --git a/core/src/types/model/modelImport.ts b/core/src/types/model/modelImport.ts index 7c72a691b..3f0ddab10 100644 --- a/core/src/types/model/modelImport.ts +++ b/core/src/types/model/modelImport.ts @@ -1,4 +1,4 @@ -export type OptionType = 'SYMLINK' | 'MOVE_BINARY_FILE' +export type OptionType = 'symlink' | 'copy' export type ModelImportOption = { type: OptionType diff --git a/core/src/types/model/modelInterface.ts b/core/src/types/model/modelInterface.ts index 08d456b7e..7ad1b136c 100644 --- a/core/src/types/model/modelInterface.ts +++ b/core/src/types/model/modelInterface.ts @@ -1,5 +1,5 @@ -import { GpuSetting } from '../miscellaneous' -import { Model, ModelFile } from './modelEntity' +import { Model } from './modelEntity' +import { OptionType } from './modelImport' /** * Model extension for managing models. @@ -8,38 +8,46 @@ export interface ModelInterface { /** * Downloads a model. * @param model - The model to download. - * @param network - Optional object to specify proxy/whether to ignore SSL certificates. * @returns A Promise that resolves when the model has been downloaded. */ - downloadModel( - model: ModelFile, - gpuSettings?: GpuSetting, - network?: { ignoreSSL?: boolean; proxy?: string } - ): Promise + pullModel(model: string, id?: string, name?: string): Promise /** * Cancels the download of a specific model. * @param {string} model - The ID of the model to cancel the download for. * @returns {Promise} A promise that resolves when the download has been cancelled. */ - cancelModelDownload(modelId: string): Promise + cancelModelPull(model: string): Promise /** * Deletes a model. * @param modelId - The ID of the model to delete. * @returns A Promise that resolves when the model has been deleted. */ - deleteModel(model: ModelFile): Promise + deleteModel(model: string): Promise /** - * Gets a list of downloaded models. + * Gets downloaded models.
* @returns A Promise that resolves with an array of downloaded models. */ - getDownloadedModels(): Promise + getModels(): Promise /** - * Gets a list of configured models. - * @returns A Promise that resolves with an array of configured models. + * Update a pulled model's metadata + * @param model - The model to update. + * @returns A Promise that resolves when the model has been updated. */ - getConfiguredModels(): Promise + updateModel(model: Partial): Promise + + /** + * Import an existing model file. + * @param model id of the model to import + * @param modelPath - path of the model file + */ + importModel( + model: string, + modelPath: string, + name?: string, + optionType?: OptionType + ): Promise } diff --git a/core/src/types/monitoring/index.test.ts b/core/src/types/monitoring/index.test.ts index 010fcb97a..56c5879e4 100644 --- a/core/src/types/monitoring/index.test.ts +++ b/core/src/types/monitoring/index.test.ts @@ -1,16 +1,13 @@ +import * as monitoringInterface from './monitoringInterface' +import * as resourceInfo from './resourceInfo' -import * as monitoringInterface from './monitoringInterface'; -import * as resourceInfo from './resourceInfo'; +import * as index from './index' - import * as index from './index'; - import * as monitoringInterface from './monitoringInterface'; - import * as resourceInfo from './resourceInfo'; - - it('should re-export all symbols from monitoringInterface and resourceInfo', () => { - for (const key in monitoringInterface) { - expect(index[key]).toBe(monitoringInterface[key]); - } - for (const key in resourceInfo) { - expect(index[key]).toBe(resourceInfo[key]); - } - }); +it('should re-export all symbols from monitoringInterface and resourceInfo', () => { + for (const key in monitoringInterface) { + expect(index[key]).toBe(monitoringInterface[key]) + } + for (const key in resourceInfo) { + expect(index[key]).toBe(resourceInfo[key]) + } +}) diff --git a/core/tsconfig.json b/core/tsconfig.json index 02caf65e2..b30d65851 100644 --- a/core/tsconfig.json +++ b/core/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "moduleResolution": "node", - "target": "es5", + "target": "ES2015", "module": "ES2020", "lib": ["es2015", "es2016", "es2017", "dom"], "strict": true, @@ -13,7 +13,7 @@ "declarationDir": "dist/types", "outDir": "dist/lib", "importHelpers": true, - "types": ["@types/jest"], + "types": ["@types/jest"] }, "include": ["src"], "exclude": ["**/*.test.ts"] diff --git a/docs/public/assets/images/changelog/jan-v0.5.5.jpeg b/docs/public/assets/images/changelog/jan-v0.5.5.jpeg new file mode 100644 index 000000000..a0b1c4984 Binary files /dev/null and b/docs/public/assets/images/changelog/jan-v0.5.5.jpeg differ diff --git a/docs/public/assets/images/changelog/jan-v0.5.7.gif b/docs/public/assets/images/changelog/jan-v0.5.7.gif new file mode 100644 index 000000000..cddbf62b0 Binary files /dev/null and b/docs/public/assets/images/changelog/jan-v0.5.7.gif differ diff --git a/docs/src/pages/changelog/2024-10-02-jan-is-more-stable.mdx b/docs/src/pages/changelog/2024-10-02-jan-is-more-stable.mdx new file mode 100644 index 000000000..59e78a0a7 --- /dev/null +++ b/docs/src/pages/changelog/2024-10-02-jan-is-more-stable.mdx @@ -0,0 +1,27 @@ +--- +title: "Jan is more stable 👋" +version: 0.5.5 +description: "Jan supports Llama 3.2 and Qwen 2.5" +date: 2024-10-02 +ogImage: "/assets/images/changelog/jan-v0.5.5.jpeg" +--- + +import ChangelogHeader from "@/components/Changelog/ChangelogHeader" + + + +Highlights 🎉 + +- Meta's Llama 3.2 and Alibaba's Qwen 2.5 added to
the hub +- Improved starter screen +- Better local vs. cloud model navigation + +Fixes 💫 + +- Fixed GPU acceleration for GGUF models +- Improved model caching & threading +- Resolved input & toolbar overlaps + +Update your app or download the latest version: https://jan.ai + +For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.5). \ No newline at end of file diff --git a/docs/src/pages/changelog/2024-10-24-jan-stable.mdx b/docs/src/pages/changelog/2024-10-24-jan-stable.mdx new file mode 100644 index 000000000..7c24d51ee --- /dev/null +++ b/docs/src/pages/changelog/2024-10-24-jan-stable.mdx @@ -0,0 +1,26 @@ +--- +title: "Jan has Stable, Beta and Nightly versions" +version: 0.5.7 +description: "This release is mostly focused on bug fixes." +date: 2024-10-24 +ogImage: "/assets/images/changelog/jan-v0.5.7.gif" +--- + +import ChangelogHeader from "@/components/Changelog/ChangelogHeader" + + + +Highlights 🎉 + +- Jan has Stable, Beta and Nightly versions +- Saving instructions for new threads is now stable + +Fixes 💫 + +- Fixed broken links, hardware issues, and multi-modal download +- Resolved text overlap, scrolling, and multi-monitor reset problems +- Adjusted LLaVA model EOS token and context input + +Update your app or download the latest version: https://jan.ai + +For more details, see the [GitHub release notes](https://github.com/janhq/jan/releases/tag/v0.5.7). \ No newline at end of file diff --git a/docs/src/pages/docs/_assets/jan-app.png b/docs/src/pages/docs/_assets/jan-app.png new file mode 100644 index 000000000..a45943055 Binary files /dev/null and b/docs/src/pages/docs/_assets/jan-app.png differ diff --git a/docs/src/pages/docs/_assets/jan-display.png b/docs/src/pages/docs/_assets/jan-display.png deleted file mode 100644 index 2fc3610ba..000000000 Binary files a/docs/src/pages/docs/_assets/jan-display.png and /dev/null differ diff --git a/docs/src/pages/docs/index.mdx b/docs/src/pages/docs/index.mdx index 027ad11b6..0efb1ba51 100644 --- a/docs/src/pages/docs/index.mdx +++ b/docs/src/pages/docs/index.mdx @@ -22,7 +22,7 @@ import FAQBox from '@/components/FaqBox' # Jan -![Jan's Cover Image](./_assets/jan-display.png) +![Jan's Cover Image](./_assets/jan-app.png) Jan is a ChatGPT-alternative that runs 100% offline on your [Desktop](/docs/desktop-installation). Our goal is to make it easy for a layperson[^1] to download and run LLMs and use AI with full control and [privacy](https://www.reuters.com/legal/legalindustry/privacy-paradox-with-ai-2023-10-31/).
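Reviewer note: the param helpers relocated into core above split one flat params object into inference-time and load-time fields, falling back to originParams whenever a value fails its validation rule. A minimal sketch of the intended behavior, assuming the helpers are re-exported from the package root (they now live in core/src/browser/models/utils.ts; the import path is an assumption, not part of this diff):

import { extractInferenceParams, extractModelLoadParams } from '@janhq/core'

// temperature 3 fails the 0..2 validation rule, so the valid value from
// originParams wins; stream passes its rule and is kept as a runtime param.
const runtime = extractInferenceParams(
  { temperature: 3, stream: true },
  { temperature: 0.7 }
)
console.log(runtime) // expected per the rules above: { temperature: 0.7, stream: true }

// ctx_len and ngl are load-time settings, so they are picked up by the
// load-param helper and ignored by extractInferenceParams.
const settings = extractModelLoadParams({ ctx_len: 2048, ngl: 100 })
console.log(settings) // expected: { ctx_len: 2048, ngl: 100 }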
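Reviewer note: with this change the API server no longer resolves engine configuration itself; /models and /chat/completions become thin proxies over the cortex server at 127.0.0.1:39291/v1. A sketch of what a local client sees, assuming Node 18+ global fetch, that Jan's API server listens on a configured localhost port (1337 below is illustrative, not taken from this diff), and that cortex returns the OpenAI-compatible { data: [...] } list shape:

const JAN_API = 'http://127.0.0.1:1337' // illustrative port, not from this diff

async function main(): Promise<void> {
  // Forwarded to CORTEX_API_URL/models by the new models() helper.
  const models = await fetch(`${JAN_API}/models`).then((r) => r.json())

  // Forwarded to CORTEX_API_URL/chat/completions; a legacy 'nitro'
  // engine value is rewritten to 'llama-cpp' server-side before proxying.
  const completion = await fetch(`${JAN_API}/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      engine: 'nitro',
      model: models.data?.[0]?.id,
      messages: [{ role: 'user', content: 'Hello' }],
      stream: false,
    }),
  }).then((r) => r.json())

  console.log(completion.choices?.[0]?.message)
}

main()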
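Reviewer note: the InferenceEngine values also change meaning on disk; every bundled model.json later in this diff moves from "engine": "nitro" to "engine": "llama-cpp". A hypothetical migration helper for user data that still carries legacy strings (the function name is ours; the value pairs come from the modelEntity.ts hunk above):

// Legacy engine strings -> new cortex engine names (from the enum diff).
const LEGACY_ENGINE_MAP: Record<string, string> = {
  'nitro': 'llama-cpp',
  'cortex.llamacpp': 'llama-cpp',
  'cortex.onnx': 'onnxruntime',
  'cortex.tensorrt-llm': 'tensorrt-llm',
}

// Unknown values pass through unchanged.
export const migrateEngine = (engine: string): string =>
  LEGACY_ENGINE_MAP[engine] ?? engine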
diff --git a/electron/package.json b/electron/package.json index feaee5e16..662a912f0 100644 --- a/electron/package.json +++ b/electron/package.json @@ -18,7 +18,8 @@ "docs/**/*", "scripts/**/*", "icons/**/*", - "themes" + "themes", + "shared" ], "asarUnpack": [ "pre-install", @@ -26,7 +27,8 @@ "docs", "scripts", "icons", - "themes" + "themes", + "shared" ], "publish": [ { @@ -111,7 +113,7 @@ "@kirillvakalov/nut-tree__nut-js": "4.2.1-2" }, "devDependencies": { - "@electron/notarize": "^2.1.0", + "@electron/notarize": "^2.5.0", "@playwright/test": "^1.38.1", "@types/npmcli__arborist": "^5.6.4", "@types/pacote": "^11.1.7", diff --git a/electron/shared/.gitkeep b/electron/shared/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/electron/tests/config/fixtures.ts b/electron/tests/config/fixtures.ts index 22d83b636..bc3f8a7d1 100644 --- a/electron/tests/config/fixtures.ts +++ b/electron/tests/config/fixtures.ts @@ -15,6 +15,8 @@ import { import { Constants } from './constants' import { HubPage } from '../pages/hubPage' import { CommonActions } from '../pages/commonActions' +import { rmSync } from 'fs' +import * as path from 'path' export let electronApp: ElectronApplication export let page: Page @@ -103,10 +105,14 @@ export const test = base.extend< }, { auto: true }, ], - }) test.beforeAll(async () => { + rmSync(path.join(__dirname, '../../test-data'), { + recursive: true, + force: true, + }) + test.setTimeout(TIMEOUT) await setupElectron() await page.waitForSelector('img[alt="Jan - Logo"]', { diff --git a/electron/tests/e2e/hub.e2e.spec.ts b/electron/tests/e2e/hub.e2e.spec.ts index 23d4d0b6d..ef305e9c3 100644 --- a/electron/tests/e2e/hub.e2e.spec.ts +++ b/electron/tests/e2e/hub.e2e.spec.ts @@ -16,7 +16,8 @@ test.beforeAll(async () => { test('explores hub', async ({ hubPage }) => { await hubPage.navigateByMenu() await hubPage.verifyContainerVisible() - const useModelBtn= page.getByTestId(/^use-model-btn-.*/).first() + await hubPage.scrollToBottom() + const useModelBtn = page.getByTestId(/^use-model-btn-.*/).first() await expect(useModelBtn).toBeVisible({ timeout: TIMEOUT, diff --git a/electron/tests/pages/basePage.ts b/electron/tests/pages/basePage.ts index 1817bc731..11e3ba81a 100644 --- a/electron/tests/pages/basePage.ts +++ b/electron/tests/pages/basePage.ts @@ -8,9 +8,8 @@ export class BasePage { constructor( protected readonly page: Page, readonly action: CommonActions, - protected containerId: string, - ) { - } + protected containerId: string + ) {} public getValue(key: string) { return this.action.getValue(key) @@ -37,6 +36,12 @@ export class BasePage { expect(container.isVisible()).toBeTruthy() } + async scrollToBottom() { + await this.page.evaluate(() => { + window.scrollTo(0, document.body.scrollHeight) + }) + } + async waitUpdateLoader() { await this.isElementVisible('img[alt="Jan - Logo"]') } diff --git a/electron/utils/migration.ts b/electron/utils/migration.ts index 52ee45ed0..80851f9de 100644 --- a/electron/utils/migration.ts +++ b/electron/utils/migration.ts @@ -47,9 +47,6 @@ async function migrateThemes() { const themes = readdirSync(join(appResourcePath(), 'themes')) for (const theme of themes) { const themePath = join(appResourcePath(), 'themes', theme) - if (existsSync(themePath) && !lstatSync(themePath).isDirectory()) { - continue - } await checkAndMigrateTheme(theme, themePath) } } @@ -64,21 +61,14 @@ async function checkAndMigrateTheme( ) if (existingTheme) { const desTheme = join(janDataThemesFolder, existingTheme) - if (!existsSync(desTheme) ||
!lstatSync(desTheme).isDirectory()) return - - const desThemeData = JSON.parse( - readFileSync(join(desTheme, 'theme.json'), 'utf-8') - ) - const sourceThemeData = JSON.parse( - readFileSync(join(sourceThemePath, 'theme.json'), 'utf-8') - ) - if (desThemeData.version !== sourceThemeData.version) { - console.debug('Updating theme', existingTheme) - rmdirSync(desTheme, { recursive: true }) - cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), { - recursive: true, - }) + if (!lstatSync(desTheme).isDirectory()) { + return } + console.debug('Updating theme', existingTheme) + rmdirSync(desTheme, { recursive: true }) + cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), { + recursive: true, + }) } else { console.debug('Adding new theme', sourceThemeName) cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), { diff --git a/extensions/assistant-extension/src/index.ts b/extensions/assistant-extension/src/index.ts index 12441995e..6705483d6 100644 --- a/extensions/assistant-extension/src/index.ts +++ b/extensions/assistant-extension/src/index.ts @@ -63,39 +63,46 @@ export default class JanAssistantExtension extends AssistantExtension { } async getAssistants(): Promise { - // get all the assistant directories - // get all the assistant metadata json - const results: Assistant[] = [] - const allFileName: string[] = await fs.readdirSync( - JanAssistantExtension._homeDir - ) - for (const fileName of allFileName) { - const filePath = await joinPath([ - JanAssistantExtension._homeDir, - fileName, - ]) + try { + // get all the assistant directories + // get all the assistant metadata json + const results: Assistant[] = [] - if (!(await fs.fileStat(filePath))?.isDirectory) continue - const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter( - (file: string) => file === 'assistant.json' + const allFileName: string[] = await fs.readdirSync( + JanAssistantExtension._homeDir ) - if (jsonFiles.length !== 1) { - // has more than one assistant file -> ignore - continue + for (const fileName of allFileName) { + const filePath = await joinPath([ + JanAssistantExtension._homeDir, + fileName, + ]) + + if (!(await fs.fileStat(filePath))?.isDirectory) continue + const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter( + (file: string) => file === 'assistant.json' + ) + + if (jsonFiles.length !== 1) { + // has more than one assistant file -> ignore + continue + } + + const content = await fs.readFileSync( + await joinPath([filePath, jsonFiles[0]]), + 'utf-8' + ) + const assistant: Assistant = + typeof content === 'object' ? content : JSON.parse(content) + + results.push(assistant) } - const content = await fs.readFileSync( - await joinPath([filePath, jsonFiles[0]]), - 'utf-8' - ) - const assistant: Assistant = - typeof content === 'object' ? 
content : JSON.parse(content) - - results.push(assistant) + return results + } catch (err) { + console.debug(err) + return [this.defaultAssistant] } - - return results } async deleteAssistant(assistant: Assistant): Promise { @@ -112,39 +119,39 @@ export default class JanAssistantExtension extends AssistantExtension { } private async createJanAssistant(): Promise { - const janAssistant: Assistant = { - avatar: '', - thread_location: undefined, - id: 'jan', - object: 'assistant', - created_at: Date.now(), - name: 'Jan', - description: 'A default assistant that can use all downloaded models', - model: '*', - instructions: '', - tools: [ - { - type: 'retrieval', - enabled: false, - useTimeWeightedRetriever: false, - settings: { - top_k: 2, - chunk_size: 1024, - chunk_overlap: 64, - retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + await this.createAssistant(this.defaultAssistant) + } + + private defaultAssistant: Assistant = { + avatar: '', + thread_location: undefined, + id: 'jan', + object: 'assistant', + created_at: Date.now(), + name: 'Jan', + description: 'A default assistant that can use all downloaded models', + model: '*', + instructions: '', + tools: [ + { + type: 'retrieval', + enabled: false, + useTimeWeightedRetriever: false, + settings: { + top_k: 2, + chunk_size: 1024, + chunk_overlap: 64, + retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. ---------------- CONTEXT: {CONTEXT} ---------------- QUESTION: {QUESTION} ---------------- Helpful Answer:`, - }, }, - ], - file_ids: [], - metadata: undefined, - } - - await this.createAssistant(janAssistant) + }, + ], + file_ids: [], + metadata: undefined, } } diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts index 28d629aa8..3386029fa 100644 --- a/extensions/assistant-extension/src/node/retrieval.ts +++ b/extensions/assistant-extension/src/node/retrieval.ts @@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib' import { OpenAIEmbeddings } from 'langchain/embeddings/openai' import { readEmbeddingEngine } from './engine' -import path from 'path' - export class Retrieval { public chunkSize: number = 100 public chunkOverlap?: number = 0 diff --git a/extensions/assistant-extension/tsconfig.json b/extensions/assistant-extension/tsconfig.json index e425358c3..3838d1c42 100644 --- a/extensions/assistant-extension/tsconfig.json +++ b/extensions/assistant-extension/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "moduleResolution": "node", - "target": "es5", + "target": "ES2015", "module": "ES2020", "lib": ["es2015", "es2016", "es2017", "dom"], "strict": true, diff --git a/extensions/inference-nitro-extension/.gitignore b/extensions/inference-cortex-extension/.gitignore similarity index 100% rename from extensions/inference-nitro-extension/.gitignore rename to extensions/inference-cortex-extension/.gitignore diff --git a/extensions/inference-nitro-extension/README.md b/extensions/inference-cortex-extension/README.md similarity index 100% rename from extensions/inference-nitro-extension/README.md rename to extensions/inference-cortex-extension/README.md diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt new file mode 
100644 index 000000000..e6d5cb833 --- /dev/null +++ b/extensions/inference-cortex-extension/bin/version.txt @@ -0,0 +1 @@ +1.0.2 \ No newline at end of file diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat new file mode 100644 index 000000000..ecff683c3 --- /dev/null +++ b/extensions/inference-cortex-extension/download.bat @@ -0,0 +1,41 @@ +@echo off +set BIN_PATH=./bin +set SHARED_PATH=./../../electron/shared +set /p CORTEX_VERSION=<./bin/version.txt + +@REM Download cortex.llamacpp binaries +set VERSION=v0.1.35 +set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64 +set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION% +set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan + +call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp +call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% +call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% + +move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe +del %BIN_PATH%\cortex-beta.exe +del %BIN_PATH%\cortex.exe + +@REM Loop through each folder and move DLLs (excluding engine.dll) +for %%F in (%SUBFOLDERS%) do ( + echo Processing folder: %BIN_PATH%\%%F + + @REM Move all .dll files except engine.dll + for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do ( + if /I not "%%~nxD"=="engine.dll" ( + move "%%D" "%BIN_PATH%" + ) + ) +) + +echo DLL files moved successfully. 
\ No newline at end of file diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh new file mode 100755 index 000000000..902a31e51 --- /dev/null +++ b/extensions/inference-cortex-extension/download.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# Read CORTEX_VERSION +CORTEX_VERSION=$(cat ./bin/version.txt) +CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" +ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35" +CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35" +# Detect platform +OS_TYPE=$(uname) + +if [ "$OS_TYPE" == "Linux" ]; then + # Linux downloads + download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin" + mv ./bin/cortex-server-beta ./bin/cortex-server + rm -rf ./bin/cortex + rm -rf ./bin/cortex-beta + chmod +x "./bin/cortex-server" + + # Download engines for Linux + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 + +elif [ "$OS_TYPE" == "Darwin" ]; then + # macOS downloads + download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1 + mv ./bin/cortex-server-beta ./bin/cortex-server + rm -rf ./bin/cortex + rm -rf ./bin/cortex-beta + chmod +x "./bin/cortex-server" + + # Download engines for macOS + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp + +else + echo "Unsupported operating system: $OS_TYPE" + exit 1 +fi diff --git a/extensions/inference-nitro-extension/jest.config.js b/extensions/inference-cortex-extension/jest.config.js similarity index 100% rename from extensions/inference-nitro-extension/jest.config.js rename to extensions/inference-cortex-extension/jest.config.js diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-cortex-extension/package.json similarity index 80% rename from extensions/inference-nitro-extension/package.json rename to extensions/inference-cortex-extension/package.json index 15ceaf566..d262ad5ec 100644 --- 
a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.20", + "version": "1.0.21", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", @@ -10,12 +10,12 @@ "scripts": { "test": "jest", "build": "tsc --module commonjs && rollup -c rollup.config.ts", - "downloadnitro:linux:darwin": "./download.sh", - "downloadnitro:win32": "download.bat", - "downloadnitro": "run-script-os", - "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", - "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", - "build:publish": "yarn test && run-script-os" + "downloadcortex:linux:darwin": "./download.sh", + "downloadcortex:win32": "download.bat", + "downloadcortex": "run-script-os", + "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", + "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", + "build:publish": "run-script-os" }, "exports": { ".": "./dist/index.js", @@ -50,6 +50,8 @@ "cpu-instructions": "^0.0.13", "decompress": "^4.2.1", "fetch-retry": "^5.0.6", + "ky": "^1.7.2", + "p-queue": "^8.0.1", "rxjs": "^7.8.1", "tcp-port-used": "^1.0.2", "terminate": "2.6.1", diff --git a/extensions/inference-nitro-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json similarity index 100% rename from extensions/inference-nitro-extension/resources/default_settings.json rename to extensions/inference-cortex-extension/resources/default_settings.json diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json rename to extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json index 8c3029be0..f6e3d08e9 100644 --- a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json +++ b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Finetuned"], "size": 21556982144 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json rename to extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json index 163373014..463f7eec7 100644 --- a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json @@ -31,5 +31,5 @@ 
"tags": ["7B", "Finetuned"], "size": 5056982144 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/bakllava-1/model.json b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/bakllava-1/model.json rename to extensions/inference-cortex-extension/resources/models/bakllava-1/model.json index 93f87c7f4..391c93990 100644 --- a/extensions/inference-nitro-extension/resources/models/bakllava-1/model.json +++ b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json @@ -31,5 +31,5 @@ "tags": ["Vision"], "size": 5750000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json rename to extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json index fb2a5f346..7bd5bf3a4 100644 --- a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json @@ -30,5 +30,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/codestral-22b/model.json rename to extensions/inference-cortex-extension/resources/models/codestral-22b/model.json index f90f848dd..2cce063e6 100644 --- a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json +++ b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json @@ -31,6 +31,6 @@ "tags": ["22B", "Finetuned", "Featured"], "size": 13341237440 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/command-r-34b/model.json rename to extensions/inference-cortex-extension/resources/models/command-r-34b/model.json index 6b166eea5..13518604c 100644 --- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json @@ -31,6 +31,6 @@ "tags": ["34B", "Finetuned"], "size": 21500000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json rename to extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json index 4d825cfeb..6722d253d 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json +++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json @@ -31,5 +31,5 @@ "tags": ["Tiny"], "size": 
1430000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json rename to extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json index e87d6a643..8a2e271cd 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json @@ -31,5 +31,5 @@ "tags": ["33B"], "size": 19940000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json index 837b10ce3..3278c9a81 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json @@ -31,5 +31,5 @@ "tags": ["2B", "Finetuned", "Tiny"], "size": 1630000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json index b29043483..9a57f9b37 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 5330000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json index 4db74ac6f..66eaff7c2 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json @@ -37,5 +37,5 @@ ], "size": 16600000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json index d85759f9b..60be558b8 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json @@ -38,5 +38,5 @@ ], "size": 1710000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git 
a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json index 8f6af15d9..67acaad09 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json @@ -37,5 +37,5 @@ ], "size": 5760000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json rename to extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json index 0c770b189..c91a0a73b 100644 --- a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json @@ -31,5 +31,5 @@ "tags": ["70B", "Foundational Model"], "size": 43920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json rename to extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json index 9efd634b5..4a28f6004 100644 --- a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Foundational Model"], "size": 4080000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json index 4d84b9967..3456a185e 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["8B"], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json rename to extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json index a3601c8cd..718629fb0 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json @@ -34,5 +34,5 @@ ], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git 
a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json similarity index 98% rename from extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json index 1f4931e11..aec73719e 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json @@ -37,5 +37,5 @@ ], "size": 42500000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json similarity index 98% rename from extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json index 4b21534bc..ec9a0284b 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json @@ -37,5 +37,5 @@ ], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json index 5be08409d..0fe7d3316 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["1B", "Featured"], "size": 1320000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json index aacb3f0f8..299362fbf 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["3B", "Featured"], "size": 3420000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json rename to extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json index 94b62ec82..3230df5b0 100644 --- a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json @@ -34,5 +34,5 @@ ], "size": 1170000000 }, - "engine": "nitro" + 
"engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llava-13b/model.json rename to extensions/inference-cortex-extension/resources/models/llava-13b/model.json index 6d94fd272..791c98749 100644 --- a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json @@ -32,5 +32,5 @@ "tags": ["Vision"], "size": 7870000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llava-7b/model.json rename to extensions/inference-cortex-extension/resources/models/llava-7b/model.json index 1fdd75247..b22899c96 100644 --- a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json @@ -32,5 +32,5 @@ "tags": ["Vision"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json rename to extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json index 88f701466..9b568e468 100644 --- a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json +++ b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json @@ -32,5 +32,5 @@ "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png" }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json index 4413b415c..c711065ff 100644 --- a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json @@ -30,5 +30,5 @@ "tags": ["70B", "Foundational Model"], "size": 26440000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json rename to extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json index 10c17c310..1999035aa 100644 --- a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - 
"engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json rename to extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json index e743a74c9..05371b69e 100644 --- a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json @@ -31,5 +31,5 @@ "tags": ["Recommended", "7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json rename to extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json index 6459b049d..a2197dab2 100644 --- a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json @@ -34,5 +34,5 @@ ], "size": 2320000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phi3-medium/model.json rename to extensions/inference-cortex-extension/resources/models/phi3-medium/model.json index 7331b2fd8..f7131ee98 100644 --- a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json @@ -34,5 +34,5 @@ ], "size": 8366000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phind-34b/model.json rename to extensions/inference-cortex-extension/resources/models/phind-34b/model.json index 14099a635..f6e302173 100644 --- a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Finetuned"], "size": 20220000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen-7b/model.json rename to extensions/inference-cortex-extension/resources/models/qwen-7b/model.json index 85081a605..be37cac0d 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 4770000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json 
b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json index a7613982c..210848a43 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Finetuned"], "size": 4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json index 04913b874..96e4d214c 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["14B", "Featured"], "size": 8990000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json index 43ba30c56..20681dff4 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["32B"], "size": 19900000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json index 1852a0909..b741539eb 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["72B"], "size": 47400000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json index b47511f96..6741aef64 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Featured"], "size": 
4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json new file mode 100644 index 000000000..a445ee2db --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-14b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 14B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": ["<|endoftext|>", "<|im_end|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["14B", "Featured"], + "size": 8990000000 + }, + "engine": "llama-cpp" + } + \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json new file mode 100644 index 000000000..cffdf03df --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-32b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 32B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models.
Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": ["<|endoftext|>", "<|im_end|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["32B", "Featured"], + "size": 19900000000 + }, + "engine": "llama-cpp" + } + \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json index 2f1080b2c..9162c8a43 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Featured"], "size": 4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json rename to extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json index 938e03fb7..a6c84bd17 100644 --- a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json +++ b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json @@ -31,5 +31,5 @@ "tags": ["3B", "Finetuned", "Tiny"], "size": 2970000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json index c17d1c35e..ffb32922e 100644 --- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json @@ -30,5 +30,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json rename to extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json index a49e79073..b6aeea3e3 100644 --- a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json +++ b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json @@ -31,5 +31,5
@@ "tags": ["Tiny", "Foundation Model"], "size": 669000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json index 6c9aa2b89..fae5d0ca5 100644 --- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json @@ -31,5 +31,5 @@ "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png" }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/vistral-7b/model.json rename to extensions/inference-cortex-extension/resources/models/vistral-7b/model.json index b84f2c676..46b6999a6 100644 --- a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Finetuned"], "size": 4410000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json rename to extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json index 101eedfd1..cf39ad857 100644 --- a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json +++ b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json @@ -31,5 +31,5 @@ "tags": ["Recommended", "13B", "Finetuned"], "size": 7870000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/yi-34b/model.json rename to extensions/inference-cortex-extension/resources/models/yi-34b/model.json index db7df9f2d..4f56650d7 100644 --- a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Foundational Model"], "size": 20660000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts similarity index 93% rename from extensions/inference-nitro-extension/rollup.config.ts rename to extensions/inference-cortex-extension/rollup.config.ts index 1a8badb6f..34ad9295d 100644 --- a/extensions/inference-nitro-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -49,6 +49,8 @@ const llama321bJson = require('./resources/models/llama3.2-1b-instruct/model.jso const llama323bJson = 
require('./resources/models/llama3.2-3b-instruct/model.json') const qwen257bJson = require('./resources/models/qwen2.5-7b-instruct/model.json') const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json') +const qwen25coder14bJson = require('./resources/models/qwen2.5-coder-14b-instruct/model.json') +const qwen25coder32bJson = require('./resources/models/qwen2.5-coder-32b-instruct/model.json') const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json') const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json') const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json') @@ -108,25 +110,16 @@ export default [ llama323bJson, qwen257bJson, qwen25coder7bJson, + qwen25coder14bJson, + qwen25coder32bJson, qwen2514bJson, qwen2532bJson, qwen2572bJson, ]), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), - INFERENCE_URL: JSON.stringify( - process.env.INFERENCE_URL || - 'http://127.0.0.1:3928/inferences/server/chat_completion' - ), - TROUBLESHOOTING_URL: JSON.stringify( - 'https://jan.ai/guides/troubleshooting' - ), - JAN_SERVER_INFERENCE_URL: JSON.stringify( - 'http://localhost:1337/v1/chat/completions' - ), - CUDA_DOWNLOAD_URL: JSON.stringify( - 'https://catalog.jan.ai/dist/cuda-dependencies///cuda.tar.gz' - ), + CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), + CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), }), // Allow json resolution json(), diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts similarity index 70% rename from extensions/inference-nitro-extension/src/@types/global.d.ts rename to extensions/inference-cortex-extension/src/@types/global.d.ts index 85c9b939f..48dbcd780 100644 --- a/extensions/inference-nitro-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -1,7 +1,6 @@ declare const NODE: string -declare const INFERENCE_URL: string -declare const TROUBLESHOOTING_URL: string -declare const JAN_SERVER_INFERENCE_URL: string +declare const CORTEX_API_URL: string +declare const CORTEX_SOCKET_URL: string declare const DEFAULT_SETTINGS: Array declare const MODELS: Array diff --git a/extensions/inference-nitro-extension/src/babel.config.js b/extensions/inference-cortex-extension/src/babel.config.js similarity index 100% rename from extensions/inference-nitro-extension/src/babel.config.js rename to extensions/inference-cortex-extension/src/babel.config.js diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts new file mode 100644 index 000000000..44ec423da --- /dev/null +++ b/extensions/inference-cortex-extension/src/index.ts @@ -0,0 +1,236 @@ +/** + * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. 
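 + * Inference itself is served by a separate cortex.cpp server process: the class below only starts and stops models through the server's HTTP API and relays model download progress from its WebSocket, as the loadModel, unloadModel and subscribeToEvents methods further down show.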
+ * @version 1.0.0 + * @module inference-cortex-extension/src/index + */ + +import { + Model, + executeOnMain, + systemInformation, + joinPath, + LocalOAIEngine, + InferenceEngine, + getJanDataFolderPath, + extractModelLoadParams, + fs, + events, + ModelEvent +} from '@janhq/core' +import PQueue from 'p-queue' +import ky from 'ky' + +/** + * Event subscription types of Downloader + */ +enum DownloadTypes { + DownloadUpdated = 'onFileDownloadUpdate', + DownloadError = 'onFileDownloadError', + DownloadSuccess = 'onFileDownloadSuccess', + DownloadStopped = 'onFileDownloadStopped', + DownloadStarted = 'onFileDownloadStarted', +} + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class JanInferenceCortexExtension extends LocalOAIEngine { + nodeModule: string = 'node' + + queue = new PQueue({ concurrency: 1 }) + + provider: string = InferenceEngine.cortex + + /** + * The URL for making inference requests. + */ + inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions` + + /** + * Socket instance of events subscription + */ + socket?: WebSocket = undefined + + /** + * Subscribes to events emitted by the @janhq/core package. + */ + async onLoad() { + const models = MODELS as Model[] + + this.registerModels(models) + + super.onLoad() + + // Run the process watchdog + const systemInfo = await systemInformation() + await this.clean() + await executeOnMain(NODE, 'run', systemInfo) + + this.queue.add(() => this.healthz()) + + this.subscribeToEvents() + + window.addEventListener('beforeunload', () => { + this.clean() + }) + } + + onUnload(): void { + this.clean() + executeOnMain(NODE, 'dispose') + super.onUnload() + } + + override async loadModel( + model: Model & { file_path?: string } + ): Promise<void> { + if ( + model.engine === InferenceEngine.nitro && + model.settings.llama_model_path + ) { + // Legacy chat model support + model.settings = { + ...model.settings, + llama_model_path: await getModelFilePath( + model, + model.settings.llama_model_path + ), + } + } else { + const { llama_model_path, ...settings } = model.settings + model.settings = settings + } + + if (model.engine === InferenceEngine.nitro && model.settings.mmproj) { + // Legacy clip vision model support + model.settings = { + ...model.settings, + mmproj: await getModelFilePath(model, model.settings.mmproj), + } + } else { + const { mmproj, ...settings } = model.settings + model.settings = settings + } + + return await this.queue.add(() => + ky + .post(`${CORTEX_API_URL}/v1/models/start`, { + json: { + ...extractModelLoadParams(model.settings), + model: model.id, + engine: + model.engine === InferenceEngine.nitro // Legacy model cache + ? InferenceEngine.cortex_llamacpp + : model.engine, + }, + }) + .json() + .catch(async (e) => { + throw (await e.response?.json()) ??
e + }) + .then() + ) + } + + override async unloadModel(model: Model): Promise<void> { + return ky + .post(`${CORTEX_API_URL}/v1/models/stop`, { + json: { model: model.id }, + }) + .json() + .then() + } + + /** + * Do health check on cortex.cpp + * @returns + */ + healthz(): Promise<void> { + return ky + .get(`${CORTEX_API_URL}/healthz`, { + retry: { + limit: 10, + methods: ['get'], + }, + }) + .then(() => { }) + } + + /** + * Clean cortex processes + * @returns + */ + clean(): Promise<any> { + return ky + .delete(`${CORTEX_API_URL}/processmanager/destroy`, { + timeout: 2000, // maximum 2 seconds + }) + .catch(() => { + // Do nothing + }) + } + + /** + * Subscribe to cortex.cpp websocket events + */ + subscribeToEvents() { + this.queue.add( + () => + new Promise<void>((resolve) => { + this.socket = new WebSocket(`${CORTEX_SOCKET_URL}/events`) + + this.socket.addEventListener('message', (event) => { + const data = JSON.parse(event.data) + const transferred = data.task.items.reduce( + (acc: number, cur: any) => acc + cur.downloadedBytes, + 0 + ) + const total = data.task.items.reduce( + (acc: number, cur: any) => acc + cur.bytes, + 0 + ) + const percent = total > 0 ? transferred / total : 0 + + events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], { + modelId: data.task.id, + percent: percent, + size: { + transferred: transferred, + total: total, + }, + }) + // Update models list from Hub + if (data.type === DownloadTypes.DownloadSuccess) { + // Delay for the state update from cortex.cpp + // Just to be sure + setTimeout(() => { + events.emit(ModelEvent.OnModelsUpdate, {}) + }, 500) + } + }) + resolve() + }) + ) + } + +} + +/// Legacy +export const getModelFilePath = async ( + model: Model, + file: string +): Promise<string> => { + // Symlink to the model file + if ( + !model.sources[0]?.url.startsWith('http') && + (await fs.existsSync(model.sources[0].url)) + ) { + return model.sources[0]?.url + } + return joinPath([await getJanDataFolderPath(), 'models', model.id, file]) +} +/// diff --git a/extensions/inference-nitro-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts similarity index 67% rename from extensions/inference-nitro-extension/src/node/execute.test.ts rename to extensions/inference-cortex-extension/src/node/execute.test.ts index dfd8b35a9..622eb38af 100644 --- a/extensions/inference-nitro-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from '@jest/globals' -import { executableNitroFile } from './execute' +import { executableCortexFile } from './execute' import { GpuSetting } from '@janhq/core' import { cpuInfo } from 'cpu-instructions' @@ -27,10 +27,10 @@ jest.mock('cpu-instructions', () => ({ cpuInfo: jest.fn(), }, })) -let mock = cpuInfo.cpuInfo as jest.Mock -mock.mockReturnValue([]) +let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock +mockCpuInfo.mockReturnValue([]) -describe('test executable nitro file', () => { +describe('test executable cortex file', () => { afterAll(function () { Object.defineProperty(process, 'platform', { value: originalPlatform, }) @@ -44,10 +44,14 @@ Object.defineProperty(process, 'arch', { value: 'arm64', }) - expect(executableNitroFile(testSettings)).toEqual( + expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`mac-arm64`), - executablePath: originalPlatform === 'darwin' ?
expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(), + enginePath: expect.stringContaining(`arm64`), + binPath: expect.stringContaining(`bin`), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`cortex-server`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -55,10 +59,14 @@ describe('test executable nitro file', () => { Object.defineProperty(process, 'arch', { value: 'x64', }) - expect(executableNitroFile(testSettings)).toEqual( + expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`mac-x64`), - executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(), + enginePath: expect.stringContaining(`x64`), + binPath: expect.stringContaining(`bin`), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`cortex-server`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -73,10 +81,12 @@ describe('test executable nitro file', () => { ...testSettings, run_mode: 'cpu', } - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue(['avx']) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + enginePath: expect.stringContaining(`avx`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -107,10 +117,12 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue(['avx2']) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-cuda-11-7`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + enginePath: expect.stringContaining(`avx2-cuda-11-7`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -141,10 +153,12 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue(['noavx']) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + enginePath: expect.stringContaining(`noavx-cuda-12-0`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -159,10 +173,11 @@ describe('test executable nitro file', () => { ...testSettings, run_mode: 'cpu', } - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue(['noavx']) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux`), - executablePath: expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(`noavx`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -193,10 +208,11 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-cuda-11-7`), - executablePath: 
expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(`cuda-11-7`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -227,10 +243,11 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(`cuda-12-0`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -249,12 +266,13 @@ describe('test executable nitro file', () => { const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) + mockCpuInfo.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-${instruction}`), - executablePath: expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(instruction), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -273,11 +291,12 @@ describe('test executable nitro file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue([instruction]) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-${instruction}`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + enginePath: expect.stringContaining(instruction), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -312,11 +331,12 @@ describe('test executable nitro file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue([instruction]) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + enginePath: expect.stringContaining(`cuda-12-0`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -351,11 +371,12 @@ describe('test executable nitro file', () => { ], } cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue([instruction]) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(`cuda-12-0`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -391,11 +412,12 @@ 
describe('test executable nitro file', () => { ], } cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue([instruction]) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-vulkan`), - executablePath: expect.stringContaining(`cortex-cpp`), + enginePath: expect.stringContaining(`vulkan`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -417,11 +439,15 @@ describe('test executable nitro file', () => { ...testSettings, run_mode: 'cpu', } - mock.mockReturnValue([]) - expect(executableNitroFile(settings)).toEqual( + mockCpuInfo.mockReturnValue([]) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`mac-x64`), - executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(), + enginePath: expect.stringContaining(`x64`), + binPath: expect.stringContaining(`bin`), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`cortex-server`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts similarity index 72% rename from extensions/inference-nitro-extension/src/node/execute.ts rename to extensions/inference-cortex-extension/src/node/execute.ts index 595063ed4..74ffb48c6 100644 --- a/extensions/inference-nitro-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -2,8 +2,9 @@ import { GpuSetting } from '@janhq/core' import * as path from 'path' import { cpuInfo } from 'cpu-instructions' -export interface NitroExecutableOptions { +export interface CortexExecutableOptions { enginePath: string + binPath: string executablePath: string cudaVisibleDevices: string vkVisibleDevices: string @@ -36,8 +37,8 @@ const os = (): string => { ? 'win' : process.platform === 'darwin' ? process.arch === 'arm64' - ? 'mac-arm64' - : 'mac-x64' + ? 'arm64' + : 'x64' : 'linux' } @@ -66,7 +67,7 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'. * @returns */ -const cpuInstructions = () => { +const cpuInstructions = (): string => { if (process.platform === 'darwin') return '' return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512') ? 'avx512' @@ -81,29 +82,32 @@ const cpuInstructions = () => { * Find which executable file to run based on the current platform. * @returns The name of the executable file to run. */ -export const executableNitroFile = ( +export const executableCortexFile = ( gpuSetting?: GpuSetting -): NitroExecutableOptions => { - let engineFolder = [ - os(), - ...(gpuSetting?.vulkan - ? [] +): CortexExecutableOptions => { + const cpuInstruction = cpuInstructions() + let engineFolder = gpuSetting?.vulkan + ? 'vulkan' + : process.platform === 'darwin' + ? os() : [ - gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '', - gpuRunMode(gpuSetting), - cudaVersion(gpuSetting), - ]), - gpuSetting?.vulkan ? 'vulkan' : undefined, - ] - .filter((e) => !!e) - .join('-') + gpuRunMode(gpuSetting) !== 'cuda' || + cpuInstruction === 'avx2' || cpuInstruction === 'avx512' + ? 
cpuInstruction + : 'noavx', + gpuRunMode(gpuSetting), + cudaVersion(gpuSetting), + ] + .filter((e) => !!e) + .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' - let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}` - + let binaryName = `cortex-server${extension()}` + const binPath = path.join(__dirname, '..', 'bin') return { - enginePath: path.join(__dirname, '..', 'bin', engineFolder), - executablePath: path.join(__dirname, '..', 'bin', binaryName), + enginePath: path.join(binPath, engineFolder), + executablePath: path.join(binPath, binaryName), + binPath: binPath, cudaVisibleDevices, vkVisibleDevices, } diff --git a/extensions/inference-cortex-extension/src/node/index.test.ts b/extensions/inference-cortex-extension/src/node/index.test.ts new file mode 100644 index 000000000..ff9d7c2fc --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/index.test.ts @@ -0,0 +1,94 @@ +jest.mock('@janhq/core/node', () => ({ + ...jest.requireActual('@janhq/core/node'), + getJanDataFolderPath: () => '', + getSystemResourceInfo: () => { + return { + cpu: { + cores: 1, + logicalCores: 1, + threads: 1, + model: 'model', + speed: 1, + }, + memory: { + total: 1, + free: 1, + }, + gpu: { + model: 'model', + memory: 1, + cuda: { + version: 'version', + devices: 'devices', + }, + vulkan: { + version: 'version', + devices: 'devices', + }, + }, + } + }, +})) + +jest.mock('fs', () => ({ + default: { + readdirSync: () => [], + }, +})) + +jest.mock('child_process', () => ({ + exec: () => { + return { + stdout: { on: jest.fn() }, + stderr: { on: jest.fn() }, + on: jest.fn(), + } + }, + spawn: () => { + return { + stdout: { on: jest.fn() }, + stderr: { on: jest.fn() }, + on: jest.fn(), + pid: '111', + } + }, +})) + +jest.mock('./execute', () => ({ + executableCortexFile: () => { + return { + enginePath: 'enginePath', + executablePath: 'executablePath', + cudaVisibleDevices: 'cudaVisibleDevices', + vkVisibleDevices: 'vkVisibleDevices', + } + }, +})) + +import index from './index' + +describe('dispose', () => { + it('should dispose a model successfully on Mac', async () => { + Object.defineProperty(process, 'platform', { + value: 'darwin', + }) + + // Call the dispose function + const result = await index.dispose() + + // Assert that the result is as expected + expect(result).toBeUndefined() + }) + + it('should kill the subprocess successfully on Windows', async () => { + Object.defineProperty(process, 'platform', { + value: 'win32', + }) + + // Call the dispose function + const result = await index.dispose() + + // Assert that the result is as expected + expect(result).toBeUndefined() + }) +}) diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts new file mode 100644 index 000000000..3816605d2 --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -0,0 +1,108 @@ +import path from 'path' +import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node' +import { executableCortexFile } from './execute' +import { ProcessWatchdog } from './watchdog' +import { appResourcePath } from '@janhq/core/node' + +// The local port used by the cortex subprocess +const LOCAL_PORT = '39291' +let watchdog: ProcessWatchdog | undefined = undefined + +/** + * Spawns a cortex subprocess. + * @returns A promise that resolves when the cortex subprocess is started.
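 + * The subprocess is not spawned bare: it runs under the ProcessWatchdog defined in watchdog.ts, which restarts the server after an unexpected exit (up to its configured maximum), so a cortex.cpp crash does not permanently take local inference down.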
+ */ +function run(systemInfo?: SystemInformation): Promise<void> { + log(`[CORTEX]:: Spawning cortex subprocess...`) + + return new Promise<void>(async (resolve, reject) => { + let executableOptions = executableCortexFile( + // If ngl is not set or equal to 0, run on CPU with correct instructions + systemInfo?.gpuSetting + ? { + ...systemInfo.gpuSetting, + run_mode: systemInfo.gpuSetting.run_mode, + } + : undefined + ) + + // Execute the binary + log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`) + log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`) + + addEnvPaths(path.join(appResourcePath(), 'shared')) + addEnvPaths(executableOptions.binPath) + addEnvPaths(executableOptions.enginePath) + // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH + // This is required for the cortex engine to run for now since dlls are not moved to the root + addEnvPaths( + path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp') + ) + + const dataFolderPath = getJanDataFolderPath() + watchdog = new ProcessWatchdog( + executableOptions.executablePath, + [ + '--start-server', + '--port', + LOCAL_PORT.toString(), + '--config_file_path', + `${path.join(dataFolderPath, '.janrc')}`, + '--data_folder_path', + dataFolderPath, + ], + { + cwd: executableOptions.enginePath, + env: { + ...process.env, + ENGINE_PATH: executableOptions.enginePath, + CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, + // Vulkan - Support 1 device at a time for now + ...(executableOptions.vkVisibleDevices?.length > 0 && { + GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], + }), + }, + } + ) + watchdog.start() + resolve() + }) +} + +/** + * Every module should have a dispose function + * This will be called when the extension is unloaded and should clean up any resources + * Also called when app is closed + */ +function dispose() { + watchdog?.terminate() +} + +function addEnvPaths(dest: string) { + // Add engine path to the PATH and LD_LIBRARY_PATH + if (process.platform === 'win32') { + process.env.PATH = (process.env.PATH || '').concat( + path.delimiter, + dest, + ) + log(`[CORTEX] PATH: ${process.env.PATH}`) + } else { + process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( + path.delimiter, + dest, + ) + log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`) + } +} + +/** + * Cortex process info + */ +export interface CortexProcessInfo { + isRunning: boolean +} + +export default { + run, + dispose, +} diff --git a/extensions/inference-cortex-extension/src/node/watchdog.ts b/extensions/inference-cortex-extension/src/node/watchdog.ts new file mode 100644 index 000000000..3e2b81d70 --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/watchdog.ts @@ -0,0 +1,84 @@ +import { log } from '@janhq/core/node' +import { spawn, ChildProcess } from 'child_process' +import { EventEmitter } from 'events' + +interface WatchdogOptions { + cwd?: string + restartDelay?: number + maxRestarts?: number + env?: NodeJS.ProcessEnv +} + +export class ProcessWatchdog extends EventEmitter { + private command: string + private args: string[] + private options: WatchdogOptions + private process: ChildProcess | null + private restartDelay: number + private maxRestarts: number + private restartCount: number + private isTerminating: boolean + + constructor(command: string, args: string[], options: WatchdogOptions = {}) { + super() + this.command = command + this.args = args + this.options = options + this.process = null + this.restartDelay =
options.restartDelay || 5000 + this.maxRestarts = options.maxRestarts || 5 + this.restartCount = 0 + this.isTerminating = false + } + + start(): void { + this.spawnProcess() + } + + private spawnProcess(): void { + if (this.isTerminating) return + + log(`Starting process: ${this.command} ${this.args.join(' ')}`) + this.process = spawn(this.command, this.args, this.options) + + this.process.stdout?.on('data', (data: Buffer) => { + log(`Process output: ${data}`) + this.emit('output', data.toString()) + }) + + this.process.stderr?.on('data', (data: Buffer) => { + log(`Process error: ${data}`) + this.emit('error', data.toString()) + }) + + this.process.on('close', (code: number | null) => { + log(`Process exited with code ${code}`) + this.emit('close', code) + if (!this.isTerminating) { + this.restartProcess() + } + }) + } + + private restartProcess(): void { + if (this.restartCount < this.maxRestarts) { + this.restartCount++ + log( + `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})` + ) + setTimeout(() => this.spawnProcess(), this.restartDelay) + } else { + log('Max restart attempts reached. Exiting watchdog.') + this.emit('maxRestartsReached') + } + } + + terminate(): void { + this.isTerminating = true + if (this.process) { + log('Terminating watched process...') + this.process.kill() + } + this.emit('terminated') + } +} diff --git a/extensions/inference-nitro-extension/tsconfig.json b/extensions/inference-cortex-extension/tsconfig.json similarity index 81% rename from extensions/inference-nitro-extension/tsconfig.json rename to extensions/inference-cortex-extension/tsconfig.json index 19d8572b5..af00a035a 100644 --- a/extensions/inference-nitro-extension/tsconfig.json +++ b/extensions/inference-cortex-extension/tsconfig.json @@ -1,9 +1,8 @@ { "compilerOptions": { "moduleResolution": "node", - "target": "es5", - "module": "ES2020", - "lib": ["es2015", "es2016", "es2017", "dom"], + "target": "es2016", + "module": "esnext", "strict": true, "sourceMap": true, "declaration": true, diff --git a/extensions/inference-nitro-extension/bin/version.txt b/extensions/inference-nitro-extension/bin/version.txt deleted file mode 100644 index 8f0916f76..000000000 --- a/extensions/inference-nitro-extension/bin/version.txt +++ /dev/null @@ -1 +0,0 @@ -0.5.0 diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat deleted file mode 100644 index 7acd385d5..000000000 --- a/extensions/inference-nitro-extension/download.bat +++ /dev/null @@ -1,31 +0,0 @@ -@echo off -set BIN_PATH=./bin -set /p CORTEX_VERSION=<./bin/version.txt - -@REM Download cortex.llamacpp binaries -set VERSION=v0.1.25 -set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.25-windows-amd64 -set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan - -call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp -call .\node_modules\.bin\download 
%DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx2/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx512/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/win-vulkan/engines/cortex.llamacpp - -@REM Loop through each folder and move DLLs (excluding engine.dll) -for %%F in (%SUBFOLDERS%) do ( - echo Processing folder: %BIN_PATH%\%%F - - @REM Move all .dll files except engine.dll - for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do ( - if /I not "%%~nxD"=="engine.dll" ( - move "%%D" "%BIN_PATH%" - ) - ) -) - -echo DLL files moved successfully. \ No newline at end of file diff --git a/extensions/inference-nitro-extension/download.sh b/extensions/inference-nitro-extension/download.sh deleted file mode 100755 index 98ed8504a..000000000 --- a/extensions/inference-nitro-extension/download.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Read CORTEX_VERSION -CORTEX_VERSION=$(cat ./bin/version.txt) -CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" - -# Detect platform -OS_TYPE=$(uname) - -if [ "$OS_TYPE" == "Linux" ]; then - # Linux downloads - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin" - chmod +x "./bin/cortex-cpp" - - ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64" - - # Download engines for Linux - download "${ENGINE_DOWNLOAD_URL}-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1 - -elif [ "$OS_TYPE" == "Darwin" ]; then - # macOS downloads - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/mac-arm64" 1 - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/mac-x64" 1 - chmod +x "./bin/mac-arm64/cortex-cpp" - chmod +x "./bin/mac-x64/cortex-cpp" - - ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac" - # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp - download "${ENGINE_DOWNLOAD_URL}-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp - -else - echo "Unsupported operating system: $OS_TYPE" - exit 1 -fi diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts deleted file mode 100644 index 6e825e8fd..000000000 --- a/extensions/inference-nitro-extension/src/index.ts +++ /dev/null @@ -1,193 
+0,0 @@ -/** - * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - * @version 1.0.0 - * @module inference-extension/src/index - */ - -import { - events, - executeOnMain, - Model, - ModelEvent, - LocalOAIEngine, - InstallationState, - systemInformation, - fs, - getJanDataFolderPath, - joinPath, - DownloadRequest, - baseName, - downloadFile, - DownloadState, - DownloadEvent, - ModelFile, -} from '@janhq/core' - -declare const CUDA_DOWNLOAD_URL: string -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class JanInferenceNitroExtension extends LocalOAIEngine { - nodeModule: string = NODE - provider: string = 'nitro' - - /** - * Checking the health for Nitro's process each 5 secs. - */ - private static readonly _intervalHealthCheck = 5 * 1000 - - /** - * The interval id for the health check. Used to stop the health check. - */ - private getNitroProcessHealthIntervalId: NodeJS.Timeout | undefined = undefined - - /** - * Tracking the current state of nitro process. - */ - private nitroProcessInfo: any = undefined - - /** - * The URL for making inference requests. - */ - inferenceUrl = '' - - /** - * Subscribes to events emitted by the @janhq/core package. - */ - async onLoad() { - this.inferenceUrl = INFERENCE_URL - - // If the extension is running in the browser, use the base API URL from the core package. - if (!('electronAPI' in window)) { - this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions` - } - - this.getNitroProcessHealthIntervalId = setInterval( - () => this.periodicallyGetNitroHealth(), - JanInferenceNitroExtension._intervalHealthCheck - ) - const models = MODELS as unknown as Model[] - this.registerModels(models) - super.onLoad() - - // Add additional dependencies PATH to the env - executeOnMain(NODE, 'addAdditionalDependencies', { - name: this.name, - version: this.version, - }) - } - - /** - * Periodically check for nitro process's health. - */ - private async periodicallyGetNitroHealth(): Promise { - const health = await executeOnMain(NODE, 'getCurrentNitroProcessInfo') - - const isRunning = this.nitroProcessInfo?.isRunning ?? 
false - if (isRunning && health.isRunning === false) { - console.debug('Nitro process is stopped') - events.emit(ModelEvent.OnModelStopped, {}) - } - this.nitroProcessInfo = health - } - - override loadModel(model: ModelFile): Promise<void> { - if (model.engine !== this.provider) return Promise.resolve() - this.getNitroProcessHealthIntervalId = setInterval( - () => this.periodicallyGetNitroHealth(), - JanInferenceNitroExtension._intervalHealthCheck - ) - return super.loadModel(model) - } - - override async unloadModel(model?: Model): Promise<void> { - if (model?.engine && model.engine !== this.provider) return - - // stop the periodic health check - if (this.getNitroProcessHealthIntervalId) { - clearInterval(this.getNitroProcessHealthIntervalId) - this.getNitroProcessHealthIntervalId = undefined - } - return super.unloadModel(model) - } - - override async install(): Promise<void> { - const info = await systemInformation() - - const platform = info.osInfo?.platform === 'win32' ? 'windows' : 'linux' - const downloadUrl = CUDA_DOWNLOAD_URL - - const url = downloadUrl - .replace('<version>', info.gpuSetting?.cuda?.version ?? '12.4') - .replace('<platform>', platform) - - console.debug('Downloading Cuda Toolkit Dependency: ', url) - - const janDataFolderPath = await getJanDataFolderPath() - - const executableFolderPath = await joinPath([ - janDataFolderPath, - 'engines', - this.name ?? 'cortex-cpp', - this.version ?? '1.0.0', - ]) - - if (!(await fs.existsSync(executableFolderPath))) { - await fs.mkdir(executableFolderPath) - } - - const tarball = await baseName(url) - const tarballFullPath = await joinPath([executableFolderPath, tarball]) - - const downloadRequest: DownloadRequest = { - url, - localPath: tarballFullPath, - extensionId: this.name, - downloadType: 'extension', - } - downloadFile(downloadRequest) - - const onFileDownloadSuccess = async (state: DownloadState) => { - console.log(state) - // if other download, ignore - if (state.fileName !== tarball) return - events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) - await executeOnMain( - NODE, - 'decompressRunner', - tarballFullPath, - executableFolderPath - ) - events.emit(DownloadEvent.onFileUnzipSuccess, state) - } - events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) - } - - override async installationState(): Promise<InstallationState> { - const info = await systemInformation() - if ( - info.gpuSetting?.run_mode === 'gpu' && - !info.gpuSetting?.vulkan && - info.osInfo && - info.osInfo.platform !== 'darwin' && - !info.gpuSetting?.cuda?.exist - ) { - const janDataFolderPath = await getJanDataFolderPath() - - const executableFolderPath = await joinPath([ - janDataFolderPath, - 'engines', - this.name ?? 'cortex-cpp', - this.version ?? 
'1.0.0', - ]) - - if (!(await fs.existsSync(executableFolderPath))) return 'NotInstalled' - return 'Installed' - } - return 'NotRequired' - } -} diff --git a/extensions/inference-nitro-extension/src/node/index.test.ts b/extensions/inference-nitro-extension/src/node/index.test.ts deleted file mode 100644 index 6e64b4a06..000000000 --- a/extensions/inference-nitro-extension/src/node/index.test.ts +++ /dev/null @@ -1,465 +0,0 @@ -jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: true, - status: 200, - json: () => - Promise.resolve({ - model_loaded: true, - }), - text: () => Promise.resolve(''), - }) - }, -})) - -jest.mock('path', () => ({ - default: { - isAbsolute: jest.fn(), - join: jest.fn(), - parse: () => { - return { dir: 'dir' } - }, - delimiter: { concat: () => '' }, - }, -})) - -jest.mock('decompress', () => ({ - default: () => { - return Promise.resolve() - }, -})) - -jest.mock('@janhq/core/node', () => ({ - ...jest.requireActual('@janhq/core/node'), - getJanDataFolderPath: () => '', - getSystemResourceInfo: () => { - return { - cpu: { - cores: 1, - logicalCores: 1, - threads: 1, - model: 'model', - speed: 1, - }, - memory: { - total: 1, - free: 1, - }, - gpu: { - model: 'model', - memory: 1, - cuda: { - version: 'version', - devices: 'devices', - }, - vulkan: { - version: 'version', - devices: 'devices', - }, - }, - } - }, -})) - -jest.mock('fs', () => ({ - default: { - readdirSync: () => [], - }, -})) - -jest.mock('child_process', () => ({ - exec: () => { - return { - stdout: { on: jest.fn() }, - stderr: { on: jest.fn() }, - on: jest.fn(), - } - }, - spawn: () => { - return { - stdout: { on: jest.fn() }, - stderr: { on: jest.fn() }, - on: jest.fn(), - pid: '111', - } - }, -})) - -jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(true), - waitUntilUsed: () => Promise.resolve(true), - }, -})) - -jest.mock('./execute', () => ({ - executableNitroFile: () => { - return { - enginePath: 'enginePath', - executablePath: 'executablePath', - cudaVisibleDevices: 'cudaVisibleDevices', - vkVisibleDevices: 'vkVisibleDevices', - } - }, -})) - -jest.mock('terminate', () => ({ - default: (id: String, func: Function) => { - console.log(id) - func() - }, -})) - -import * as execute from './execute' -import index from './index' - -let executeMock = execute - -const modelInitOptions: any = { - modelFolder: '/path/to/model', - model: { - id: 'test', - name: 'test', - engine: 'nitro', - version: '0.0', - format: 'GGUF', - object: 'model', - sources: [], - created: 0, - description: 'test', - parameters: {}, - metadata: { - author: '', - tags: [], - size: 0, - }, - settings: { - prompt_template: '{prompt}', - llama_model_path: 'model.gguf', - }, - }, -} - -describe('loadModel', () => { - it('should load a model successfully', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - - // Call the loadModel function - const result = await index.loadModel(modelInitOptions, systemInfo) - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should reject with an error message if the model is not a nitro model', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - modelInitOptions.model.engine = 'not-nitro' - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { 
- // Assert that the error message is as expected - expect(error).toBe('Not a cortex model') - } - modelInitOptions.model.engine = 'nitro' - }) - - it('should reject if model load failed with an error message', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - // Mock the fetch-retry module to return a failed response - jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: false, - status: 500, - json: () => - Promise.resolve({ - model_loaded: false, - }), - text: () => Promise.resolve('Failed to load model'), - }) - }, - })) - - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Failed to load model') - } - }) - - it('should reject if port not available', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - - // Mock the tcp-port-used module to return false - jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(false), - waitUntilUsed: () => Promise.resolve(false), - }, - })) - - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Port not available') - } - }) - - it('should run on GPU model if ngl is set', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: 40, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'gpu', - }) - }) - - it('should run on correct CPU instructions if ngl is not set', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: undefined, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'cpu', - }) - }) - - it('should run on correct CPU instructions if ngl is 0', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: 0, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'cpu', - 
}) - }) -}) - -describe('unloadModel', () => { - it('should unload a model successfully', async () => { - // Call the unloadModel function - const result = await index.unloadModel() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should reject with an error message if the model is not a nitro model', async () => { - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Not a cortex model') - } - }) - - it('should reject if model unload failed with an error message', async () => { - // Mock the fetch-retry module to return a failed response - jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: false, - status: 500, - json: () => - Promise.resolve({ - model_unloaded: false, - }), - text: () => Promise.resolve('Failed to unload model'), - }) - }, - })) - - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Failed to unload model') - } - }) - - it('should reject if port not available', async () => { - // Mock the tcp-port-used module to return false - jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(false), - waitUntilUsed: () => Promise.resolve(false), - }, - })) - - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Port not available') - } - }) -}) -describe('dispose', () => { - it('should dispose a model successfully on Mac', async () => { - Object.defineProperty(process, 'platform', { - value: 'darwin', - }) - - // Call the dispose function - const result = await index.dispose() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should kill the subprocess successfully on Windows', async () => { - Object.defineProperty(process, 'platform', { - value: 'win32', - }) - - // Call the killSubprocess function - const result = await index.dispose() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) -}) - -describe('getCurrentNitroProcessInfo', () => { - it('should return the current nitro process info', async () => { - // Call the getCurrentNitroProcessInfo function - const result = await index.getCurrentNitroProcessInfo() - - // Assert that the result is as expected - expect(result).toEqual({ - isRunning: true, - }) - }) -}) - -describe('decompressRunner', () => { - it('should decompress the runner successfully', async () => { - jest.mock('decompress', () => ({ - default: () => { - return Promise.resolve() - }, - })) - // Call the decompressRunner function - const result = await index.decompressRunner('', '') - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - it('should not reject if decompression failed', async () => { - jest.mock('decompress', () => ({ - default: () => { - return Promise.reject('Failed to decompress') - }, - })) - // Call the decompressRunner function - const result = await index.decompressRunner('', '') - expect(result).toBeUndefined() - }) -}) - -describe('addAdditionalDependencies', () => { - it('should add additional dependencies successfully', async () => { - // Call the addAdditionalDependencies function - const result = await index.addAdditionalDependencies({ - name: 'name', - version: 'version', - }) - - // Assert that the 
result is as expected - expect(result).toBeUndefined() - }) -}) diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts deleted file mode 100644 index 98ca4572f..000000000 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ /dev/null @@ -1,501 +0,0 @@ -import fs from 'fs' -import path from 'path' -import { ChildProcessWithoutNullStreams, spawn } from 'child_process' -import tcpPortUsed from 'tcp-port-used' -import fetchRT from 'fetch-retry' -import { - log, - getSystemResourceInfo, - InferenceEngine, - ModelSettingParams, - PromptTemplate, - SystemInformation, - getJanDataFolderPath, - ModelFile, -} from '@janhq/core/node' -import { executableNitroFile } from './execute' -import terminate from 'terminate' -import decompress from 'decompress' - -// Polyfill fetch with retry -const fetchRetry = fetchRT(fetch) - -/** - * The response object for model init operation. - */ -interface ModelInitOptions { - modelFolder: string - model: ModelFile -} -// The PORT to use for the Nitro subprocess -const PORT = 3928 -// The HOST address to use for the Nitro subprocess -const LOCAL_HOST = '127.0.0.1' -// The URL for the Nitro subprocess -const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}` -// The URL for the Nitro subprocess to load a model -const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel` -// The URL for the Nitro subprocess to validate a model -const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus` -// The URL for the Nitro subprocess to kill itself -const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` - -const NITRO_PORT_FREE_CHECK_INTERVAL = 100 - -// The supported model format -// TODO: Should be an array to support more models -const SUPPORTED_MODEL_FORMAT = '.gguf' - -// The subprocess instance for Nitro -let subprocess: ChildProcessWithoutNullStreams | undefined = undefined - -// The current model settings -let currentSettings: (ModelSettingParams & { model?: string }) | undefined = - undefined - -/** - * Stops a Nitro subprocess. - * @param wrapper - The model wrapper. - * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. - */ -function unloadModel(): Promise { - return killSubprocess() -} - -/** - * Initializes a Nitro subprocess to load a machine learning model. - * @param wrapper - The model wrapper. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. 
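For reference, a minimal sketch of how a caller would drive the loadModel function documented here and defined just below (the folder and file names are hypothetical; the option shape follows the ModelInitOptions interface declared earlier in this file):

// Hedged usage sketch, not part of the diff. Values are illustrative.
import index from './index'

async function demo() {
  await index.loadModel({
    modelFolder: '/home/user/jan/models/llama-2', // hypothetical path
    model: {
      id: 'llama-2-7b',
      engine: 'nitro',
      settings: {
        prompt_template: '{system_message}\n### Instruction: {prompt}\n### Response:',
        llama_model_path: 'llama-2-7b.Q4_K_M.gguf', // GGUF file inside modelFolder
        ngl: 40, // offload 40 layers to the GPU; 0 or undefined forces CPU mode
      },
    } as any, // full ModelFile metadata omitted for brevity
  })
}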
- * TODO: Should pass the absolute path of the model file instead of just the name - So we can modularize module.ts into an npm package - */ -async function loadModel( - params: ModelInitOptions, - systemInfo?: SystemInformation -): Promise { - if (params.model.engine !== InferenceEngine.nitro) { - // Not a nitro model - return Promise.resolve() - } - - const nitroResourceProbe = await getSystemResourceInfo() - // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt - if (params.model.settings.prompt_template) { - const promptTemplate = params.model.settings.prompt_template - const prompt = promptTemplateConverter(promptTemplate) - if (prompt?.error) { - return Promise.reject(prompt.error) - } - params.model.settings.system_prompt = prompt.system_prompt - params.model.settings.user_prompt = prompt.user_prompt - params.model.settings.ai_prompt = prompt.ai_prompt - } - - // modelFolder is the absolute path to the running model folder - // e.g. ~/jan/models/llama-2 - let modelFolder = params.modelFolder - - let llama_model_path = params.model.settings.llama_model_path - - // Absolute model path support - if ( - params.model?.sources.length && - params.model.sources.every((e) => fs.existsSync(e.url)) - ) { - llama_model_path = - params.model.sources.length === 1 - ? params.model.sources[0].url - : params.model.sources.find((e) => - e.url.includes(llama_model_path ?? params.model.id) - )?.url - } - - if (!llama_model_path || !path.isAbsolute(llama_model_path)) { - // Look for GGUF model file - const modelFiles: string[] = fs.readdirSync(modelFolder) - const ggufBinFile = modelFiles.find( - (file) => - // 1. Prioritize llama_model_path (predefined) - (llama_model_path && file === llama_model_path) || - // 2. Prioritize GGUF File (manual import) - file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) || - // 3. Fallback Model ID (for backward compatibility) - file === params.model.id - ) - if (ggufBinFile) llama_model_path = path.join(modelFolder, ggufBinFile) - } - - // Look for absolute source path for single model - - if (!llama_model_path) return Promise.reject('No GGUF model file found') - - currentSettings = { - cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore), - // model.settings can override the default settings - ...params.model.settings, - llama_model_path, - model: params.model.id, - // This is critical and requires real CPU physical core count (or performance core) - ...(params.model.settings.mmproj && { - mmproj: path.isAbsolute(params.model.settings.mmproj) - ? params.model.settings.mmproj - : path.join(modelFolder, params.model.settings.mmproj), - }), - } - return runNitroAndLoadModel(params.model.id, systemInfo) -} - -/** - * 1. Spawn Nitro process - * 2. Load model into Nitro subprocess - * 3. 
Validate model status - * @returns - */ -async function runNitroAndLoadModel( - modelId: string, - systemInfo?: SystemInformation -) { - // Gather system information for CPU physical cores and memory - return killSubprocess() - .then(() => - tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - ) - .then(() => spawnNitroProcess(systemInfo)) - .then(() => loadLLMModel(currentSettings)) - .then(() => validateModelStatus(modelId)) - .catch((err) => { - // TODO: Broadcast error so app could display proper error message - log(`[CORTEX]::Error: ${err}`) - return { error: err } - }) -} - -/** - * Parse prompt template into args settings - * @param promptTemplate Template as string - * @returns - */ -function promptTemplateConverter(promptTemplate: string): PromptTemplate { - // Split the string using the markers - const systemMarker = '{system_message}' - const promptMarker = '{prompt}' - - if ( - promptTemplate.includes(systemMarker) && - promptTemplate.includes(promptMarker) - ) { - // Find the indices of the markers - const systemIndex = promptTemplate.indexOf(systemMarker) - const promptIndex = promptTemplate.indexOf(promptMarker) - - // Extract the parts of the string - const system_prompt = promptTemplate.substring(0, systemIndex) - const user_prompt = promptTemplate.substring( - systemIndex + systemMarker.length, - promptIndex - ) - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length - ) - - // Return the split parts - return { system_prompt, user_prompt, ai_prompt } - } else if (promptTemplate.includes(promptMarker)) { - // Extract the parts of the string for the case where only promptMarker is present - const promptIndex = promptTemplate.indexOf(promptMarker) - const user_prompt = promptTemplate.substring(0, promptIndex) - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length - ) - - // Return the split parts - return { user_prompt, ai_prompt } - } - - // Return an error if none of the conditions are met - return { error: 'Cannot split prompt template' } -} - -/** - * Loads an LLM model into the Nitro subprocess by sending an HTTP POST request. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. - */ -function loadLLMModel(settings: any): Promise<Response> { - if (!settings?.ngl) { - settings.ngl = 100 - } - log(`[CORTEX]:: Loading model with params ${JSON.stringify(settings)}`) - return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify(settings), - retries: 3, - retryDelay: 300, - }) - .then((res) => { - log( - `[CORTEX]:: Load model success with response ${JSON.stringify( - res - )}` - ) - return Promise.resolve(res) - }) - .catch((err) => { - log(`[CORTEX]::Error: Load model failed with error ${err}`) - return Promise.reject(err) - }) -} - -/** - * Validates the status of a model. - * @returns {Promise<void>} A promise that resolves if the model is loaded successfully, - * or rejects with an error message if it is not. - */ -async function validateModelStatus(modelId: string): Promise<void> { - // Send a POST request to the validation URL. - // Retry the request up to 5 times if it fails, with a delay of 300 milliseconds between retries. 
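Stepping back to promptTemplateConverter above, a worked example makes the split concrete (the ChatML-style template string here is illustrative, not from the diff):

// Worked example, not part of the diff.
const template =
  '<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
// promptTemplateConverter(template) splits on the two markers and yields:
// system_prompt: '<|im_start|>system\n'
// user_prompt:   '<|im_end|>\n<|im_start|>user\n'
// ai_prompt:     '<|im_end|>\n<|im_start|>assistant\n'
// A template containing only {prompt} yields just user_prompt and ai_prompt;
// a template with neither marker returns { error: 'Cannot split prompt template' }.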
- log(`[CORTEX]:: Validating model ${modelId}`) - return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { - method: 'POST', - body: JSON.stringify({ - model: modelId, - // TODO: force to use cortex llamacpp by default - engine: 'cortex.llamacpp', - }), - headers: { - 'Content-Type': 'application/json', - }, - retries: 5, - retryDelay: 300, - }).then(async (res: Response) => { - log( - `[CORTEX]:: Validate model state with response ${JSON.stringify( - res.status - )}` - ) - // If the response is OK, check model_loaded status. - if (res.ok) { - const body = await res.json() - // If the model is loaded, return an empty object. - // Otherwise, return an object with an error message. - if (body.model_loaded) { - log( - `[CORTEX]:: Validate model state success with response ${JSON.stringify( - body - )}` - ) - return Promise.resolve() - } - } - const errorBody = await res.text() - log( - `[CORTEX]:: Validate model state failed with response ${errorBody} and status is ${JSON.stringify( - res.statusText - )}` - ) - return Promise.reject('Validate model status failed') - }) -} - -/** - * Terminates the Nitro subprocess. - * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. - */ -async function killSubprocess(): Promise { - const controller = new AbortController() - setTimeout(() => controller.abort(), 5000) - log(`[CORTEX]:: Request to kill cortex`) - - const killRequest = () => { - return fetch(NITRO_HTTP_KILL_URL, { - method: 'DELETE', - signal: controller.signal, - }) - .catch(() => {}) // Do nothing with this attempt - .then(() => - tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - ) - .then(() => log(`[CORTEX]:: cortex process is terminated`)) - .catch((err) => { - log( - `[CORTEX]:: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}` - ) - throw 'PORT_NOT_AVAILABLE' - }) - } - - if (subprocess?.pid && process.platform !== 'darwin') { - log(`[CORTEX]:: Killing PID ${subprocess.pid}`) - const pid = subprocess.pid - return new Promise((resolve, reject) => { - terminate(pid, function (err) { - if (err) { - log('[CORTEX]::Failed to kill PID - sending request to kill') - killRequest().then(resolve).catch(reject) - } else { - tcpPortUsed - .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - .then(() => log(`[CORTEX]:: cortex process is terminated`)) - .then(() => resolve()) - .catch(() => { - log( - '[CORTEX]::Failed to kill PID (Port check timeout) - sending request to kill' - ) - killRequest().then(resolve).catch(reject) - }) - } - }) - }) - } else { - return killRequest() - } -} - -/** - * Spawns a Nitro subprocess. - * @returns A promise that resolves when the Nitro subprocess is started. - */ -function spawnNitroProcess(systemInfo?: SystemInformation): Promise { - log(`[CORTEX]:: Spawning cortex subprocess...`) - - return new Promise(async (resolve, reject) => { - let executableOptions = executableNitroFile( - // If ngl is not set or equal to 0, run on CPU with correct instructions - systemInfo?.gpuSetting - ? { - ...systemInfo.gpuSetting, - run_mode: - currentSettings?.ngl === undefined || currentSettings.ngl === 0 - ? 
'cpu' - : systemInfo.gpuSetting.run_mode, - } - : undefined - ) - - const args: string[] = ['1', LOCAL_HOST, PORT.toString()] - // Execute the binary - log( - `[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}` - ) - log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`) - - // Add engine path to the PATH and LD_LIBRARY_PATH - process.env.PATH = (process.env.PATH || '').concat( - path.delimiter, - executableOptions.enginePath - ) - log(`[CORTEX] PATH: ${process.env.PATH}`) - process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( - path.delimiter, - executableOptions.enginePath - ) - - subprocess = spawn( - executableOptions.executablePath, - ['1', LOCAL_HOST, PORT.toString()], - { - cwd: path.join(path.parse(executableOptions.executablePath).dir), - env: { - ...process.env, - ENGINE_PATH: executableOptions.enginePath, - CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, - // Vulkan - Support 1 device at a time for now - ...(executableOptions.vkVisibleDevices?.length > 0 && { - GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], - }), - }, - } - ) - - // Handle subprocess output - subprocess.stdout.on('data', (data: any) => { - log(`[CORTEX]:: ${data}`) - }) - - subprocess.stderr.on('data', (data: any) => { - log(`[CORTEX]::Error: ${data}`) - }) - - subprocess.on('close', (code: any) => { - log(`[CORTEX]:: cortex exited with code: ${code}`) - subprocess = undefined - reject(`child process exited with code ${code}`) - }) - - tcpPortUsed - .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000) - .then(() => { - log(`[CORTEX]:: cortex is ready`) - resolve() - }) - }) -} - -/** - * Every module should have a dispose function - * This will be called when the extension is unloaded and should clean up any resources - * Also called when app is closed - */ -function dispose() { - // clean other registered resources here - killSubprocess() -} - -/** - * Nitro process info - */ -export interface NitroProcessInfo { - isRunning: boolean -} - -/** - * Retrieve current nitro process - */ -const getCurrentNitroProcessInfo = (): NitroProcessInfo => { - return { - isRunning: subprocess != null, - } -} - -const addAdditionalDependencies = (data: { name: string; version: string }) => { - log( - `[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}` - ) - const additionalPath = path.delimiter.concat( - path.join(getJanDataFolderPath(), 'engines', data.name, data.version) - ) - // Set the updated PATH - process.env.PATH = (process.env.PATH || '').concat( - path.delimiter, - additionalPath - ) - process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( - path.delimiter, - additionalPath - ) -} - -const decompressRunner = async (zipPath: string, output: string) => { - console.debug(`Decompressing ${zipPath} to ${output}...`) - try { - const files = await decompress(zipPath, output) - console.debug('Decompress finished!', files) - } catch (err) { - console.error(`Decompress ${zipPath} failed: ${err}`) - } -} - -export default { - loadModel, - unloadModel, - dispose, - getCurrentNitroProcessInfo, - addAdditionalDependencies, - decompressRunner, -} diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json index cd776257c..50fe12349 100644 --- a/extensions/inference-openai-extension/package.json +++ b/extensions/inference-openai-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-openai-extension", 
"productName": "OpenAI Inference Engine", - "version": "1.0.2", + "version": "1.0.3", "description": "This extension enables OpenAI chat completion API calls", "main": "dist/index.js", "module": "dist/module.js", diff --git a/extensions/inference-openai-extension/resources/models.json b/extensions/inference-openai-extension/resources/models.json index 72517d540..124e123b9 100644 --- a/extensions/inference-openai-extension/resources/models.json +++ b/extensions/inference-openai-extension/resources/models.json @@ -23,40 +23,7 @@ }, "metadata": { "author": "OpenAI", - "tags": [ - "General" - ] - }, - "engine": "openai" - }, - { - "sources": [ - { - "url": "https://openai.com" - } - ], - "id": "gpt-4-vision-preview", - "object": "model", - "name": "OpenAI GPT 4 with Vision (Preview)", - "version": "1.1", - "description": "OpenAI GPT-4 Vision model features vision understanding capabilities", - "format": "api", - "settings": { - "vision_model": true, - "textModel": false - }, - "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "top_p": 0.95, - "stream": true - }, - "metadata": { - "author": "OpenAI", - "tags": [ - "General", - "Vision" - ] + "tags": ["General"] }, "engine": "openai" }, @@ -84,9 +51,7 @@ }, "metadata": { "author": "OpenAI", - "tags": [ - "General" - ] + "tags": ["General"] }, "engine": "openai" }, @@ -114,9 +79,7 @@ }, "metadata": { "author": "OpenAI", - "tags": [ - "General" - ] + "tags": ["General"] }, "engine": "openai" }, @@ -144,9 +107,7 @@ }, "metadata": { "author": "OpenAI", - "tags": [ - "General" - ] + "tags": ["General"] }, "engine": "openai" }, @@ -174,9 +135,7 @@ }, "metadata": { "author": "OpenAI", - "tags": [ - "General" - ] + "tags": ["General"] }, "engine": "openai" } diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 44c243adf..64880b678 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -70,16 +70,17 @@ export default class JanInferenceOpenAIExtension extends RemoteOAIEngine { * Tranform the payload before sending it to the inference endpoint. * The new preview models such as o1-mini and o1-preview replaced max_tokens by max_completion_tokens parameter. * Others do not. 
- * @param payload - * @returns + * @param payload + * @returns */ transformPayload = (payload: OpenAIPayloadType): OpenAIPayloadType => { // Transform the payload for preview models if (this.previewModels.includes(payload.model)) { - const { max_tokens, ...params } = payload + const { max_tokens, temperature, top_p, stop, ...params } = payload return { ...params, max_completion_tokens: max_tokens, + stream: false // o1 only supports stream = false } } // Pass through for non-preview models diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json index 3a694e5a0..bd834454a 100644 --- a/extensions/model-extension/package.json +++ b/extensions/model-extension/package.json @@ -4,7 +4,6 @@ "version": "1.0.34", "description": "Model Management Extension provides model exploration and seamless downloads", "main": "dist/index.js", - "node": "dist/node/index.cjs.js", "author": "Jan ", "license": "AGPL-3.0", "scripts": { @@ -36,15 +35,9 @@ "README.md" ], "dependencies": { - "@huggingface/gguf": "^0.0.11", - "@huggingface/jinja": "^0.3.0", "@janhq/core": "file:../../core", - "hyllama": "^0.2.2", - "python-shell": "^5.0.0" + "ky": "^1.7.2", + "p-queue": "^8.0.1" }, - "bundleDependencies": [ - "hyllama", - "@huggingface/gguf", - "@huggingface/jinja" - ] + "bundleDependencies": [] } diff --git a/extensions/model-extension/resources/default-model.json b/extensions/model-extension/resources/default-model.json deleted file mode 100644 index c02008cd6..000000000 --- a/extensions/model-extension/resources/default-model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "object": "model", - "version": "1.0", - "format": "gguf", - "sources": [ - { - "url": "N/A", - "filename": "N/A" - } - ], - "id": "N/A", - "name": "N/A", - "created": 0, - "description": "User self-imported model", - "settings": { - "ctx_len": 2048, - "embedding": false, - "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:", - "llama_model_path": "N/A" - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 2048, - "stop": ["<|END_OF_TURN_TOKEN|>", "</s>", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "User", - "tags": [], - "size": 0 - }, - "engine": "nitro" -} diff --git a/extensions/model-extension/rollup.config.ts b/extensions/model-extension/rollup.config.ts index d36d8ffac..64e62480f 100644 --- a/extensions/model-extension/rollup.config.ts +++ b/extensions/model-extension/rollup.config.ts @@ -6,7 +6,6 @@ import replace from '@rollup/plugin-replace' import commonjs from '@rollup/plugin-commonjs' const settingJson = require('./resources/settings.json') const packageJson = require('./package.json') -const defaultModelJson = require('./resources/default-model.json') export default [ { @@ -20,17 +19,20 @@ export default [ plugins: [ replace({ preventAssignment: true, - DEFAULT_MODEL: JSON.stringify(defaultModelJson), SETTINGS: JSON.stringify(settingJson), - NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), + API_URL: JSON.stringify('http://127.0.0.1:39291'), + SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), }), // Allow json resolution json(), // Compile TypeScript files - typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }), + typescript({ + useTsconfigDeclarationDir: true, + exclude: ['**/__tests__', '**/*.test.ts'], + }), // Compile TypeScript files // Allow bundling cjs modules (unlike 
webpack, rollup doesn't understand cjs) - // commonjs(), + commonjs(), // Allow node_modules resolution, so you can use 'external' to control // which external modules to include in the bundle // https://github.com/rollup/rollup-plugin-node-resolve#usage @@ -39,39 +41,6 @@ export default [ browser: true, }), - // Resolve source maps to the original source - sourceMaps(), - ], - }, - { - input: `src/node/index.ts`, - output: [ - { - file: 'dist/node/index.cjs.js', - format: 'cjs', - sourcemap: true, - inlineDynamicImports: true, - }, - ], - // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') - external: ['@janhq/core/node'], - watch: { - include: 'src/node/**', - }, - plugins: [ - // Allow json resolution - json(), - // Compile TypeScript files - typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }), - // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) - commonjs(), - // Allow node_modules resolution, so you can use 'external' to control - // which external modules to include in the bundle - // https://github.com/rollup/rollup-plugin-node-resolve#usage - resolve({ - extensions: ['.ts', '.js', '.json'], - }), - // Resolve source maps to the original source sourceMaps(), ], diff --git a/extensions/model-extension/src/@types/InvalidHostError.ts b/extensions/model-extension/src/@types/InvalidHostError.ts deleted file mode 100644 index 47262206e..000000000 --- a/extensions/model-extension/src/@types/InvalidHostError.ts +++ /dev/null @@ -1,6 +0,0 @@ -export class InvalidHostError extends Error { - constructor(message: string) { - super(message) - this.name = 'InvalidHostError' - } -} diff --git a/extensions/model-extension/src/@types/NotSupportModelError.ts b/extensions/model-extension/src/@types/NotSupportModelError.ts deleted file mode 100644 index 0a1946176..000000000 --- a/extensions/model-extension/src/@types/NotSupportModelError.ts +++ /dev/null @@ -1,6 +0,0 @@ -export class NotSupportedModelError extends Error { - constructor(message: string) { - super(message) - this.name = 'NotSupportedModelError' - } -} diff --git a/extensions/model-extension/src/@types/global.d.ts b/extensions/model-extension/src/@types/global.d.ts index 3878d4bf2..bff3811e3 100644 --- a/extensions/model-extension/src/@types/global.d.ts +++ b/extensions/model-extension/src/@types/global.d.ts @@ -1,7 +1,8 @@ export {} declare global { - declare const DEFAULT_MODEL: object declare const NODE: string + declare const API_URL: string + declare const SOCKET_URL: string interface Core { api: APIFunctions diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts new file mode 100644 index 000000000..7a65e8e3f --- /dev/null +++ b/extensions/model-extension/src/cortex.ts @@ -0,0 +1,185 @@ +import PQueue from 'p-queue' +import ky from 'ky' +import { extractModelLoadParams, Model } from '@janhq/core' +import { extractInferenceParams } from '@janhq/core' +/** + * cortex.cpp Model APIs interface + */ +interface ICortexAPI { + getModel(model: string): Promise + getModels(): Promise + pullModel(model: string, id?: string, name?: string): Promise + importModel( + path: string, + modelPath: string, + name?: string, + option?: string + ): Promise + deleteModel(model: string): Promise + updateModel(model: object): Promise + cancelModelPull(model: string): Promise +} + +type ModelList = { + data: any[] +} + +export class CortexAPI implements ICortexAPI { + queue = new PQueue({ concurrency: 1 }) + + 
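The queue declared above is what serializes access to cortex.cpp: it has concurrency 1 and is primed in the constructor that follows with a healthz() probe, so no request reaches the server before it answers its health check. A minimal sketch of the same pattern (the endpoint base comes from the API_URL constant injected by the rollup config; the snippet itself is illustrative):

// Hedged sketch of the serialization pattern, not part of the diff.
import PQueue from 'p-queue'
import ky from 'ky'

const API_URL = 'http://127.0.0.1:39291' // value injected at build time above

async function demo() {
  const queue = new PQueue({ concurrency: 1 })
  // Primes the queue: retries /healthz until the server is reachable.
  queue.add(() =>
    ky.get(`${API_URL}/healthz`, { retry: { limit: 10, methods: ['get'] } })
  )
  // Enqueued behind the probe, so it only runs against a live server.
  const models = await queue.add(() => ky.get(`${API_URL}/models`).json())
  return models
}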
constructor() { + this.queue.add(() => this.healthz()) + } + + /** + * Fetches a model detail from cortex.cpp + * @param model + * @returns + */ + getModel(model: string): Promise<Model> { + return this.queue.add(() => + ky + .get(`${API_URL}/v1/models/${model}`) + .json() + .then((e) => this.transformModel(e)) + ) + } + + /** + * Fetches models list from cortex.cpp + * @param model + * @returns + */ + getModels(): Promise<Model[]> { + return this.queue + .add(() => ky.get(`${API_URL}/models`).json<ModelList>()) + .then((e) => + typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : [] + ) + } + + /** + * Pulls a model from HuggingFace via cortex.cpp + * @param model + * @returns + */ + pullModel(model: string, id?: string, name?: string): Promise<void> { + return this.queue.add(() => + ky + .post(`${API_URL}/v1/models/pull`, { json: { model, id, name } }) + .json() + .catch(async (e) => { + throw (await e.response?.json()) ?? e + }) + .then() + ) + } + + /** + * Imports a model from a local path via cortex.cpp + * @param model + * @returns + */ + importModel( + model: string, + modelPath: string, + name?: string, + option?: string + ): Promise<void> { + return this.queue.add(() => + ky + .post(`${API_URL}/v1/models/import`, { + json: { model, modelPath, name, option }, + }) + .json() + .catch((e) => console.debug(e)) // Ignore error + .then() + ) + } + + /** + * Deletes a model from cortex.cpp + * @param model + * @returns + */ + deleteModel(model: string): Promise<void> { + return this.queue.add(() => + ky.delete(`${API_URL}/models/${model}`).json().then() + ) + } + + /** + * Update a model in cortex.cpp + * @param model + * @returns + */ + updateModel(model: Partial<Model>): Promise<Model> { + return this.queue.add(() => + ky + .patch(`${API_URL}/v1/models/${model.id}`, { json: { ...model } }) + .json() + .then() + ) + } + + /** + * Cancel model pull in cortex.cpp + * @param model + * @returns + */ + cancelModelPull(model: string): Promise<void> { + return this.queue.add(() => + ky + .delete(`${API_URL}/models/pull`, { json: { taskId: model } }) + .json() + .then() + ) + } + + /** + * Check model status + * @param model + */ + async getModelStatus(model: string): Promise<boolean> { + return this.queue + .add(() => ky.get(`${API_URL}/models/status/${model}`)) + .then((e) => true) + .catch(() => false) + } + + /** + * Do health check on cortex.cpp + * @returns + */ + healthz(): Promise<void> { + return ky + .get(`${API_URL}/healthz`, { + retry: { + limit: 10, + methods: ['get'], + }, + }) + .then(() => {}) + } + + /** + * Transform model to the expected format (e.g. parameters, settings, metadata) + * @param model + * @returns + */ + private transformModel(model: any) { + model.parameters = { + ...extractInferenceParams(model), + ...model.parameters, + } + model.settings = { + ...extractModelLoadParams(model), + ...model.settings, + } + model.metadata = model.metadata ?? { + tags: [], + size: model.size ?? model.metadata?.size ?? 
0, + } + return model as Model + } +} diff --git a/extensions/model-extension/src/helpers/path.test.ts b/extensions/model-extension/src/helpers/path.test.ts deleted file mode 100644 index 64ca65d8a..000000000 --- a/extensions/model-extension/src/helpers/path.test.ts +++ /dev/null @@ -1,87 +0,0 @@ -import { extractFileName } from './path'; - -describe('extractFileName Function', () => { - it('should correctly extract the file name with the provided file extension', () => { - const url = 'http://example.com/some/path/to/file.ext'; - const fileExtension = '.ext'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('file.ext'); - }); - - it('should correctly append the file extension if it does not already exist in the file name', () => { - const url = 'http://example.com/some/path/to/file'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('file.txt'); - }); - - it('should handle cases where the URL does not have a file extension correctly', () => { - const url = 'http://example.com/some/path/to/file'; - const fileExtension = '.jpg'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('file.jpg'); - }); - - it('should correctly handle URLs without a trailing slash', () => { - const url = 'http://example.com/some/path/tofile'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('tofile.txt'); - }); - - it('should correctly handle URLs with multiple file extensions', () => { - const url = 'http://example.com/some/path/tofile.tar.gz'; - const fileExtension = '.gz'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('tofile.tar.gz'); - }); - - it('should correctly handle URLs with special characters', () => { - const url = 'http://example.com/some/path/tófílë.extë'; - const fileExtension = '.extë'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('tófílë.extë'); - }); - - it('should correctly handle URLs that are just a file with no path', () => { - const url = 'http://example.com/file.txt'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('file.txt'); - }); - - it('should correctly handle URLs that have special query parameters', () => { - const url = 'http://example.com/some/path/tofile.ext?query=1'; - const fileExtension = '.ext'; - const fileName = extractFileName(url.split('?')[0], fileExtension); - expect(fileName).toBe('tofile.ext'); - }); - - it('should correctly handle URLs that have uppercase characters', () => { - const url = 'http://EXAMPLE.COM/PATH/TO/FILE.EXT'; - const fileExtension = '.ext'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('FILE.EXT'); - }); - - it('should correctly handle invalid URLs', () => { - const url = 'invalid-url'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('invalid-url.txt'); - }); - - it('should correctly handle empty URLs', () => { - const url = ''; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('.txt'); - }); - - it('should correctly handle undefined URLs', () => { - const url = undefined; - const fileExtension = '.txt'; - const fileName = extractFileName(url as any, fileExtension); - expect(fileName).toBe('.txt'); - }); -}); diff --git a/extensions/model-extension/src/helpers/path.ts 
b/extensions/model-extension/src/helpers/path.ts deleted file mode 100644 index 6091005b8..000000000 --- a/extensions/model-extension/src/helpers/path.ts +++ /dev/null @@ -1,13 +0,0 @@ -/** - * try to retrieve the download file name from the source url - */ - -export function extractFileName(url: string, fileExtension: string): string { - if(!url) return fileExtension - - const extractedFileName = url.split('/').pop() - const fileName = extractedFileName.toLowerCase().endsWith(fileExtension) - ? extractedFileName - : extractedFileName + fileExtension - return fileName -} diff --git a/extensions/model-extension/src/index.test.ts b/extensions/model-extension/src/index.test.ts index 3f804b6d6..e514f8ce3 100644 --- a/extensions/model-extension/src/index.test.ts +++ b/extensions/model-extension/src/index.test.ts @@ -1,846 +1,89 @@ -/** - * @jest-environment jsdom - */ -const readDirSyncMock = jest.fn() -const existMock = jest.fn() -const readFileSyncMock = jest.fn() -const downloadMock = jest.fn() -const mkdirMock = jest.fn() -const writeFileSyncMock = jest.fn() -const copyFileMock = jest.fn() -const dirNameMock = jest.fn() -const executeMock = jest.fn() +import JanModelExtension from './index' + +let SETTINGS = [] +// @ts-ignore +global.SETTINGS = SETTINGS jest.mock('@janhq/core', () => ({ ...jest.requireActual('@janhq/core/node'), events: { emit: jest.fn(), }, - fs: { - existsSync: existMock, - readdirSync: readDirSyncMock, - readFileSync: readFileSyncMock, - writeFileSync: writeFileSyncMock, - mkdir: mkdirMock, - copyFile: copyFileMock, - fileStat: () => ({ - isDirectory: false, - }), - }, - dirName: dirNameMock, joinPath: (paths) => paths.join('/'), - ModelExtension: jest.fn(), - downloadFile: downloadMock, - executeOnMain: executeMock, + ModelExtension: jest.fn().mockImplementation(function () { + // @ts-ignore + this.registerSettings = () => { + return Promise.resolve() + } + // @ts-ignore + return this + }), })) -jest.mock('@huggingface/gguf') - -global.fetch = jest.fn(() => - Promise.resolve({ - json: () => Promise.resolve({ test: 100 }), - arrayBuffer: jest.fn(), - }) -) as jest.Mock - -import JanModelExtension from '.' 
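The rewritten test file in the hunks above no longer mocks the filesystem; it stubs the ModelExtension base class so the subclass can be constructed outside the app, then swaps the extension's cortexAPI for a fake and asserts only on delegation, as the specs that follow do. In miniature (the stub values here are illustrative):

// Hedged sketch of the new test pattern, not part of the diff.
const extension = new JanModelExtension()
// @ts-ignore - replace the HTTP client with a resolved stub
extension.cortexAPI = { deleteModel: jest.fn().mockResolvedValue(undefined) }

await extension.deleteModel('test-model')
expect(extension.cortexAPI.deleteModel).toHaveBeenCalledWith('test-model')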
-import { fs, dirName } from '@janhq/core' -import { gguf } from '@huggingface/gguf' - describe('JanModelExtension', () => { - let sut: JanModelExtension - - beforeAll(() => { - // @ts-ignore - sut = new JanModelExtension() - }) + let extension: JanModelExtension + let mockCortexAPI: any beforeEach(() => { - jest.clearAllMocks() - }) - - describe('getConfiguredModels', () => { - describe("when there's no models are pre-populated", () => { - it('should return empty array', async () => { - // Mock configured models data - const configuredModels = [] - existMock.mockReturnValue(true) - readDirSyncMock.mockReturnValue([]) - - const result = await sut.getConfiguredModels() - expect(result).toEqual([]) - }) - }) - - describe("when there's are pre-populated models - all flattened", () => { - it('returns configured models data - flatten folder - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getConfiguredModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model.json', - id: '2', - }), - ]) - ) - }) - }) - - describe("when there's are pre-populated models - there are nested folders", () => { - it('returns configured models data - flatten folder - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else if (path.includes('model2/model2-1')) - return JSON.stringify(configuredModels[1]) - }) - - const 
result = await sut.getConfiguredModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('getDownloadedModels', () => { - describe('no models downloaded', () => { - it('should return empty array', async () => { - // Mock downloaded models data - existMock.mockReturnValue(true) - readDirSyncMock.mockReturnValue([]) - - const result = await sut.getDownloadedModels() - expect(result).toEqual([]) - }) - }) - describe('only one model is downloaded', () => { - describe('flatten folder', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2'] - else if (path === 'file://models/model1') - return ['model.json', 'test.gguf'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - ]) - ) - }) - }) - }) - - describe('all models are downloaded', () => { - describe('nested folders', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else return ['model.json', 'test.gguf'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 
'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('all models are downloaded with uppercased GGUF files', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else if (path === 'file://models/model1') - return ['model.json', 'test.GGUF'] - else return ['model.json', 'test.gguf'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - - describe('all models are downloaded - GGUF & Tensort RT', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else if (path === 'file://models/model1') - return ['model.json', 'test.gguf'] - else return ['model.json', 'test.engine'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('deleteModel', () => { - describe('model is a GGUF model', () => { - it('should delete the GGUF file', 
async () => { - fs.unlinkSync = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({})) - - readDirSyncMock.mockImplementation((path) => { - return ['model.json', 'test.gguf'] - }) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.unlinkSync).toHaveBeenCalledWith( - 'file://models/model1/test.gguf' - ) - }) - - it('no gguf file presented', async () => { - fs.unlinkSync = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({})) - - readDirSyncMock.mockReturnValue(['model.json']) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.unlinkSync).toHaveBeenCalledTimes(0) - }) - - it('delete an imported model', async () => { - fs.rm = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - readDirSyncMock.mockReturnValue(['model.json', 'test.gguf']) - - // MARK: This is a tricky logic implement? - // I will just add test for now but will align on the legacy implementation - fs.readFileSync = jest.fn().mockReturnValue( - JSON.stringify({ - metadata: { - author: 'user', - }, - }) - ) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.rm).toHaveBeenCalledWith('file://models/model1') - }) - - it('delete tensorrt-models', async () => { - fs.rm = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - readDirSyncMock.mockReturnValue(['model.json', 'test.engine']) - - fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({})) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.unlinkSync).toHaveBeenCalledWith( - 'file://models/model1/test.engine' - ) - }) - }) - }) - - describe('downloadModel', () => { - const model: any = { - id: 'model-id', - name: 'Test Model', - sources: [ - { url: 'http://example.com/model.gguf', filename: 'model.gguf' }, - ], - engine: 'test-engine', + mockCortexAPI = { + getModels: jest.fn().mockResolvedValue([]), + pullModel: jest.fn().mockResolvedValue(undefined), + importModel: jest.fn().mockResolvedValue(undefined), + deleteModel: jest.fn().mockResolvedValue(undefined), + updateModel: jest.fn().mockResolvedValue({}), + cancelModelPull: jest.fn().mockResolvedValue(undefined), } - const network = { - ignoreSSL: true, - proxy: 'http://proxy.example.com', - } + // @ts-ignore + extension = new JanModelExtension() + extension.cortexAPI = mockCortexAPI + }) - const gpuSettings: any = { - gpus: [{ name: 'nvidia-rtx-3080', arch: 'ampere' }], - } + it('should register settings on load', async () => { + // @ts-ignore + const registerSettingsSpy = jest.spyOn(extension, 'registerSettings') + await extension.onLoad() + expect(registerSettingsSpy).toHaveBeenCalledWith(SETTINGS) + }) - it('should reject with invalid gguf metadata', async () => { - existMock.mockImplementation(() => false) + it('should pull a model', async () => { + const model = 'test-model' + await extension.pullModel(model) + expect(mockCortexAPI.pullModel).toHaveBeenCalledWith(model) + }) - expect( - sut.downloadModel(model, gpuSettings, network) - 
).rejects.toBeTruthy() - }) + it('should cancel model download', async () => { + const model = 'test-model' + await extension.cancelModelPull(model) + expect(mockCortexAPI.cancelModelPull).toHaveBeenCalledWith(model) + }) - it('should download corresponding ID', async () => { - existMock.mockImplementation(() => true) - dirNameMock.mockImplementation(() => 'file://models/model1') - downloadMock.mockImplementation(() => { - return Promise.resolve({}) - }) + it('should delete a model', async () => { + const model = 'test-model' + await extension.deleteModel(model) + expect(mockCortexAPI.deleteModel).toHaveBeenCalledWith(model) + }) - expect( - await sut.downloadModel( - { ...model, file_path: 'file://models/model1/model.json' }, - gpuSettings, - network - ) - ).toBeUndefined() + it('should get all models', async () => { + const models = await extension.getModels() + expect(models).toEqual([]) + expect(mockCortexAPI.getModels).toHaveBeenCalled() + }) - expect(downloadMock).toHaveBeenCalledWith( - { - localPath: 'file://models/model1/model.gguf', - modelId: 'model-id', - url: 'http://example.com/model.gguf', - }, - { ignoreSSL: true, proxy: 'http://proxy.example.com' } - ) - }) + it('should update a model', async () => { + const model = { id: 'test-model' } + const updatedModel = await extension.updateModel(model) + expect(updatedModel).toEqual({}) + expect(mockCortexAPI.updateModel).toHaveBeenCalledWith(model) + }) - it('should handle invalid model file', async () => { - executeMock.mockResolvedValue({}) - - fs.readFileSync = jest.fn(() => { - return JSON.stringify({ metadata: { author: 'user' } }) - }) - - expect( - sut.downloadModel( - { ...model, file_path: 'file://models/model1/model.json' }, - gpuSettings, - network - ) - ).resolves.not.toThrow() - - expect(downloadMock).not.toHaveBeenCalled() - }) - it('should handle model file with no sources', async () => { - executeMock.mockResolvedValue({}) - const modelWithoutSources = { ...model, sources: [] } - - expect( - sut.downloadModel( - { - ...modelWithoutSources, - file_path: 'file://models/model1/model.json', - }, - gpuSettings, - network - ) - ).resolves.toBe(undefined) - - expect(downloadMock).not.toHaveBeenCalled() - }) - - it('should handle model file with multiple sources', async () => { - const modelWithMultipleSources = { - ...model, - sources: [ - { url: 'http://example.com/model1.gguf', filename: 'model1.gguf' }, - { url: 'http://example.com/model2.gguf', filename: 'model2.gguf' }, - ], - } - - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - downloadMock.mockImplementation(() => { - return Promise.resolve({}) - }) - - expect( - await sut.downloadModel( - { - ...modelWithMultipleSources, - file_path: 'file://models/model1/model.json', - }, - gpuSettings, - network - ) - ).toBeUndefined() - - expect(downloadMock).toHaveBeenCalledWith( - { - localPath: 'file://models/model1/model1.gguf', - modelId: 'model-id', - url: 'http://example.com/model1.gguf', - }, - { ignoreSSL: true, proxy: 'http://proxy.example.com' } - ) - - expect(downloadMock).toHaveBeenCalledWith( - { - localPath: 'file://models/model1/model2.gguf', - modelId: 'model-id', - url: 'http://example.com/model2.gguf', - }, - { ignoreSSL: true, proxy: 'http://proxy.example.com' } - ) - }) - - it('should handle model file with 
no file_path', async () => { - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - const modelWithoutFilepath = { ...model, file_path: undefined } - - await sut.downloadModel(modelWithoutFilepath, gpuSettings, network) - - expect(downloadMock).toHaveBeenCalledWith( - expect.objectContaining({ - localPath: 'file://models/model-id/model.gguf', - }), - expect.anything() - ) - }) - - it('should handle model file with invalid file_path', async () => { - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - const modelWithInvalidFilepath = { - ...model, - file_path: 'file://models/invalid-model.json', - } - - await sut.downloadModel(modelWithInvalidFilepath, gpuSettings, network) - - expect(downloadMock).toHaveBeenCalledWith( - expect.objectContaining({ - localPath: 'file://models/model1/model.gguf', - }), - expect.anything() - ) - }) - - it('should handle model with valid chat_template', async () => { - executeMock.mockResolvedValue('{prompt}') - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: {}, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - settings: { - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - } - - const result = await sut.retrieveGGUFMetadata({}) - - expect(result).toEqual({ - parameters: { - stop: [], - }, - settings: { - ctx_len: 4096, - ngl: 33, - prompt_template: '{prompt}', - }, - }) - }) - - it('should handle model without chat_template', async () => { - executeMock.mockRejectedValue({}) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: {}, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - settings: { - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - } - - const result = await sut.retrieveGGUFMetadata({}) - - expect(result).toEqual({ - parameters: { - stop: [], - }, - settings: { - ctx_len: 4096, - ngl: 33, - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - }) - }) + it('should import a model', async () => { + const model: any = { path: 'test-path' } + const optionType: any = 'test-option' + await extension.importModel(model, optionType) + expect(mockCortexAPI.importModel).toHaveBeenCalledWith( + model.path, + optionType + ) }) }) diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index 7e7c12469..b3ad2a012 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -1,65 +1,51 @@ import { - fs, - downloadFile, - abortDownload, - InferenceEngine, - joinPath, ModelExtension, Model, - getJanDataFolderPath, + InferenceEngine, + joinPath, + dirName, + fs, + ModelManager, + abortDownload, + DownloadState, events, DownloadEvent, - DownloadRoute, - DownloadState, OptionType, - ImportingModel, - LocalImportModelEvent, - baseName, - GpuSetting, - DownloadRequest, - executeOnMain, - HuggingFaceRepoData, - getFileSize, - AllQuantizations, - ModelEvent, - ModelFile, - dirName, } from '@janhq/core' - -import { extractFileName } from 
'./helpers/path' -import { GGUFMetadata, gguf } from '@huggingface/gguf' -import { NotSupportedModelError } from './@types/NotSupportModelError' -import { InvalidHostError } from './@types/InvalidHostError' +import { CortexAPI } from './cortex' +import { scanModelsFolder } from './legacy/model-json' +import { downloadModel } from './legacy/download' +import { systemInformation } from '@janhq/core' +import { deleteModelFiles } from './legacy/delete' declare const SETTINGS: Array -enum Settings { - huggingFaceAccessToken = 'hugging-face-access-token', + +/** + * Extension enum + */ +enum ExtensionEnum { + downloadedModels = 'downloadedModels', } /** * A extension for models */ export default class JanModelExtension extends ModelExtension { - private static readonly _homeDir = 'file://models' - private static readonly _modelMetadataFileName = 'model.json' - private static readonly _supportedModelFormat = '.gguf' - private static readonly _incompletedModelFileName = '.download' - private static readonly _offlineInferenceEngine = [ - InferenceEngine.nitro, - InferenceEngine.nitro_tensorrt_llm, - ] - private static readonly _tensorRtEngineFormat = '.engine' - private static readonly _supportedGpuArch = ['ampere', 'ada'] - - interrupted = false + cortexAPI: CortexAPI = new CortexAPI() /** * Called when the extension is loaded. * @override */ async onLoad() { - // Handle Desktop Events this.registerSettings(SETTINGS) + + // Try get models from cortex.cpp + this.getModels().then((models) => { + this.registerModels(models) + }) + + // Listen to app download events this.handleDesktopEvents() } @@ -72,574 +58,203 @@ export default class JanModelExtension extends ModelExtension { /** * Downloads a machine learning model. * @param model - The model to download. - * @param network - Optional object to specify proxy/whether to ignore SSL certificates. * @returns A Promise that resolves when the model is downloaded. */ - async downloadModel( - model: ModelFile, - gpuSettings?: GpuSetting, - network?: { ignoreSSL?: boolean; proxy?: string } - ): Promise { - // Create corresponding directory - const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id]) - if (!(await fs.existsSync(modelDirPath))) await fs.mkdir(modelDirPath) - const modelJsonPath = - model.file_path ?? (await joinPath([modelDirPath, 'model.json'])) - - // Download HF model - model.json not exist - if (!(await fs.existsSync(modelJsonPath))) { - // It supports only one source for HF download - const metadata = await this.fetchModelMetadata(model.sources[0].url) - const updatedModel = await this.retrieveGGUFMetadata(metadata) - if (updatedModel) { - // Update model settings - model.settings = { - ...model.settings, - ...updatedModel.settings, - } - model.parameters = { - ...model.parameters, - ...updatedModel.parameters, - } - } - await fs.writeFileSync(modelJsonPath, JSON.stringify(model, null, 2)) - events.emit(ModelEvent.OnModelsUpdate, {}) - } - if (model.engine === InferenceEngine.nitro_tensorrt_llm) { - if (!gpuSettings || gpuSettings.gpus.length === 0) { - console.error('No GPU found. Please check your GPU setting.') - return - } - const firstGpu = gpuSettings.gpus[0] - if (!firstGpu.name.toLowerCase().includes('nvidia')) { - console.error('No Nvidia GPU found. Please check your GPU setting.') - return - } - const gpuArch = firstGpu.arch - if (gpuArch === undefined) { - console.error( - 'No GPU architecture found. Please check your GPU setting.' 
- ) - return - } - - if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) { - console.debug( - `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.` - ) - return - } - - const os = 'windows' // TODO: remove this hard coded value - - const newSources = model.sources.map((source) => { - const newSource = { ...source } - newSource.url = newSource.url - .replace(//g, os) - .replace(//g, gpuArch) - return newSource - }) - model.sources = newSources - } - - console.debug(`Download sources: ${JSON.stringify(model.sources)}`) - - if (model.sources.length > 1) { - // path to model binaries - for (const source of model.sources) { - let path = extractFileName( - source.url, - JanModelExtension._supportedModelFormat - ) - if (source.filename) { - path = model.file_path - ? await joinPath([await dirName(model.file_path), source.filename]) - : await joinPath([modelDirPath, source.filename]) - } - - const downloadRequest: DownloadRequest = { - url: source.url, - localPath: path, - modelId: model.id, - } - downloadFile(downloadRequest, network) - } - // TODO: handle multiple binaries for web later - } else { - const fileName = extractFileName( - model.sources[0]?.url, - JanModelExtension._supportedModelFormat - ) - const path = model.file_path - ? await joinPath([await dirName(model.file_path), fileName]) - : await joinPath([modelDirPath, fileName]) - const downloadRequest: DownloadRequest = { - url: model.sources[0]?.url, - localPath: path, - modelId: model.id, - } - downloadFile(downloadRequest, network) - - if (window && window.core?.api && window.core.api.baseApiUrl) { - this.startPollingDownloadProgress(model.id) + async pullModel(model: string, id?: string, name?: string): Promise { + if (id) { + const model: Model = ModelManager.instance().get(id) + // Clip vision model - should not be handled by cortex.cpp + // TensorRT model - should not be handled by cortex.cpp + if ( + model && + (model.engine === InferenceEngine.nitro_tensorrt_llm || + model.settings.vision_model) + ) { + return downloadModel(model, (await systemInformation()).gpuSetting) } } - } - - private toHuggingFaceUrl(repoId: string): string { - try { - const url = new URL(repoId) - if (url.host !== 'huggingface.co') { - throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`) - } - - const paths = url.pathname.split('/').filter((e) => e.trim().length > 0) - if (paths.length < 2) { - throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`) - } - - return `${url.origin}/api/models/${paths[0]}/${paths[1]}` - } catch (err) { - if (err instanceof InvalidHostError) { - throw err - } - - if (repoId.startsWith('https')) { - throw new Error(`Cannot parse url: ${repoId}`) - } - - return `https://huggingface.co/api/models/${repoId}` - } - } - - async fetchHuggingFaceRepoData(repoId: string): Promise { - const sanitizedUrl = this.toHuggingFaceUrl(repoId) - console.debug('sanitizedUrl', sanitizedUrl) - - const huggingFaceAccessToken = ( - await this.getSetting(Settings.huggingFaceAccessToken, '') - ).trim() - - const headers = { - Accept: 'application/json', - } - - if (huggingFaceAccessToken.length > 0) { - headers['Authorization'] = `Bearer ${huggingFaceAccessToken}` - } - - const res = await fetch(sanitizedUrl, { - headers: headers, - }) - const response = await res.json() - if (response['error'] != null) { - throw new Error(response['error']) - } - - const data = response as HuggingFaceRepoData - - if (data.tags.indexOf('gguf') === -1) { - throw new 
NotSupportedModelError( - `${repoId} is not supported. Only GGUF models are supported.` - ) - } - - const promises: Promise[] = [] - - // fetching file sizes - const url = new URL(sanitizedUrl) - const paths = url.pathname.split('/').filter((e) => e.trim().length > 0) - - for (const sibling of data.siblings) { - const downloadUrl = `https://huggingface.co/${paths[2]}/${paths[3]}/resolve/main/${sibling.rfilename}` - sibling.downloadUrl = downloadUrl - promises.push(getFileSize(downloadUrl)) - } - - const result = await Promise.all(promises) - for (let i = 0; i < data.siblings.length; i++) { - data.siblings[i].fileSize = result[i] - } - - AllQuantizations.forEach((quantization) => { - data.siblings.forEach((sibling) => { - if (!sibling.quantization && sibling.rfilename.includes(quantization)) { - sibling.quantization = quantization - } - }) - }) - - data.modelUrl = `https://huggingface.co/${paths[2]}/${paths[3]}` - return data - } - - async fetchModelMetadata(url: string): Promise { - const { metadata } = await gguf(url) - return metadata - } - - /** - * Specifically for Jan server. - */ - private async startPollingDownloadProgress(modelId: string): Promise { - // wait for some seconds before polling - await new Promise((resolve) => setTimeout(resolve, 3000)) - - return new Promise((resolve) => { - const interval = setInterval(async () => { - fetch( - `${window.core.api.baseApiUrl}/v1/download/${DownloadRoute.getDownloadProgress}/${modelId}`, - { - method: 'GET', - headers: { contentType: 'application/json' }, - } - ).then(async (res) => { - const state: DownloadState = await res.json() - if (state.downloadState === 'end') { - events.emit(DownloadEvent.onFileDownloadSuccess, state) - clearInterval(interval) - resolve() - return - } - - if (state.downloadState === 'error') { - events.emit(DownloadEvent.onFileDownloadError, state) - clearInterval(interval) - resolve() - return - } - - events.emit(DownloadEvent.onFileDownloadUpdate, state) - }) - }, 1000) - }) + /** + * Sending POST to /models/pull/{id} endpoint to pull the model + */ + return this.cortexAPI.pullModel(model, id, name) } /** * Cancels the download of a specific machine learning model. * - * @param {string} modelId - The ID of the model whose download is to be cancelled. + * @param {string} model - The ID of the model whose download is to be cancelled. * @returns {Promise} A promise that resolves when the download has been cancelled. */ - async cancelModelDownload(modelId: string): Promise { - const path = await joinPath([JanModelExtension._homeDir, modelId, modelId]) - try { - await abortDownload(path) - await fs.unlinkSync(path) - } catch (e) { - console.error(e) + async cancelModelPull(model: string): Promise { + if (model) { + const modelDto: Model = ModelManager.instance().get(model) + // Clip vision model - should not be handled by cortex.cpp + // TensorRT model - should not be handled by cortex.cpp + if ( + modelDto && + (modelDto.engine === InferenceEngine.nitro_tensorrt_llm || + modelDto.settings.vision_model) + ) { + for (const source of modelDto.sources) { + const path = await joinPath(['models', modelDto.id, source.filename]) + await abortDownload(path) + } + } } + /** + * Sending DELETE to /models/pull/{id} endpoint to cancel a model pull + */ + return this.cortexAPI.cancelModelPull(model) } /** - * Deletes a machine learning model. - * @param filePath - The path to the model file to delete. + * Deletes a pulled model + * @param model - The model to delete * @returns A Promise that resolves when the model is deleted. 
*/ - async deleteModel(model: ModelFile): Promise<void> { - try { - const dirPath = await dirName(model.file_path) - const jsonFilePath = await joinPath([ - dirPath, - JanModelExtension._modelMetadataFileName, - ]) - const modelInfo = JSON.parse( - await this.readModelMetadata(jsonFilePath) - ) as Model - - // TODO: This is so tricky? - // Should depend on sources? - const isUserImportModel = - modelInfo.metadata?.author?.toLowerCase() === 'user' - if (isUserImportModel) { - // just delete the folder - return fs.rm(dirPath) - } - - // remove all files under dirPath except model.json - const files = await fs.readdirSync(dirPath) - const deletePromises = files.map(async (fileName: string) => { - if (fileName !== JanModelExtension._modelMetadataFileName) { - return fs.unlinkSync(await joinPath([dirPath, fileName])) - } + async deleteModel(model: string): Promise<void> { + return this.cortexAPI + .deleteModel(model) + .catch((e) => console.debug(e)) + .finally(async () => { + // Delete legacy model files + await deleteModelFiles(model).catch((e) => console.debug(e)) }) - await Promise.allSettled(deletePromises) - } catch (err) { - console.error(err) - } } /** - * Gets all downloaded models. + * Gets all pulled models * @returns A Promise that resolves with an array of all models. */ - async getDownloadedModels(): Promise<ModelFile[]> { - return await this.getModelsMetadata( - async (modelDir: string, model: Model) => { - if (!JanModelExtension._offlineInferenceEngine.includes(model.engine)) - return true + async getModels(): Promise<Model[]> { + /** + * In this action, returning an empty array right away would reset the + * app cache and the app would not function properly; compare and try to + * import the legacy models instead. + */ + let currentModels: Model[] = [] - // model binaries (sources) are absolute path & exist - const existFiles = await Promise.all( - model.sources.map( - (source) => - // Supposed to be a local file url - !source.url.startsWith(`http://`) && - !source.url.startsWith(`https://`) - ) - ) - if (existFiles.every((exist) => exist)) return true + /** + * Legacy models should be supported + */ + let legacyModels = await scanModelsFolder() - const result = await fs - .readdirSync(await joinPath([JanModelExtension._homeDir, modelDir])) - .then((files: string[]) => { - // Model binary exists in the directory - // Model binary name can match model ID or be a .gguf file and not be an incomplete model file - return ( - files.includes(modelDir) || - files.filter((file) => { - if ( - file.endsWith(JanModelExtension._incompletedModelFileName) - ) { - return false - } - return ( - file - .toLowerCase() - .includes(JanModelExtension._supportedModelFormat) || - file - .toLowerCase() - .includes(JanModelExtension._tensorRtEngineFormat) - ) - // Check if the number of matched files equals the number of sources - })?.length >= model.sources.length - ) + try { + if (!localStorage.getItem(ExtensionEnum.downloadedModels)) { + // Updated from an older version than 0.5.5 + // Scan through the models folder and import them (Legacy flow) + // Return models immediately + currentModels = legacyModels + } else { + currentModels = JSON.parse( + localStorage.getItem(ExtensionEnum.downloadedModels) + ) as Model[] + } + } catch (e) { + currentModels = [] + console.error(e) + } + + /** + * Keep only models using the llama.cpp (nitro) engine; these are the + * candidates to import into cortex.cpp + */ + let toImportModels = currentModels.filter( + (e) => e.engine === InferenceEngine.nitro + ) + + await this.cortexAPI.getModels().then((models) => { + const existingIds = models.map((e) => e.id) + toImportModels = toImportModels.filter( + (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model + ) + }) + + console.log('To import models:', toImportModels.length) + /** + * There are models to import + * do not return models from cortex.cpp yet + * otherwise it will reset the app cache + */ + if (toImportModels.length > 0) { + // Import models + await Promise.all( + toImportModels.map(async (model: Model & { file_path: string }) => { + return this.importModel( + model.id, + model.sources[0].url.startsWith('http') || + !(await fs.existsSync(model.sources[0].url)) + ? await joinPath([ + await dirName(model.file_path), + model.sources[0]?.filename ?? + model.settings?.llama_model_path ?? + model.sources[0]?.url.split('/').pop() ?? + model.id, + ]) // Copied models + : model.sources[0].url, // Symlinked models + model.name + ).then((e) => { + this.updateModel({ + id: model.id, + ...model.settings, + ...model.parameters, + } as Partial<Model>) }) + }) + ) - return result - } - ) - } - - private async getModelJsonPath( - folderFullPath: string - ): Promise<string | undefined> { - // try to find model.json recursively inside each folder - if (!(await fs.existsSync(folderFullPath))) return undefined - - const files: string[] = await fs.readdirSync(folderFullPath) - if (files.length === 0) return undefined - - if (files.includes(JanModelExtension._modelMetadataFileName)) { - return joinPath([ - folderFullPath, - JanModelExtension._modelMetadataFileName, - ]) + return currentModels } - // continue recursively - for (const file of files) { - const path = await joinPath([folderFullPath, file]) - const fileStats = await fs.fileStat(path) - if (fileStats.isDirectory) { - const result = await this.getModelJsonPath(path) - if (result) return result - } - } - } - private async getModelsMetadata( - selector?: (path: string, model: Model) => Promise<boolean> - ): Promise<ModelFile[]> { - try { - if (!(await fs.existsSync(JanModelExtension._homeDir))) { - console.debug('Model folder not found') - return [] - } - - const files: string[] = await fs.readdirSync(JanModelExtension._homeDir) - - const allDirectories: string[] = [] - for (const file of files) { - if (file === '.DS_Store') continue - if (file === 'config') continue - allDirectories.push(file) - } - - const readJsonPromises = allDirectories.map(async (dirName) => { - // filter out directories that don't match the selector - // read model.json - const folderFullPath = await joinPath([ - JanModelExtension._homeDir, - dirName, - ]) - - const jsonPath = await this.getModelJsonPath(folderFullPath) - - if (await fs.existsSync(jsonPath)) { - // if we have the model.json file, read it - let model = await this.readModelMetadata(jsonPath) - - model = typeof model === 'object' ? model : JSON.parse(model) - - // This is to ensure backward compatibility with `model.json` with `source_url` - if (model['source_url'] != null) { - model['sources'] = [ - { - filename: model.id, - url: model['source_url'], - }, - ] - } - model.file_path = jsonPath - model.file_name = JanModelExtension._modelMetadataFileName - - if (selector && !(await selector?.(dirName, model))) { - return - } - return model - } else { - // otherwise, we generate our own model file - // TODO: we might have more than one binary file here.
This will be addressed with new version of Model file - // which is the PR from Hiro on branch Jan can see - return this.generateModelMetadata(dirName) - } + /** + * Models are imported successfully before + * Now return models from cortex.cpp and merge with legacy models which are not imported + */ + return await this.cortexAPI + .getModels() + .then((models) => { + return models.concat( + legacyModels.filter((e) => !models.some((x) => x.id === e.id)) + ) }) - const results = await Promise.allSettled(readJsonPromises) - const modelData = results.map((result) => { - if (result.status === 'fulfilled' && result.value) { - try { - const model = - typeof result.value === 'object' - ? result.value - : JSON.parse(result.value) - return model as ModelFile - } catch { - console.debug(`Unable to parse model metadata: ${result.value}`) - } - } - return undefined - }) - - return modelData.filter((e) => !!e) - } catch (err) { - console.error(err) - return [] - } - } - - private readModelMetadata(path: string) { - return fs.readFileSync(path, 'utf-8') + .catch(() => Promise.resolve(legacyModels)) } /** - * Handle the case where we have the model directory but we don't have the corresponding - * model.json file associated with it. - * - * This function will create a model.json file for the model. - * It works only with single binary file model. - * - * @param dirName the director which reside in ~/jan/models but does not have model.json file. + * Update a pulled model metadata + * @param model - The metadata of the model */ - private async generateModelMetadata(dirName: string): Promise { - const files: string[] = await fs.readdirSync( - await joinPath([JanModelExtension._homeDir, dirName]) - ) - - // sort files by name - files.sort() - - // find the first file which is not a directory - let binaryFileName: string | undefined = undefined - let binaryFileSize: number | undefined = undefined - - for (const file of files) { - if (file.endsWith(JanModelExtension._supportedModelFormat)) { - const path = await joinPath([JanModelExtension._homeDir, dirName, file]) - const fileStats = await fs.fileStat(path) - if (fileStats.isDirectory) continue - binaryFileSize = fileStats.size - binaryFileName = file - break - } - } - - if (!binaryFileName) { - console.warn(`Unable to find binary file for model ${dirName}`) - return - } - - const defaultModel = (await this.getDefaultModel()) as Model - const metadata = await executeOnMain( - NODE, - 'retrieveGGUFMetadata', - await joinPath([ - await getJanDataFolderPath(), - 'models', - dirName, - binaryFileName, - ]) - ).catch(() => undefined) - - const updatedModel = await this.retrieveGGUFMetadata(metadata) - - if (!defaultModel) { - console.error('Unable to find default model') - return - } - - const model: Model = { - ...defaultModel, - // Overwrite default N/A fields - id: dirName, - name: dirName, - sources: [ - { - url: binaryFileName, - filename: binaryFileName, - }, - ], - parameters: { - ...defaultModel.parameters, - ...updatedModel.parameters, - }, - settings: { - ...defaultModel.settings, - ...updatedModel.settings, - llama_model_path: binaryFileName, - }, - created: Date.now(), - description: '', - metadata: { - size: binaryFileSize, - author: 'User', - tags: [], - }, - } - - const modelFilePath = await joinPath([ - JanModelExtension._homeDir, - dirName, - JanModelExtension._modelMetadataFileName, - ]) - - await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2)) - - return model - } - - override async getDefaultModel(): Promise { - const 
defaultModel = DEFAULT_MODEL as Model - return defaultModel + async updateModel(model: Partial<Model>): Promise<Model> { + return this.cortexAPI + ?.updateModel(model) + .then(() => this.cortexAPI!.getModel(model.id)) } /** - * Gets all available models. - * @returns A Promise that resolves with an array of all models. + * Import an existing model file + * @param model + * @param modelPath + * @param name + * @param option */ - async getConfiguredModels(): Promise<ModelFile[]> { - return this.getModelsMetadata() + async importModel( + model: string, + modelPath: string, + name?: string, + option?: OptionType + ): Promise<void> { + return this.cortexAPI.importModel(model, modelPath, name, option) } + /** + * Check model status + * @param model + */ + async isModelLoaded(model: string): Promise<boolean> { + return this.cortexAPI.getModelStatus(model) + } + + /** + * Handle download state from main app + */ handleDesktopEvents() { if (window && window.electronAPI) { window.electronAPI.onFileDownloadUpdate( @@ -663,248 +278,4 @@ export default class JanModelExtension extends ModelExtension { ) } } - - private async importModelSymlink( - modelBinaryPath: string, - modelFolderName: string, - modelFolderPath: string - ): Promise<ModelFile> { - const fileStats = await fs.fileStat(modelBinaryPath, true) - const binaryFileSize = fileStats.size - - // Just need to generate model.json there - const defaultModel = (await this.getDefaultModel()) as Model - if (!defaultModel) { - console.error('Unable to find default model') - return - } - - const metadata = await executeOnMain( - NODE, - 'retrieveGGUFMetadata', - modelBinaryPath - ) - - const binaryFileName = await baseName(modelBinaryPath) - const updatedModel = await this.retrieveGGUFMetadata(metadata) - - const model: Model = { - ...defaultModel, - id: modelFolderName, - name: modelFolderName, - sources: [ - { - url: modelBinaryPath, - filename: binaryFileName, - }, - ], - parameters: { - ...defaultModel.parameters, - ...updatedModel.parameters, - }, - - settings: { - ...defaultModel.settings, - ...updatedModel.settings, - llama_model_path: binaryFileName, - }, - created: Date.now(), - description: '', - metadata: { - size: binaryFileSize, - author: 'User', - tags: [], - }, - } - - const modelFilePath = await joinPath([ - modelFolderPath, - JanModelExtension._modelMetadataFileName, - ]) - - await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2)) - - return { - ...model, - file_path: modelFilePath, - file_name: JanModelExtension._modelMetadataFileName, - } - } - - async updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile> { - if (modelInfo.id == null) throw new Error('Model ID is required') - - const model = JSON.parse( - await this.readModelMetadata(modelInfo.file_path) - ) as ModelFile - - const updatedModel: ModelFile = { - ...model, - ...modelInfo, - parameters: { - ...model.parameters, - ...modelInfo.parameters, - }, - settings: { - ...model.settings, - ...modelInfo.settings, - }, - metadata: { - ...model.metadata, - ...modelInfo.metadata, - }, - // Should not persist file_path & file_name - file_path: undefined, - file_name: undefined, - } - - await fs.writeFileSync( - modelInfo.file_path, - JSON.stringify(updatedModel, null, 2) - ) - return updatedModel - } - - private async importModel( - model: ImportingModel, - optionType: OptionType - ): Promise<Model> { - const binaryName = (await baseName(model.path)).replace(/\s/g, '') - - let modelFolderName = binaryName - if (binaryName.endsWith(JanModelExtension._supportedModelFormat)) { - modelFolderName = binaryName.replace( - JanModelExtension._supportedModelFormat, - '' - ) - } - - 
const modelFolderPath = await this.getModelFolderName(modelFolderName) - await fs.mkdir(modelFolderPath) - - const uniqueFolderName = await baseName(modelFolderPath) - const modelBinaryFile = binaryName.endsWith( - JanModelExtension._supportedModelFormat - ) - ? binaryName - : `${binaryName}${JanModelExtension._supportedModelFormat}` - - const binaryPath = await joinPath([modelFolderPath, modelBinaryFile]) - - if (optionType === 'SYMLINK') { - return this.importModelSymlink( - model.path, - uniqueFolderName, - modelFolderPath - ) - } - - const srcStat = await fs.fileStat(model.path, true) - - // interval getting the file size to calculate the percentage - const interval = setInterval(async () => { - const destStats = await fs.fileStat(binaryPath, true) - const percentage = destStats.size / srcStat.size - events.emit(LocalImportModelEvent.onLocalImportModelUpdate, { - ...model, - percentage, - }) - }, 1000) - - await fs.copyFile(model.path, binaryPath) - - clearInterval(interval) - - // generate model json - return this.generateModelMetadata(uniqueFolderName) - } - - private async getModelFolderName( - modelFolderName: string, - count?: number - ): Promise { - const newModelFolderName = count - ? `${modelFolderName}-${count}` - : modelFolderName - - const janDataFolderPath = await getJanDataFolderPath() - const modelFolderPath = await joinPath([ - janDataFolderPath, - 'models', - newModelFolderName, - ]) - - const isFolderExist = await fs.existsSync(modelFolderPath) - if (!isFolderExist) { - return modelFolderPath - } else { - const newCount = (count ?? 0) + 1 - return this.getModelFolderName(modelFolderName, newCount) - } - } - - async importModels( - models: ImportingModel[], - optionType: OptionType - ): Promise { - const importedModels: Model[] = [] - - for (const model of models) { - events.emit(LocalImportModelEvent.onLocalImportModelUpdate, model) - try { - const importedModel = await this.importModel(model, optionType) - events.emit(LocalImportModelEvent.onLocalImportModelSuccess, { - ...model, - modelId: importedModel.id, - }) - importedModels.push(importedModel) - } catch (err) { - events.emit(LocalImportModelEvent.onLocalImportModelFailed, { - ...model, - error: err, - }) - } - } - - events.emit( - LocalImportModelEvent.onLocalImportModelFinished, - importedModels - ) - } - - /** - * Retrieve Model Settings from GGUF Metadata - * @param metadata - * @returns - */ - async retrieveGGUFMetadata(metadata: any): Promise> { - const defaultModel = DEFAULT_MODEL as Model - var template = await executeOnMain( - NODE, - 'renderJinjaTemplate', - metadata - ).catch(() => undefined) - - const eos_id = metadata['tokenizer.ggml.eos_token_id'] - const architecture = metadata['general.architecture'] - - return { - settings: { - prompt_template: template ?? defaultModel.settings.prompt_template, - ctx_len: - metadata[`${architecture}.context_length`] ?? - metadata['llama.context_length'] ?? - 4096, - ngl: - (metadata[`${architecture}.block_count`] ?? - metadata['llama.block_count'] ?? - 32) + 1, - }, - parameters: { - stop: eos_id - ? [metadata?.['tokenizer.ggml.tokens'][eos_id] ?? 
''] - : defaultModel.parameters.stop, - }, - } - } } diff --git a/extensions/model-extension/src/legacy/delete.ts b/extensions/model-extension/src/legacy/delete.ts new file mode 100644 index 000000000..5288e30ee --- /dev/null +++ b/extensions/model-extension/src/legacy/delete.ts @@ -0,0 +1,11 @@ +import { fs, joinPath } from '@janhq/core' + +export const deleteModelFiles = async (id: string) => { + try { + const dirPath = await joinPath(['file://models', id]) + // remove model folder directory + await fs.rm(dirPath) + } catch (err) { + console.error(err) + } +} diff --git a/extensions/model-extension/src/legacy/download.ts b/extensions/model-extension/src/legacy/download.ts new file mode 100644 index 000000000..d4d6c62d9 --- /dev/null +++ b/extensions/model-extension/src/legacy/download.ts @@ -0,0 +1,105 @@ +import { + downloadFile, + DownloadRequest, + fs, + GpuSetting, + InferenceEngine, + joinPath, + Model, +} from '@janhq/core' + +export const downloadModel = async ( + model: Model, + gpuSettings?: GpuSetting, + network?: { ignoreSSL?: boolean; proxy?: string } +): Promise<void> => { + const homedir = 'file://models' + const supportedGpuArch = ['ampere', 'ada'] + // Create corresponding directory + const modelDirPath = await joinPath([homedir, model.id]) + if (!(await fs.existsSync(modelDirPath))) await fs.mkdir(modelDirPath) + + const jsonFilePath = await joinPath([modelDirPath, 'model.json']) + // Write model.json on download + if (!(await fs.existsSync(jsonFilePath))) + await fs.writeFileSync( + jsonFilePath, + JSON.stringify(model, null, 2) + ) + + if (model.engine === InferenceEngine.nitro_tensorrt_llm) { + if (!gpuSettings || gpuSettings.gpus.length === 0) { + console.error('No GPU found. Please check your GPU setting.') + return + } + const firstGpu = gpuSettings.gpus[0] + if (!firstGpu.name.toLowerCase().includes('nvidia')) { + console.error('No Nvidia GPU found. Please check your GPU setting.') + return + } + const gpuArch = firstGpu.arch + if (gpuArch === undefined) { + console.error('No GPU architecture found. Please check your GPU setting.') + return + } + + if (!supportedGpuArch.includes(gpuArch)) { + console.debug( + `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.` + ) + return + } + + const os = 'windows' // TODO: remove this hard-coded value + + const newSources = model.sources.map((source) => { + const newSource = { ...source } + newSource.url = newSource.url + .replace(/<os>/g, os) + .replace(/<gpuarch>/g, gpuArch) + return newSource + }) + model.sources = newSources + } + + console.debug(`Download sources: ${JSON.stringify(model.sources)}`) + + if (model.sources.length > 1) { + // path to model binaries + for (const source of model.sources) { + let path = extractFileName(source.url, '.gguf') + if (source.filename) { + path = await joinPath([modelDirPath, source.filename]) + } + + const downloadRequest: DownloadRequest = { + url: source.url, + localPath: path, + modelId: model.id, + } + downloadFile(downloadRequest, network) + } + } else { + const fileName = extractFileName(model.sources[0]?.url, '.gguf') + const path = await joinPath([modelDirPath, fileName]) + const downloadRequest: DownloadRequest = { + url: model.sources[0]?.url, + localPath: path, + modelId: model.id, + } + downloadFile(downloadRequest, network) + } +} + +/** + * Try to retrieve the download file name from the source url + */ +function extractFileName(url: string, fileExtension: string): string { + if (!url) return fileExtension + + const extractedFileName = url.split('/').pop() + const fileName = extractedFileName.toLowerCase().endsWith(fileExtension) + ? extractedFileName + : extractedFileName + fileExtension + return fileName +} diff --git a/extensions/model-extension/src/legacy/model-json.test.ts b/extensions/model-extension/src/legacy/model-json.test.ts new file mode 100644 index 000000000..a4ea5bc0b --- /dev/null +++ b/extensions/model-extension/src/legacy/model-json.test.ts @@ -0,0 +1,80 @@ +import { scanModelsFolder, getModelJsonPath } from './model-json' + +// Mock the @janhq/core module +jest.mock('@janhq/core', () => ({ + fs: { + existsSync: jest.fn(), + readdirSync: jest.fn(), + fileStat: jest.fn(), + readFileSync: jest.fn(), + }, + joinPath: jest.fn((paths) => paths.join('/')), +})) + +// Import the mocked fs and joinPath after the mock is set up +const { fs } = jest.requireMock('@janhq/core') + +describe('model-json', () => { + beforeEach(() => { + jest.clearAllMocks() + }) + + describe('scanModelsFolder', () => { + it('should return an empty array when models folder does not exist', async () => { + fs.existsSync.mockReturnValue(false) + + const result = await scanModelsFolder() + expect(result).toEqual([]) + }) + + it('should return an array of models when valid model folders exist', async () => { + const mockModelJson = { + id: 'test-model', + sources: [ + { + filename: 'test-model', + url: 'file://models/test-model/test-model.gguf', + }, + ], + } + + fs.existsSync.mockReturnValue(true) + fs.readdirSync.mockReturnValueOnce(['test-model']) + fs.fileStat.mockResolvedValue({ isDirectory: () => true }) + fs.readFileSync.mockReturnValue(JSON.stringify(mockModelJson)) + fs.readdirSync.mockReturnValueOnce(['test-model.gguf', 'model.json']) + + const result = await scanModelsFolder() + expect(result).toHaveLength(1) + expect(result[0]).toMatchObject(mockModelJson) + }) + }) + + describe('getModelJsonPath', () => { + it('should return undefined when folder does not exist', async () => { + fs.existsSync.mockReturnValue(false) + + const result = await getModelJsonPath('non-existent-folder') + expect(result).toBeUndefined() + }) + + it('should return the path when model.json exists in the root folder', async () => { 
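+ // getModelJsonPath looks for model.json in the folder root before recursing + // into subfolders, so a single readdirSync mock is enough for this case.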
fs.existsSync.mockReturnValue(true) + fs.readdirSync.mockReturnValue(['model.json']) + + const result = await getModelJsonPath('test-folder') + expect(result).toBe('test-folder/model.json') + }) + + it('should return the path when model.json exists in a subfolder', async () => { + fs.existsSync.mockReturnValue(true) + fs.readdirSync + .mockReturnValueOnce(['subfolder']) + .mockReturnValueOnce(['model.json']) + fs.fileStat.mockResolvedValue({ isDirectory: () => true }) + + const result = await getModelJsonPath('test-folder') + expect(result).toBe('test-folder/subfolder/model.json') + }) + }) +}) diff --git a/extensions/model-extension/src/legacy/model-json.ts b/extensions/model-extension/src/legacy/model-json.ts new file mode 100644 index 000000000..3cad6014b --- /dev/null +++ b/extensions/model-extension/src/legacy/model-json.ts @@ -0,0 +1,144 @@ +import { InferenceEngine, Model, fs, joinPath } from '@janhq/core' +//// LEGACY MODEL FOLDER //// +const LocalEngines = [ + InferenceEngine.cortex, + InferenceEngine.cortex_llamacpp, + InferenceEngine.cortex_tensorrtllm, + InferenceEngine.cortex_onnx, + InferenceEngine.nitro_tensorrt_llm, + InferenceEngine.nitro, +] +/** + * Scan through models folder and return downloaded models + * @returns + */ +export const scanModelsFolder = async (): Promise<Model[]> => { + const _homeDir = 'file://models' + try { + if (!(await fs.existsSync(_homeDir))) { + console.debug('Model folder not found') + return [] + } + + const files: string[] = await fs.readdirSync(_homeDir) + + const allDirectories: string[] = [] + + for (const modelFolder of files) { + const fullModelFolderPath = await joinPath([_homeDir, modelFolder]) + if (!(await fs.fileStat(fullModelFolderPath)).isDirectory) continue + allDirectories.push(modelFolder) + } + + const readJsonPromises = allDirectories.map(async (dirName) => { + // filter out directories that don't match the selector + // read model.json + const folderFullPath = await joinPath([_homeDir, dirName]) + + const jsonPath = await getModelJsonPath(folderFullPath) + + if (await fs.existsSync(jsonPath)) { + // if we have the model.json file, read it + let model = await fs.readFileSync(jsonPath, 'utf-8') + + model = typeof model === 'object' ? model : JSON.parse(model) + + // This is to ensure backward compatibility with `model.json` with `source_url` + if (model['source_url'] != null) { + model['sources'] = [ + { + filename: model.id, + url: model['source_url'], + }, + ] + } + model.file_path = jsonPath + model.file_name = 'model.json' + + // Check model files exist + // model binaries (sources) are absolute path & exist (symlinked) + const existFiles = await Promise.all( + model.sources.map( + (source) => + // Supposed to be a local file url + !source.url.startsWith(`http://`) && + !source.url.startsWith(`https://`) + ) + ) + if ( + !LocalEngines.includes(model.engine) || + existFiles.every((exist) => exist) + ) + return model + + const result = await fs + .readdirSync(await joinPath([_homeDir, dirName])) + .then((files: string[]) => { + // Model binary exists in the directory + // Model binary name can match model ID or be a .gguf file and not be an incomplete model file + return ( + files.includes(dirName) || // Legacy model GGUF without extension + files.filter((file) => { + return ( + file.toLowerCase().endsWith('.gguf') || // GGUF + file.toLowerCase().endsWith('.engine') // TensorRT-LLM + ) + })?.length >= (model.engine === InferenceEngine.nitro_tensorrt_llm ? 1 : (model.sources?.length ?? 1)) + ) + }) + + if (result) return model + else return undefined + } + }) + const results = await Promise.allSettled(readJsonPromises) + const modelData = results + .map((result) => { + if (result.status === 'fulfilled' && result.value) { + try { + const model = + typeof result.value === 'object' + ? result.value + : JSON.parse(result.value) + return model as Model + } catch { + console.debug(`Unable to parse model metadata: ${result.value}`) + } + } + return undefined + }) + .filter((e) => !!e) + + return modelData + } catch (err) { + console.error(err) + return [] + } +} + +/** + * Retrieve the model.json path from a folder + * @param folderFullPath + * @returns + */ +export const getModelJsonPath = async ( + folderFullPath: string +): Promise<string | undefined> => { + // try to find model.json recursively inside each folder + if (!(await fs.existsSync(folderFullPath))) return undefined + const files: string[] = await fs.readdirSync(folderFullPath) + if (files.length === 0) return undefined + if (files.includes('model.json')) { + return joinPath([folderFullPath, 'model.json']) + } + // continue recursively + for (const file of files) { + const path = await joinPath([folderFullPath, file]) + const fileStats = await fs.fileStat(path) + if (fileStats.isDirectory) { + const result = await getModelJsonPath(path) + if (result) return result + } + } +} +//// END LEGACY MODEL FOLDER //// diff --git a/extensions/model-extension/src/migration.test.ts b/extensions/model-extension/src/migration.test.ts new file mode 100644 index 000000000..a3ddfa87c --- /dev/null +++ b/extensions/model-extension/src/migration.test.ts @@ -0,0 +1,167 @@ +import { Model, InferenceEngine } from '@janhq/core' +import JanModelExtension from './index' + +// Mock the @janhq/core module +jest.mock('@janhq/core', () => ({ + ModelExtension: class {}, + InferenceEngine: { + nitro: 'nitro', + }, + joinPath: jest.fn(), + dirName: jest.fn(), +})) + +// Mock the CortexAPI +jest.mock('./cortex', () => ({ + CortexAPI: jest.fn().mockImplementation(() => ({ + getModels: jest.fn(), + importModel: jest.fn(), + })), +})) + +// Mock the model-json module +jest.mock('./model-json', () => ({ + scanModelsFolder: jest.fn(), +})) + +// Import the mocked scanModelsFolder after the mock is set up +const { scanModelsFolder } = jest.requireMock('./model-json') + +describe('JanModelExtension', () => { + let extension: JanModelExtension + let mockLocalStorage: { [key: string]: string } + let mockCortexAPI: jest.Mock + + beforeEach(() => { + // @ts-ignore + extension = new JanModelExtension() + mockLocalStorage = {} + mockCortexAPI = extension.cortexAPI as any + + // Mock localStorage + Object.defineProperty(global, 'localStorage', { + value: { + getItem: jest.fn((key) => mockLocalStorage[key]), + setItem: jest.fn((key, value) => { + mockLocalStorage[key] = value + }), + }, + writable: true, + }) + }) + + describe('getModels', () => { + it('should scan models folder when localStorage is empty', async () => { + const mockModels: Model[] = [ + { + id: 'model1', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + sources: [ + { filename: 'model1.gguf', url: 'file://models/model1.gguf' }, + ], + file_path: '/path/to/model1', + }, + { + id: 'model2', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + sources: [ + { filename: 'model2.gguf', url: 'file://models/model2.gguf' }, + ], + file_path: '/path/to/model2', + }, + ] as any + scanModelsFolder.mockResolvedValue(mockModels) 
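+ // With no 'downloadedModels' key in localStorage, getModels() falls back to + // the legacy flow: scanModelsFolder() results are returned as current models.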
extension.cortexAPI.importModel = jest + .fn() + .mockResolvedValueOnce(mockModels[0]) + extension.cortexAPI.getModels = jest + .fn() + .mockResolvedValue([mockModels[0]]) + extension.cortexAPI.importModel = jest + .fn() + .mockResolvedValueOnce(mockModels[1]) + extension.cortexAPI.getModels = jest + .fn() + .mockResolvedValue([mockModels[0], mockModels[1]]) + + const result = await extension.getModels() + expect(scanModelsFolder).toHaveBeenCalled() + expect(result).toEqual(mockModels) + }) + + it('should import models when there are models to import', async () => { + const mockModels: Model[] = [ + { + id: 'model1', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + file_path: '/path/to/model1', + sources: [ + { filename: 'model1.gguf', url: 'file://models/model1.gguf' }, + ], + }, + { + id: 'model2', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + file_path: '/path/to/model2', + sources: [ + { filename: 'model2.gguf', url: 'file://models/model2.gguf' }, + ], + }, + ] as any + mockLocalStorage['downloadedModels'] = JSON.stringify(mockModels) + + extension.cortexAPI.getModels = jest.fn().mockResolvedValue([]) + extension.importModel = jest.fn().mockResolvedValue(undefined) + + const result = await extension.getModels() + + expect(extension.importModel).toHaveBeenCalledTimes(2) + expect(result).toEqual(mockModels) + }) + + it('should return models from cortexAPI when all models are already imported', async () => { + const mockModels: Model[] = [ + { + id: 'model1', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + sources: [ + { filename: 'model1.gguf', url: 'file://models/model1.gguf' }, + ], + }, + { + id: 'model2', + object: 'model', + version: '1', + format: 'gguf', + engine: InferenceEngine.nitro, + sources: [ + { filename: 'model2.gguf', url: 'file://models/model2.gguf' }, + ], + }, + ] as any + mockLocalStorage['downloadedModels'] = JSON.stringify(mockModels) + + extension.cortexAPI.getModels = jest.fn().mockResolvedValue(mockModels) + + const result = await extension.getModels() + + expect(extension.cortexAPI.getModels).toHaveBeenCalled() + expect(result).toEqual(mockModels) + }) + }) +}) diff --git a/extensions/model-extension/src/node/index.ts b/extensions/model-extension/src/node/index.ts deleted file mode 100644 index 2acf6ec4a..000000000 --- a/extensions/model-extension/src/node/index.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { closeSync, openSync, readSync } from 'fs' -import { Template } from '@huggingface/jinja' -/** - * This is to retrieve the metadata from a GGUF file - * It uses hyllama and jinja from @huggingface module - */ -export const retrieveGGUFMetadata = async (ggufPath: string) => { - try { - const { ggufMetadata } = await import('hyllama') - // Read first 10mb of gguf file - const fd = openSync(ggufPath, 'r') - const buffer = new Uint8Array(10_000_000) - readSync(fd, buffer, 0, 10_000_000, 0) - closeSync(fd) - - // Parse metadata and tensor info - const { metadata } = ggufMetadata(buffer.buffer) - - return metadata - } catch (e) { - console.log('[MODEL_EXT]', e) - } -} - -/** - * Convert metadata to jinja template - * @param metadata - */ -export const renderJinjaTemplate = (metadata: any): string => { - const template = new Template(metadata['tokenizer.chat_template']) - const eos_id = metadata['tokenizer.ggml.eos_token_id'] - const bos_id = metadata['tokenizer.ggml.bos_token_id'] - if (eos_id === undefined || bos_id === undefined) { - return '' - 
} - const eos_token = metadata['tokenizer.ggml.tokens'][eos_id] - const bos_token = metadata['tokenizer.ggml.tokens'][bos_id] - // Parse jinja template - return template.render({ - add_generation_prompt: true, - eos_token, - bos_token, - messages: [ - { - role: 'system', - content: '{system_message}', - }, - { - role: 'user', - content: '{prompt}', - }, - ], - }) -} diff --git a/extensions/model-extension/src/node/node.test.ts b/extensions/model-extension/src/node/node.test.ts deleted file mode 100644 index afd2b8470..000000000 --- a/extensions/model-extension/src/node/node.test.ts +++ /dev/null @@ -1,53 +0,0 @@ -import { renderJinjaTemplate } from './index' -import { Template } from '@huggingface/jinja' - -jest.mock('@huggingface/jinja', () => ({ - Template: jest.fn((template: string) => ({ - render: jest.fn(() => `${template}_rendered`), - })), -})) - -describe('renderJinjaTemplate', () => { - beforeEach(() => { - jest.clearAllMocks() // Clear mocks between tests - }) - - it('should render the template with correct parameters', () => { - const metadata = { - 'tokenizer.chat_template': 'Hello, {{ messages }}!', - 'tokenizer.ggml.eos_token_id': 0, - 'tokenizer.ggml.bos_token_id': 1, - 'tokenizer.ggml.tokens': ['EOS', 'BOS'], - } - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!') - - expect(renderedTemplate).toBe('Hello, {{ messages }}!_rendered') - }) - - it('should handle missing token IDs gracefully', () => { - const metadata = { - 'tokenizer.chat_template': 'Hello, {{ messages }}!', - 'tokenizer.ggml.eos_token_id': 0, - 'tokenizer.ggml.tokens': ['EOS'], - } - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!') - - expect(renderedTemplate).toBe('') - }) - - it('should handle empty template gracefully', () => { - const metadata = {} - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith(undefined) - - expect(renderedTemplate).toBe("") - }) -}) diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts index 11c86a9a7..d9c89242f 100644 --- a/extensions/tensorrt-llm-extension/src/index.ts +++ b/extensions/tensorrt-llm-extension/src/index.ts @@ -7,9 +7,7 @@ import { DownloadEvent, DownloadRequest, DownloadState, - GpuSetting, InstallationState, - Model, baseName, downloadFile, events, @@ -23,7 +21,7 @@ import { ModelEvent, getJanDataFolderPath, SystemInformation, - ModelFile, + Model, } from '@janhq/core' /** @@ -137,7 +135,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { events.emit(ModelEvent.OnModelsUpdate, {}) } - override async loadModel(model: ModelFile): Promise { + override async loadModel(model: Model): Promise { if ((await this.installationState()) === 'Installed') return super.loadModel(model) @@ -177,7 +175,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { override async inference(data: MessageRequest) { if (!this.loadedModel) return // TensorRT LLM Extension supports streaming only - if (data.model) data.model.parameters.stream = true + if (data.model && data.model.parameters) data.model.parameters.stream = true super.inference(data) } diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts index 77003389f..d02427170 100644 --- a/extensions/tensorrt-llm-extension/src/node/index.ts +++ 
b/extensions/tensorrt-llm-extension/src/node/index.ts @@ -41,7 +41,7 @@ async function loadModel( // e.g. ~/jan/models/llama-2 let modelFolder = params.modelFolder - if (params.model.settings.prompt_template) { + if (params.model.settings?.prompt_template) { const promptTemplate = params.model.settings.prompt_template const prompt = promptTemplateConverter(promptTemplate) if (prompt?.error) { diff --git a/extensions/tensorrt-llm-extension/tsconfig.json b/extensions/tensorrt-llm-extension/tsconfig.json index be07e716c..94465ebb6 100644 --- a/extensions/tensorrt-llm-extension/tsconfig.json +++ b/extensions/tensorrt-llm-extension/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "moduleResolution": "node", - "target": "es5", + "target": "ES2015", "module": "ES2020", "lib": ["es2015", "es2016", "es2017", "dom"], "strict": true, diff --git a/joi/rollup.config.mjs b/joi/rollup.config.mjs index 333a61c5c..8f20635a5 100644 --- a/joi/rollup.config.mjs +++ b/joi/rollup.config.mjs @@ -38,7 +38,11 @@ export default [ postcss({ plugins: [autoprefixer(), tailwindcss(tailwindConfig)], sourceMap: true, - use: ['sass'], + use: { + sass: { + silenceDeprecations: ['legacy-js-api'], + }, + }, minimize: true, extract: 'main.css', }), diff --git a/joi/src/core/Modal/styles.scss b/joi/src/core/Modal/styles.scss index 11af9418a..717ce2ac7 100644 --- a/joi/src/core/Modal/styles.scss +++ b/joi/src/core/Modal/styles.scss @@ -3,7 +3,6 @@ button, fieldset, .modal { &__overlay { - @apply backdrop-blur-lg; background-color: hsla(var(--modal-overlay)); z-index: 200; position: fixed; diff --git a/themes/dark-dimmed/theme.json b/themes/dark-dimmed/theme.json index c96085092..062469072 100644 --- a/themes/dark-dimmed/theme.json +++ b/themes/dark-dimmed/theme.json @@ -114,7 +114,7 @@ }, "modal": { - "overlay": "0, 0%, 0%, 0.5", + "overlay": "0, 0%, 0%, 0.7", "bg": "215, 25%, 9%, 1", "fg": "0, 0%, 100%, 11" }, diff --git a/themes/joi-dark/theme.json b/themes/joi-dark/theme.json index d389d853c..c299e3b1e 100644 --- a/themes/joi-dark/theme.json +++ b/themes/joi-dark/theme.json @@ -52,7 +52,7 @@ }, "left-panel": { - "bg": "0, 0%, 13%, 0", + "bg": "0, 0%, 13%, 1", "menu": "0, 0%, 95%, 1", "menu-hover": "0, 0%, 28%, 0.2", "menu-active": "0, 0%, 100%, 1", @@ -64,7 +64,7 @@ }, "right-panel": { - "bg": "0, 0%, 13%, 0" + "bg": "0, 0%, 13%, 1" }, "tooltip": { diff --git a/themes/joi-light/theme.json b/themes/joi-light/theme.json index 36b7a0a50..7468ff976 100644 --- a/themes/joi-light/theme.json +++ b/themes/joi-light/theme.json @@ -114,7 +114,7 @@ }, "modal": { - "overlay": "0, 0%, 0%, 0.5", + "overlay": "0, 0%, 0%, 0.7", "bg": "0, 0%, 100%, 1", "fg": "0, 0%, 0%, 1" }, diff --git a/themes/night-blue/theme.json b/themes/night-blue/theme.json index c09442b43..96d2e242c 100644 --- a/themes/night-blue/theme.json +++ b/themes/night-blue/theme.json @@ -114,7 +114,7 @@ }, "modal": { - "overlay": "0, 0%, 0%, 0.5", + "overlay": "0, 0%, 0%, 0.7", "bg": "222, 96%, 16%, 1", "fg": "0, 0%, 100%, 11" }, diff --git a/web/containers/ErrorMessage/index.test.tsx b/web/containers/ErrorMessage/index.test.tsx index 99dad5415..d2ae5aa81 100644 --- a/web/containers/ErrorMessage/index.test.tsx +++ b/web/containers/ErrorMessage/index.test.tsx @@ -1,54 +1,43 @@ // ErrorMessage.test.tsx -import React from 'react'; -import { render, screen, fireEvent } from '@testing-library/react'; -import '@testing-library/jest-dom'; -import ErrorMessage from './index'; -import { ThreadMessage, MessageStatus, ErrorCode } from '@janhq/core'; -import { useAtomValue, 
useSetAtom } from 'jotai'; -import useSendChatMessage from '@/hooks/useSendChatMessage'; +import React from 'react' +import { render, screen, fireEvent } from '@testing-library/react' +import '@testing-library/jest-dom' +import ErrorMessage from './index' +import { ThreadMessage, MessageStatus, ErrorCode } from '@janhq/core' +import { useAtomValue, useSetAtom } from 'jotai' +import useSendChatMessage from '@/hooks/useSendChatMessage' // Mock the dependencies jest.mock('jotai', () => { - const originalModule = jest.requireActual('jotai') - return { - ...originalModule, - useAtomValue: jest.fn(), - useSetAtom: jest.fn(), - } - }) + const originalModule = jest.requireActual('jotai') + return { + ...originalModule, + useAtomValue: jest.fn(), + useSetAtom: jest.fn(), + } +}) jest.mock('@/hooks/useSendChatMessage', () => ({ __esModule: true, default: jest.fn(), -})); +})) describe('ErrorMessage Component', () => { - const mockSetMainState = jest.fn(); - const mockSetSelectedSettingScreen = jest.fn(); - const mockSetModalTroubleShooting = jest.fn(); - const mockResendChatMessage = jest.fn(); + const mockSetMainState = jest.fn() + const mockSetSelectedSettingScreen = jest.fn() + const mockSetModalTroubleShooting = jest.fn() + const mockResendChatMessage = jest.fn() beforeEach(() => { - jest.clearAllMocks(); - (useAtomValue as jest.Mock).mockReturnValue([]); - (useSetAtom as jest.Mock).mockReturnValue(mockSetMainState); - (useSetAtom as jest.Mock).mockReturnValue(mockSetSelectedSettingScreen); - (useSetAtom as jest.Mock).mockReturnValue(mockSetModalTroubleShooting); - (useSendChatMessage as jest.Mock).mockReturnValue({ resendChatMessage: mockResendChatMessage }); - }); - - it('renders stopped message correctly', () => { - const message: ThreadMessage = { - id: '1', - status: MessageStatus.Stopped, - content: [{ text: { value: 'Test message' } }], - } as ThreadMessage; - - render(); - - expect(screen.getByText("Oops! The generation was interrupted. 
Let's give it another go!")).toBeInTheDocument(); - expect(screen.getByText('Regenerate')).toBeInTheDocument(); - }); + jest.clearAllMocks() + ;(useAtomValue as jest.Mock).mockReturnValue([]) + ;(useSetAtom as jest.Mock).mockReturnValue(mockSetMainState) + ;(useSetAtom as jest.Mock).mockReturnValue(mockSetSelectedSettingScreen) + ;(useSetAtom as jest.Mock).mockReturnValue(mockSetModalTroubleShooting) + ;(useSendChatMessage as jest.Mock).mockReturnValue({ + resendChatMessage: mockResendChatMessage, + }) + }) it('renders error message with InvalidApiKey correctly', () => { const message: ThreadMessage = { @@ -56,13 +45,13 @@ describe('ErrorMessage Component', () => { status: MessageStatus.Error, error_code: ErrorCode.InvalidApiKey, content: [{ text: { value: 'Invalid API Key' } }], - } as ThreadMessage; + } as ThreadMessage - render(); - - expect(screen.getByTestId('invalid-API-key-error')).toBeInTheDocument(); - expect(screen.getByText('Settings')).toBeInTheDocument(); - }); + render() + + expect(screen.getByTestId('invalid-API-key-error')).toBeInTheDocument() + expect(screen.getByText('Settings')).toBeInTheDocument() + }) it('renders general error message correctly', () => { const message: ThreadMessage = { @@ -70,26 +59,15 @@ describe('ErrorMessage Component', () => { status: MessageStatus.Error, error_code: ErrorCode.Unknown, content: [{ text: { value: 'Unknown error occurred' } }], - } as ThreadMessage; + } as ThreadMessage - render(); - - expect(screen.getByText("Apologies, something’s amiss!")).toBeInTheDocument(); - expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument(); - }); + render() - it('calls regenerateMessage when Regenerate button is clicked', () => { - const message: ThreadMessage = { - id: '1', - status: MessageStatus.Stopped, - content: [{ text: { value: 'Test message' } }], - } as ThreadMessage; - - render(); - - fireEvent.click(screen.getByText('Regenerate')); - expect(mockResendChatMessage).toHaveBeenCalled(); - }); + expect( + screen.getByText('Apologies, something’s amiss!') + ).toBeInTheDocument() + expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument() + }) it('opens troubleshooting modal when link is clicked', () => { const message: ThreadMessage = { @@ -97,11 +75,11 @@ describe('ErrorMessage Component', () => { status: MessageStatus.Error, error_code: ErrorCode.Unknown, content: [{ text: { value: 'Unknown error occurred' } }], - } as ThreadMessage; + } as ThreadMessage - render(); - - fireEvent.click(screen.getByText('troubleshooting assistance')); - expect(mockSetModalTroubleShooting).toHaveBeenCalledWith(true); - }); -}); + render() + + fireEvent.click(screen.getByText('troubleshooting assistance')) + expect(mockSetModalTroubleShooting).toHaveBeenCalledWith(true) + }) +}) diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx index bcd056b93..18558c1d8 100644 --- a/web/containers/ErrorMessage/index.tsx +++ b/web/containers/ErrorMessage/index.tsx @@ -4,9 +4,8 @@ import { MessageStatus, ThreadMessage, } from '@janhq/core' -import { Button } from '@janhq/joi' + import { useAtomValue, useSetAtom } from 'jotai' -import { RefreshCcw } from 'lucide-react' import AutoLink from '@/containers/AutoLink' import ModalTroubleShooting, { @@ -15,27 +14,17 @@ import ModalTroubleShooting, { import { MainViewState } from '@/constants/screens' -import useSendChatMessage from '@/hooks/useSendChatMessage' - import { mainViewStateAtom } from '@/helpers/atoms/App.atom' -import { getCurrentChatMessagesAtom } 
from '@/helpers/atoms/ChatMessage.atom' + import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' const ErrorMessage = ({ message }: { message: ThreadMessage }) => { - const messages = useAtomValue(getCurrentChatMessagesAtom) - const { resendChatMessage } = useSendChatMessage() const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom) const setMainState = useSetAtom(mainViewStateAtom) const setSelectedSettingScreen = useSetAtom(selectedSettingAtom) const activeThread = useAtomValue(activeThreadAtom) - const regenerateMessage = async () => { - const lastMessageIndex = messages.length - 1 - const message = messages[lastMessageIndex] - resendChatMessage(message) - } - const getErrorTitle = () => { switch (message.error_code) { case ErrorCode.Unknown: @@ -77,23 +66,6 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => { return (
-        {message.status === MessageStatus.Stopped && (
-          <div key={message.id}>
-            <span>
-              Oops! The generation was interrupted. Let&apos;s give it another go!
-            </span>
-            <Button onClick={regenerateMessage}>
-              <RefreshCcw size={14} />
-              Regenerate
-            </Button>
-          </div>
-        )}
         {message.status === MessageStatus.Error && (
{ > {getErrorTitle()}

- Jan’s in beta. Access  + {`Something's wrong.`} Access  setModalTroubleShooting(true)} diff --git a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx index ddc2eab91..dc9ffca89 100644 --- a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx +++ b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx @@ -2,18 +2,19 @@ import { Fragment } from 'react' import { Progress, Modal, Button } from '@janhq/joi' -import { useAtomValue } from 'jotai' +import { useAtomValue, useSetAtom } from 'jotai' import useDownloadModel from '@/hooks/useDownloadModel' -import { modelDownloadStateAtom } from '@/hooks/useDownloadState' +import { + modelDownloadStateAtom, + removeDownloadStateAtom, +} from '@/hooks/useDownloadState' import { formatDownloadPercentage } from '@/utils/converter' -import { getDownloadingModelAtom } from '@/helpers/atoms/Model.atom' - export default function DownloadingState() { const downloadStates = useAtomValue(modelDownloadStateAtom) - const downloadingModels = useAtomValue(getDownloadingModelAtom) + const removeDownloadState = useSetAtom(removeDownloadStateAtom) const { abortModelDownload } = useDownloadModel() const totalCurrentProgress = Object.values(downloadStates) @@ -76,10 +77,8 @@ export default function DownloadingState() { theme="destructive" onClick={() => { if (item?.modelId) { - const model = downloadingModels.find( - (model) => model.id === item.modelId - ) - if (model) abortModelDownload(model) + removeDownloadState(item?.modelId) + abortModelDownload(item?.modelId) } }} > diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx index 5e8549c7f..8ad16eeba 100644 --- a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx +++ b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx @@ -10,8 +10,6 @@ import { isLocalEngine } from '@/utils/modelEngine' import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' -const Column = ['Model', 'Size', ''] - const TableActiveModel = () => { const { activeModel, stateModel, stopModel } = useActiveModel() @@ -21,37 +19,23 @@ const TableActiveModel = () => {

- - - {Column.map((col, i) => { - return ( - - ) - })} - - {activeModel && isLocalEngine(activeModel.engine) ? ( -
- {col} -

{activeModel.name}

- {activeModel.metadata.size - ? toGibibytes(activeModel.metadata.size) + {activeModel.metadata?.size + ? toGibibytes(activeModel.metadata?.size) : '-'} + { const [showSystemMonitorPanel, setShowSystemMonitorPanel] = useAtom( showSystemMonitorPanelAtom ) - const [control, setControl] = useState(null) - const [elementExpand, setElementExpand] = useState( - null - ) + const reduceTransparent = useAtomValue(reduceTransparentAtom) const { watch, stopWatching } = useGetSystemResources() - useClickOutside( - () => { - toggleShowSystemMonitorPanel(false) - setShowFullScreen(false) - }, - null, - [control, elementExpand] - ) - const toggleShowSystemMonitorPanel = useCallback( (isShow: boolean) => { setShowSystemMonitorPanel(isShow) @@ -76,7 +63,6 @@ const SystemMonitor = () => { return (
{
{showSystemMonitorPanel && (
{ {ramUtilitized}%
- {gpus.length > 0 && (
{gpus.map((gpu, index) => { diff --git a/web/containers/Layout/RibbonPanel/index.tsx b/web/containers/Layout/RibbonPanel/index.tsx index 2eb1bad70..13116dc16 100644 --- a/web/containers/Layout/RibbonPanel/index.tsx +++ b/web/containers/Layout/RibbonPanel/index.tsx @@ -1,5 +1,4 @@ import { Tooltip, useMediaQuery } from '@janhq/joi' -import { motion as m } from 'framer-motion' import { useAtom, useAtomValue, useSetAtom } from 'jotai' import { MessageCircleIcon, @@ -95,10 +94,11 @@ export default function RibbonPanel() { return (
onMenuClick(menu.state)} > onMenuClick(menu.state)} > {menu.icon}
- {isActive && ( - - )}
} content={ diff --git a/web/containers/Loader/ModelReload.tsx b/web/containers/Loader/ModelReload.tsx index fbe673788..29709c0da 100644 --- a/web/containers/Loader/ModelReload.tsx +++ b/web/containers/Loader/ModelReload.tsx @@ -44,6 +44,11 @@ export default function ModelReload() { Reloading model {stateModel.model?.id} +
+      <span>
+        Model is reloading to apply new changes.
+      </span>
+
) } diff --git a/web/containers/ModalCancelDownload/index.tsx b/web/containers/ModalCancelDownload/index.tsx index e8d3842a8..1826c78a7 100644 --- a/web/containers/ModalCancelDownload/index.tsx +++ b/web/containers/ModalCancelDownload/index.tsx @@ -4,16 +4,17 @@ import { Model } from '@janhq/core' import { Modal, Button, Progress, ModalClose } from '@janhq/joi' -import { useAtomValue } from 'jotai' +import { useAtomValue, useSetAtom } from 'jotai' import useDownloadModel from '@/hooks/useDownloadModel' -import { modelDownloadStateAtom } from '@/hooks/useDownloadState' +import { + modelDownloadStateAtom, + removeDownloadStateAtom, +} from '@/hooks/useDownloadState' import { formatDownloadPercentage } from '@/utils/converter' -import { getDownloadingModelAtom } from '@/helpers/atoms/Model.atom' - type Props = { model: Model isFromList?: boolean @@ -21,20 +22,16 @@ type Props = { const ModalCancelDownload = ({ model, isFromList }: Props) => { const { abortModelDownload } = useDownloadModel() - const downloadingModels = useAtomValue(getDownloadingModelAtom) + const removeDownloadState = useSetAtom(removeDownloadStateAtom) const allDownloadStates = useAtomValue(modelDownloadStateAtom) const downloadState = allDownloadStates[model.id] - const cancelText = `Cancel ${formatDownloadPercentage(downloadState.percent)}` + const cancelText = `Cancel ${formatDownloadPercentage(downloadState?.percent ?? 0)}` const onAbortDownloadClick = useCallback(() => { - if (downloadState?.modelId) { - const model = downloadingModels.find( - (model) => model.id === downloadState.modelId - ) - if (model) abortModelDownload(model) - } - }, [downloadState, downloadingModels, abortModelDownload]) + removeDownloadState(model.id) + abortModelDownload(downloadState?.modelId ?? model.id) + }, [downloadState, abortModelDownload, removeDownloadState, model]) return ( { - {formatDownloadPercentage(downloadState.percent)} + {formatDownloadPercentage(downloadState?.percent ?? 0)} diff --git a/web/containers/ModalTroubleShoot/CortexLogs.tsx b/web/containers/ModalTroubleShoot/CortexLogs.tsx new file mode 100644 index 000000000..3323a1694 --- /dev/null +++ b/web/containers/ModalTroubleShoot/CortexLogs.tsx @@ -0,0 +1,226 @@ +import React, { useEffect, useState, memo } from 'react' + +import { Button } from '@janhq/joi' + +import { CopyIcon, CheckIcon, FolderIcon } from 'lucide-react' + +import { twMerge } from 'tailwind-merge' + +import { useClipboard } from '@/hooks/useClipboard' +import { useLogs } from '@/hooks/useLogs' +import { usePath } from '@/hooks/usePath' + +const CortexLogs = () => { + const { getLogs } = useLogs() + const [logs, setLogs] = useState([]) + const { onRevealInFinder } = usePath() + + useEffect(() => { + getLogs('cortex').then((log) => { + if (typeof log?.split === 'function') { + if (log.length > 0) { + setLogs(log.split(/\r?\n|\r|\n/g)) + } + } + }) + + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []) + + const clipboard = useClipboard({ timeout: 1000 }) + + return ( +
+    <div>
+      {/* Header controls: reveal-in-finder and copy-to-clipboard buttons
+          built from FolderIcon, CopyIcon and CheckIcon; the original JSX
+          markup for this header was stripped from the diff. */}
+      {logs.length > 0 ? (
+        <code>
+          {logs.slice(-100).map((log, i) => {
+            return <p key={i}>{log}</p>
+          })}
+        </code>
+      ) : (
+        <div>
+          {/* "Empty logs" SVG illustration (markup stripped from the diff) */}
+          Empty logs
+        </div>
+      )}
+    </div>
+ ) +} + +export default memo(CortexLogs) diff --git a/web/containers/ModalTroubleShoot/index.tsx b/web/containers/ModalTroubleShoot/index.tsx index 67ccbe22f..77ee51034 100644 --- a/web/containers/ModalTroubleShoot/index.tsx +++ b/web/containers/ModalTroubleShoot/index.tsx @@ -8,10 +8,11 @@ import { twMerge } from 'tailwind-merge' import ServerLogs from '@/containers/ServerLogs' import AppLogs from './AppLogs' +import CortexLogs from './CortexLogs' import DeviceSpecs from './DeviceSpecs' export const modalTroubleShootingAtom = atom(false) -const logOption = ['App Logs', 'Server Logs', 'Device Specs'] +const logOption = ['App Logs', 'Cortex Logs', 'Server Logs', 'Device Specs'] const ModalTroubleShooting = () => { const [modalTroubleShooting, setModalTroubleShooting] = useAtom( @@ -144,10 +145,15 @@ const ModalTroubleShooting = () => {
- +
+ +
+
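For context, the new Cortex Logs tab wired in above reuses the log-panel pattern that AppLogs and the CortexLogs component follow. A minimal sketch of that pattern, assuming the useLogs hook exposes getLogs(type: string): Promise<string> as used in the diff; the LogPanelSketch name is hypothetical:

import React, { useEffect, useState, memo } from 'react'

import { useLogs } from '@/hooks/useLogs'

// Sketch of the shared log-panel pattern: fetch the raw log text once on
// mount, split it into lines, and render only the most recent entries.
const LogPanelSketch = ({ type }: { type: string }) => {
  const { getLogs } = useLogs()
  const [lines, setLines] = useState<string[]>([])

  useEffect(() => {
    getLogs(type).then((log) => {
      // Guard against non-string results before splitting into lines
      if (typeof log?.split === 'function' && log.length > 0) {
        setLines(log.split(/\r?\n|\r|\n/g))
      }
    })
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  // Show only the last 100 lines, matching CortexLogs' slice(-100)
  return (
    <div>
      {lines.length > 0 ? (
        lines.slice(-100).map((line, i) => <p key={i}>{line}</p>)
      ) : (
        <p>Empty logs</p>
      )}
    </div>
  )
}

export default memo(LogPanelSketch)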
diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx index 59f19586a..66a20a854 100644 --- a/web/containers/ModelDropdown/index.tsx +++ b/web/containers/ModelDropdown/index.tsx @@ -88,7 +88,7 @@ const ModelDropdown = ({ const searchInputRef = useRef(null) const configuredModels = useAtomValue(configuredModelsAtom) const featuredModel = configuredModels.filter((x) => - x.metadata.tags.includes('Featured') + x.metadata?.tags?.includes('Featured') ) const { updateThreadMetadata } = useCreateNewThread() @@ -108,6 +108,11 @@ const ModelDropdown = ({ const filteredDownloadedModels = useMemo( () => configuredModels + .concat( + downloadedModels.filter( + (e) => !configuredModels.some((x) => x.id === e.id) + ) + ) .filter((e) => e.name.toLowerCase().includes(searchText.toLowerCase().trim()) ) @@ -200,7 +205,6 @@ const ModelDropdown = ({ if (model) updateModelParameter(activeThread, { params: modelParams, - modelPath: model.file_path, modelId: model.id, engine: model.engine, }) @@ -262,8 +266,13 @@ const ModelDropdown = ({ }, []) const findByEngine = filteredDownloadedModels - .filter((x) => !inActiveEngineProvider.includes(x.engine)) - .map((x) => x.engine) + .map((x) => { + // Legacy engine support - they will be grouped under Cortex LlamaCPP + if (x.engine === InferenceEngine.nitro) + return InferenceEngine.cortex_llamacpp + return x.engine + }) + .filter((x) => !inActiveEngineProvider.includes(x)) const groupByEngine = findByEngine .filter(function (item, index) { @@ -444,7 +453,7 @@ const ModelDropdown = ({
    {featuredModel.map((model) => { const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) return (
  • - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} {!isDownloading ? ( downloadModel(model)} + onClick={() => + downloadModel( + model.sources[0].url, + model.id + ) + } /> ) : ( Object.values(downloadStates) @@ -500,7 +514,12 @@ const ModelDropdown = ({
      {filteredDownloadedModels - .filter((x) => x.engine === engine) + .filter( + (x) => + x.engine === engine || + (x.engine === InferenceEngine.nitro && + engine === InferenceEngine.cortex_llamacpp) + ) .filter((y) => { if (isLocalEngine(y.engine) && !searchText.length) { return downloadedModels.find((c) => c.id === y.id) @@ -511,7 +530,7 @@ const ModelDropdown = ({ .map((model) => { if (!showModel) return null const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) const isDownloaded = downloadedModels.some( (c) => c.id === model.id @@ -549,14 +568,19 @@ const ModelDropdown = ({
      {!isDownloaded && ( - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} )} {!isDownloading && !isDownloaded ? ( downloadModel(model)} + onClick={() => + downloadModel( + model.sources[0].url, + model.id + ) + } /> ) : ( Object.values(downloadStates) diff --git a/web/containers/ModelLabel/index.tsx b/web/containers/ModelLabel/index.tsx index b0a3da96f..a6237ada6 100644 --- a/web/containers/ModelLabel/index.tsx +++ b/web/containers/ModelLabel/index.tsx @@ -42,7 +42,7 @@ const ModelLabel = ({ metadata, compact }: Props) => { const availableRam = settings?.run_mode === 'gpu' ? availableVram * 1000000 // MB to bytes - : totalRam - usedRam + (activeModel?.metadata.size ?? 0) + : totalRam - usedRam + (activeModel?.metadata?.size ?? 0) if (minimumRamModel > totalRam) { return ( { return null } - return metadata.tags.includes('Coming Soon') ? ( + return metadata?.tags?.includes('Coming Soon') ? ( ) : ( - getLabel(metadata.size ?? 0) + getLabel(metadata?.size ?? 0) ) } diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx index 5cc92219c..6cad910f7 100644 --- a/web/containers/Providers/EventHandler.tsx +++ b/web/containers/Providers/EventHandler.tsx @@ -15,6 +15,8 @@ import { Thread, EngineManager, InferenceEngine, + extractInferenceParams, + ModelExtension, } from '@janhq/core' import { useAtomValue, useSetAtom } from 'jotai' import { ulid } from 'ulidx' @@ -22,7 +24,6 @@ import { ulid } from 'ulidx' import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel' import { isLocalEngine } from '@/utils/modelEngine' -import { extractInferenceParams } from '@/utils/modelParam' import { extensionManager } from '@/extension' import { @@ -179,6 +180,17 @@ export default function EventHandler({ children }: { children: ReactNode }) { setIsGeneratingResponse(false) } return + } else if (message.status === MessageStatus.Error) { + ;(async () => { + if ( + !(await extensionManager + .get(ExtensionTypeEnum.Model) + ?.isModelLoaded(activeModelRef.current?.id as string)) + ) { + setActiveModel(undefined) + setStateModel({ state: 'start', loading: false, model: undefined }) + } + })() } // Mark the thread as not waiting for response updateThreadWaiting(message.thread_id, false) diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx index b35ab2e43..5cb0debab 100644 --- a/web/containers/Providers/EventListener.tsx +++ b/web/containers/Providers/EventListener.tsx @@ -2,7 +2,17 @@ import { PropsWithChildren, useCallback, useEffect } from 'react' import React from 'react' -import { DownloadEvent, events, DownloadState, ModelEvent } from '@janhq/core' +import { + DownloadEvent, + events, + DownloadState, + ModelEvent, + ExtensionTypeEnum, + ModelExtension, + ModelManager, + Model, +} from '@janhq/core' + import { useSetAtom } from 'jotai' import { setDownloadStateAtom } from '@/hooks/useDownloadState' @@ -18,16 +28,23 @@ import EventHandler from './EventHandler' import ModelImportListener from './ModelImportListener' import QuickAskListener from './QuickAskListener' +import { extensionManager } from '@/extension' import { InstallingExtensionState, removeInstallingExtensionAtom, setInstallingExtensionAtom, } from '@/helpers/atoms/Extension.atom' +import { + addDownloadingModelAtom, + removeDownloadingModelAtom, +} from '@/helpers/atoms/Model.atom' const EventListenerWrapper = ({ children }: PropsWithChildren) => { const setDownloadState = useSetAtom(setDownloadStateAtom) const 
setInstallingExtension = useSetAtom(setInstallingExtensionAtom) const removeInstallingExtension = useSetAtom(removeInstallingExtensionAtom) + const addDownloadingModel = useSetAtom(addDownloadingModelAtom) + const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom) const onFileDownloadUpdate = useCallback( async (state: DownloadState) => { @@ -40,10 +57,11 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { } setInstallingExtension(state.extensionId!, installingExtensionState) } else { + addDownloadingModel(state.modelId) setDownloadState(state) } }, - [setDownloadState, setInstallingExtension] + [addDownloadingModel, setDownloadState, setInstallingExtension] ) const onFileDownloadError = useCallback( @@ -52,21 +70,52 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { if (state.downloadType === 'extension') { removeInstallingExtension(state.extensionId!) } else { + state.downloadState = 'error' setDownloadState(state) + removeDownloadingModel(state.modelId) } }, - [setDownloadState, removeInstallingExtension] + [removeInstallingExtension, setDownloadState, removeDownloadingModel] + ) + + const onFileDownloadStopped = useCallback( + (state: DownloadState) => { + console.debug('onFileDownloadError', state) + if (state.downloadType === 'extension') { + removeInstallingExtension(state.extensionId!) + } else { + state.downloadState = 'error' + state.error = 'aborted' + setDownloadState(state) + removeDownloadingModel(state.modelId) + } + }, + [removeInstallingExtension, setDownloadState, removeDownloadingModel] ) const onFileDownloadSuccess = useCallback( - (state: DownloadState) => { + async (state: DownloadState) => { console.debug('onFileDownloadSuccess', state) if (state.downloadType !== 'extension') { + // Update model metadata accordingly + const model = ModelManager.instance().models.get(state.modelId) + if (model) { + await extensionManager + .get(ExtensionTypeEnum.Model) + ?.updateModel({ + id: model.id, + ...model.settings, + ...model.parameters, + } as Partial) + .catch((e) => console.debug(e)) + } + state.downloadState = 'end' setDownloadState(state) + removeDownloadingModel(state.modelId) } events.emit(ModelEvent.OnModelsUpdate, {}) }, - [setDownloadState] + [removeDownloadingModel, setDownloadState] ) const onFileUnzipSuccess = useCallback( @@ -87,6 +136,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate) events.on(DownloadEvent.onFileDownloadError, onFileDownloadError) events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) + events.on(DownloadEvent.onFileDownloadStopped, onFileDownloadStopped) events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess) return () => { @@ -94,6 +144,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate) events.off(DownloadEvent.onFileDownloadError, onFileDownloadError) events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) + events.off(DownloadEvent.onFileDownloadStopped, onFileDownloadStopped) events.off(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess) } }, [ @@ -101,6 +152,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { onFileDownloadError, onFileDownloadSuccess, onFileUnzipSuccess, + onFileDownloadStopped, ]) return ( diff --git a/web/extension/ExtensionManager.ts b/web/extension/ExtensionManager.ts index aa1a7674b..811126f85 100644 --- 
a/web/extension/ExtensionManager.ts +++ b/web/extension/ExtensionManager.ts @@ -8,6 +8,7 @@ import Extension from './Extension' * Manages the registration and retrieval of extensions. */ export class ExtensionManager { + date = new Date().toISOString() // Registered extensions private extensions = new Map() diff --git a/web/helpers/atoms/Model.atom.test.ts b/web/helpers/atoms/Model.atom.test.ts index 57827efec..923f24df4 100644 --- a/web/helpers/atoms/Model.atom.test.ts +++ b/web/helpers/atoms/Model.atom.test.ts @@ -32,13 +32,22 @@ describe('Model.atom.ts', () => { }) describe('showEngineListModelAtom', () => { - it('should initialize as an empty array', () => { - expect(ModelAtoms.showEngineListModelAtom.init).toEqual(['nitro']) + it('should initialize with local engines', () => { + expect(ModelAtoms.showEngineListModelAtom.init).toEqual([ + 'nitro', + 'cortex', + 'llama-cpp', + 'onnxruntime', + 'tensorrt-llm', + ]) }) }) describe('addDownloadingModelAtom', () => { it('should add downloading model', async () => { + const { result: reset } = renderHook(() => + useSetAtom(ModelAtoms.downloadingModelsAtom) + ) const { result: setAtom } = renderHook(() => useSetAtom(ModelAtoms.addDownloadingModelAtom) ) @@ -49,11 +58,16 @@ describe('Model.atom.ts', () => { setAtom.current({ id: '1' } as any) }) expect(getAtom.current).toEqual([{ id: '1' }]) + reset.current([]) }) }) describe('removeDownloadingModelAtom', () => { it('should remove downloading model', async () => { + const { result: reset } = renderHook(() => + useSetAtom(ModelAtoms.downloadingModelsAtom) + ) + const { result: setAtom } = renderHook(() => useSetAtom(ModelAtoms.addDownloadingModelAtom) ) @@ -63,16 +77,21 @@ describe('Model.atom.ts', () => { const { result: getAtom } = renderHook(() => useAtomValue(ModelAtoms.getDownloadingModelAtom) ) + expect(getAtom.current).toEqual([]) act(() => { - setAtom.current({ id: '1' } as any) + setAtom.current('1') removeAtom.current('1') }) expect(getAtom.current).toEqual([]) + reset.current([]) }) }) describe('removeDownloadedModelAtom', () => { it('should remove downloaded model', async () => { + const { result: reset } = renderHook(() => + useSetAtom(ModelAtoms.downloadingModelsAtom) + ) const { result: setAtom } = renderHook(() => useSetAtom(ModelAtoms.downloadedModelsAtom) ) @@ -94,6 +113,7 @@ describe('Model.atom.ts', () => { removeAtom.current('1') }) expect(getAtom.current).toEqual([]) + reset.current([]) }) }) @@ -284,10 +304,4 @@ describe('Model.atom.ts', () => { expect(importAtom.current[0]).toEqual([]) }) }) - - describe('defaultModelAtom', () => { - it('should initialize as undefined', () => { - expect(ModelAtoms.defaultModelAtom.init).toBeUndefined() - }) - }) }) diff --git a/web/helpers/atoms/Model.atom.ts b/web/helpers/atoms/Model.atom.ts index 6abc42c9e..dd4414801 100644 --- a/web/helpers/atoms/Model.atom.ts +++ b/web/helpers/atoms/Model.atom.ts @@ -1,4 +1,4 @@ -import { ImportingModel, InferenceEngine, Model, ModelFile } from '@janhq/core' +import { ImportingModel, InferenceEngine, Model } from '@janhq/core' import { atom } from 'jotai' import { atomWithStorage } from 'jotai/utils' @@ -14,7 +14,7 @@ enum ModelStorageAtomKeys { * Downloaded Models Atom * This atom stores the list of models that have been downloaded. 
*/ -export const downloadedModelsAtom = atomWithStorage( +export const downloadedModelsAtom = atomWithStorage( ModelStorageAtomKeys.DownloadedModels, [] ) @@ -23,7 +23,7 @@ export const downloadedModelsAtom = atomWithStorage( * Configured Models Atom * This atom stores the list of models that have been configured and available to download */ -export const configuredModelsAtom = atomWithStorage( +export const configuredModelsAtom = atomWithStorage( ModelStorageAtomKeys.AvailableModels, [] ) @@ -43,12 +43,18 @@ export const removeDownloadedModelAtom = atom( /** * Atom to store the selected model (from ModelDropdown) */ -export const selectedModelAtom = atom(undefined) +export const selectedModelAtom = atom(undefined) /** * Atom to store the expanded engine sections (from ModelDropdown) */ -export const showEngineListModelAtom = atom([InferenceEngine.nitro]) +export const showEngineListModelAtom = atom([ + InferenceEngine.nitro, + InferenceEngine.cortex, + InferenceEngine.cortex_llamacpp, + InferenceEngine.cortex_onnx, + InferenceEngine.cortex_tensorrtllm, +]) /// End Models Atom /// Model Download Atom @@ -58,13 +64,13 @@ export const stateModel = atom({ state: 'start', loading: false, model: '' }) /** * Stores the list of models which are being downloaded. */ -const downloadingModelsAtom = atom([]) +export const downloadingModelsAtom = atom([]) export const getDownloadingModelAtom = atom((get) => get(downloadingModelsAtom)) -export const addDownloadingModelAtom = atom(null, (get, set, model: Model) => { +export const addDownloadingModelAtom = atom(null, (get, set, model: string) => { const downloadingModels = get(downloadingModelsAtom) - if (!downloadingModels.find((e) => e.id === model.id)) { + if (!downloadingModels.includes(model)) { set(downloadingModelsAtom, [...downloadingModels, model]) } }) @@ -76,7 +82,7 @@ export const removeDownloadingModelAtom = atom( set( downloadingModelsAtom, - downloadingModels.filter((e) => e.id !== modelId) + downloadingModels.filter((e) => e !== modelId) ) } ) @@ -88,10 +94,6 @@ export const removeDownloadingModelAtom = atom( // store the paths of the models that are being imported export const importingModelsAtom = atom([]) -// DEPRECATED: Remove when moving to cortex.cpp -// Default model template when importing -export const defaultModelAtom = atom(undefined) - /** * Importing progress Atom */ diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 2d53678c3..7b9ee98e0 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,6 +1,6 @@ import { useCallback, useEffect, useRef } from 'react' -import { EngineManager, Model, ModelFile } from '@janhq/core' +import { EngineManager, Model } from '@janhq/core' import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' import { toaster } from '@/containers/Toast' @@ -11,7 +11,7 @@ import { vulkanEnabledAtom } from '@/helpers/atoms/AppConfig.atom' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' -export const activeModelAtom = atom(undefined) +export const activeModelAtom = atom(undefined) export const loadModelErrorAtom = atom(undefined) type ModelState = { @@ -37,7 +37,7 @@ export function useActiveModel() { const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom) const isVulkanEnabled = useAtomValue(vulkanEnabledAtom) - const downloadedModelsRef = useRef([]) + const downloadedModelsRef = useRef([]) useEffect(() => { downloadedModelsRef.current = downloadedModels 
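The reworked atoms above track in-flight downloads by plain model-id strings rather than full Model objects. A minimal usage sketch with jotai's vanilla store; the id 'some-model-id' is a placeholder:

import { createStore } from 'jotai'

import {
  addDownloadingModelAtom,
  getDownloadingModelAtom,
  removeDownloadingModelAtom,
} from '@/helpers/atoms/Model.atom'

const store = createStore()

// Adding the same id twice is a no-op thanks to the includes() guard
store.set(addDownloadingModelAtom, 'some-model-id')
store.set(addDownloadingModelAtom, 'some-model-id')
console.log(store.get(getDownloadingModelAtom)) // ['some-model-id']

// Removal filters the id list by the plain string
store.set(removeDownloadingModelAtom, 'some-model-id')
console.log(store.get(getDownloadingModelAtom)) // []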
@@ -51,15 +51,14 @@ export function useActiveModel() { console.debug(`Model ${modelId} is already initialized. Ignore..`) return Promise.resolve() } + + if (activeModel) { + await stopModel(activeModel) + } setPendingModelLoad(true) let model = downloadedModelsRef?.current.find((e) => e.id === modelId) - const error = await stopModel().catch((error: Error) => error) - if (error) { - return Promise.reject(error) - } - setLoadModelError(undefined) setActiveModel(undefined) @@ -118,7 +117,7 @@ export function useActiveModel() { setStateModel(() => ({ state: 'start', loading: false, - model, + undefined, })) if (!pendingModelLoad && abortable) { @@ -135,28 +134,30 @@ export function useActiveModel() { }) } - const stopModel = useCallback(async () => { - const stoppingModel = activeModel || stateModel.model - if (!stoppingModel || (stateModel.state === 'stop' && stateModel.loading)) - return + const stopModel = useCallback( + async (model?: Model) => { + const stoppingModel = model ?? activeModel ?? stateModel.model + if (!stoppingModel || (stateModel.state === 'stop' && stateModel.loading)) + return - setStateModel({ state: 'stop', loading: true, model: stoppingModel }) - const engine = EngineManager.instance().get(stoppingModel.engine) - return engine - ?.unloadModel(stoppingModel) - .catch() - .then(() => { - setActiveModel(undefined) - setStateModel({ state: 'start', loading: false, model: undefined }) - setPendingModelLoad(false) - }) - }, [ - activeModel, - setActiveModel, - setStateModel, - setPendingModelLoad, - stateModel, - ]) + const engine = EngineManager.instance().get(stoppingModel.engine) + return engine + ?.unloadModel(stoppingModel) + .catch((e) => console.error(e)) + .then(() => { + setActiveModel(undefined) + setStateModel({ state: 'start', loading: false, model: undefined }) + setPendingModelLoad(false) + }) + }, + [ + activeModel, + setStateModel, + setActiveModel, + setPendingModelLoad, + stateModel, + ] + ) const stopInference = useCallback(async () => { // Loading model diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts index e65353753..75aa99c27 100644 --- a/web/hooks/useCreateNewThread.ts +++ b/web/hooks/useCreateNewThread.ts @@ -8,7 +8,7 @@ import { ThreadAssistantInfo, ThreadState, AssistantTool, - ModelFile, + Model, } from '@janhq/core' import { atom, useAtomValue, useSetAtom } from 'jotai' @@ -76,7 +76,7 @@ export const useCreateNewThread = () => { const requestCreateNewThread = async ( assistant: Assistant, - model?: ModelFile | undefined + model?: Model | undefined ) => { // Stop generating if any setIsGeneratingResponse(false) diff --git a/web/hooks/useDeleteModel.test.ts b/web/hooks/useDeleteModel.test.ts index 336a1cd0c..3ee0926f9 100644 --- a/web/hooks/useDeleteModel.test.ts +++ b/web/hooks/useDeleteModel.test.ts @@ -16,7 +16,7 @@ describe('useDeleteModel', () => { const mockModel: any = { id: 'test-model', name: 'Test Model', - // Add other required properties of ModelFile + // Add other required properties of Model } const mockDeleteModel = jest.fn() @@ -35,7 +35,7 @@ describe('useDeleteModel', () => { await result.current.deleteModel(mockModel) }) - expect(mockDeleteModel).toHaveBeenCalledWith(mockModel) + expect(mockDeleteModel).toHaveBeenCalledWith('test-model') expect(toaster).toHaveBeenCalledWith({ title: 'Model Deletion Successful', description: `Model ${mockModel.name} has been successfully deleted.`, @@ -67,7 +67,7 @@ describe('useDeleteModel', () => { ) }) - expect(mockDeleteModel).toHaveBeenCalledWith(mockModel) + 
expect(mockDeleteModel).toHaveBeenCalledWith("test-model") expect(toaster).not.toHaveBeenCalled() }) }) diff --git a/web/hooks/useDeleteModel.ts b/web/hooks/useDeleteModel.ts index 5a7a319b2..5621a78b8 100644 --- a/web/hooks/useDeleteModel.ts +++ b/web/hooks/useDeleteModel.ts @@ -1,6 +1,6 @@ import { useCallback } from 'react' -import { ExtensionTypeEnum, ModelExtension, ModelFile } from '@janhq/core' +import { ExtensionTypeEnum, ModelExtension, Model } from '@janhq/core' import { useSetAtom } from 'jotai' @@ -13,8 +13,8 @@ export default function useDeleteModel() { const removeDownloadedModel = useSetAtom(removeDownloadedModelAtom) const deleteModel = useCallback( - async (model: ModelFile) => { - await localDeleteModel(model) + async (model: Model) => { + await localDeleteModel(model.id) removeDownloadedModel(model.id) toaster({ title: 'Model Deletion Successful', @@ -28,7 +28,7 @@ export default function useDeleteModel() { return { deleteModel } } -const localDeleteModel = async (model: ModelFile) => +const localDeleteModel = async (model: string) => extensionManager .get(ExtensionTypeEnum.Model) ?.deleteModel(model) diff --git a/web/hooks/useDownloadModel.test.ts b/web/hooks/useDownloadModel.test.ts index fc0b7c21f..7e9d7b518 100644 --- a/web/hooks/useDownloadModel.test.ts +++ b/web/hooks/useDownloadModel.test.ts @@ -13,12 +13,6 @@ jest.mock('jotai', () => ({ })) jest.mock('@janhq/core') jest.mock('@/extension/ExtensionManager') -jest.mock('./useGpuSetting', () => ({ - __esModule: true, - default: () => ({ - getGpuSettings: jest.fn().mockResolvedValue({ some: 'gpuSettings' }), - }), -})) describe('useDownloadModel', () => { beforeEach(() => { @@ -29,25 +23,25 @@ describe('useDownloadModel', () => { it('should download a model', async () => { const mockModel: core.Model = { id: 'test-model', - sources: [{ filename: 'test.bin' }], + sources: [{ filename: 'test.bin', url: 'https://fake.url' }], } as core.Model const mockExtension = { - downloadModel: jest.fn().mockResolvedValue(undefined), + pullModel: jest.fn().mockResolvedValue(undefined), } ;(useSetAtom as jest.Mock).mockReturnValue(() => undefined) ;(extensionManager.get as jest.Mock).mockReturnValue(mockExtension) const { result } = renderHook(() => useDownloadModel()) - await act(async () => { - await result.current.downloadModel(mockModel) + act(() => { + result.current.downloadModel(mockModel.sources[0].url, mockModel.id) }) - expect(mockExtension.downloadModel).toHaveBeenCalledWith( - mockModel, - { some: 'gpuSettings' }, - { ignoreSSL: undefined, proxy: '' } + expect(mockExtension.pullModel).toHaveBeenCalledWith( + mockModel.sources[0].url, + mockModel.id, + undefined ) }) @@ -58,15 +52,18 @@ describe('useDownloadModel', () => { } as core.Model ;(core.joinPath as jest.Mock).mockResolvedValue('/path/to/model/test.bin') - ;(core.abortDownload as jest.Mock).mockResolvedValue(undefined) + const mockExtension = { + cancelModelPull: jest.fn().mockResolvedValue(undefined), + } ;(useSetAtom as jest.Mock).mockReturnValue(() => undefined) + ;(extensionManager.get as jest.Mock).mockReturnValue(mockExtension) const { result } = renderHook(() => useDownloadModel()) - await act(async () => { - await result.current.abortModelDownload(mockModel) + act(() => { + result.current.abortModelDownload(mockModel.id) }) - expect(core.abortDownload).toHaveBeenCalledWith('/path/to/model/test.bin') + expect(mockExtension.cancelModelPull).toHaveBeenCalledWith('test-model') }) it('should handle proxy settings', async () => { @@ -76,7 +73,7 @@ 
describe('useDownloadModel', () => { } as core.Model const mockExtension = { - downloadModel: jest.fn().mockResolvedValue(undefined), + pullModel: jest.fn().mockResolvedValue(undefined), } ;(useSetAtom as jest.Mock).mockReturnValue(() => undefined) ;(extensionManager.get as jest.Mock).mockReturnValue(mockExtension) @@ -85,14 +82,14 @@ describe('useDownloadModel', () => { const { result } = renderHook(() => useDownloadModel()) - await act(async () => { - await result.current.downloadModel(mockModel) + act(() => { + result.current.downloadModel(mockModel.sources[0].url, mockModel.id) }) - expect(mockExtension.downloadModel).toHaveBeenCalledWith( - mockModel, - expect.objectContaining({ some: 'gpuSettings' }), - expect.anything() + expect(mockExtension.pullModel).toHaveBeenCalledWith( + mockModel.sources[0].url, + mockModel.id, + undefined ) }) }) diff --git a/web/hooks/useDownloadModel.ts b/web/hooks/useDownloadModel.ts index 0cd21ea83..c616f8769 100644 --- a/web/hooks/useDownloadModel.ts +++ b/web/hooks/useDownloadModel.ts @@ -1,106 +1,55 @@ import { useCallback } from 'react' -import { - Model, - ExtensionTypeEnum, - ModelExtension, - abortDownload, - joinPath, - ModelArtifact, - DownloadState, - GpuSetting, - ModelFile, - dirName, -} from '@janhq/core' +import { ExtensionTypeEnum, ModelExtension } from '@janhq/core' -import { useAtomValue, useSetAtom } from 'jotai' +import { useSetAtom } from 'jotai' + +import { toaster } from '@/containers/Toast' import { setDownloadStateAtom } from './useDownloadState' -import useGpuSetting from './useGpuSetting' - import { extensionManager } from '@/extension/ExtensionManager' + import { - ignoreSslAtom, - proxyAtom, - proxyEnabledAtom, -} from '@/helpers/atoms/AppConfig.atom' -import { addDownloadingModelAtom } from '@/helpers/atoms/Model.atom' + addDownloadingModelAtom, + removeDownloadingModelAtom, +} from '@/helpers/atoms/Model.atom' export default function useDownloadModel() { - const ignoreSSL = useAtomValue(ignoreSslAtom) - const proxy = useAtomValue(proxyAtom) - const proxyEnabled = useAtomValue(proxyEnabledAtom) - const setDownloadState = useSetAtom(setDownloadStateAtom) + const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom) const addDownloadingModel = useSetAtom(addDownloadingModelAtom) - - const { getGpuSettings } = useGpuSetting() + const setDownloadStates = useSetAtom(setDownloadStateAtom) const downloadModel = useCallback( - async (model: Model) => { - const childProgresses: DownloadState[] = model.sources.map( - (source: ModelArtifact) => ({ - fileName: source.filename, - modelId: model.id, - time: { - elapsed: 0, - remaining: 0, - }, - speed: 0, - percent: 0, - size: { - total: 0, - transferred: 0, - }, - downloadState: 'downloading', - }) - ) - - // set an initial download state - setDownloadState({ - fileName: '', - modelId: model.id, - time: { - elapsed: 0, - remaining: 0, - }, - speed: 0, - percent: 0, + async (model: string, id?: string, name?: string) => { + addDownloadingModel(id ?? model) + setDownloadStates({ + modelId: id ?? model, + downloadState: 'downloading', + fileName: id ?? 
model, size: { total: 0, transferred: 0, }, - children: childProgresses, - downloadState: 'downloading', + percent: 0, }) + downloadLocalModel(model, id, name).catch((error) => { + if (error.message) { + toaster({ + title: 'Download failed', + description: error.message, + type: 'error', + }) + } - addDownloadingModel(model) - const gpuSettings = await getGpuSettings() - await localDownloadModel( - model, - ignoreSSL, - proxyEnabled ? proxy : '', - gpuSettings - ) + removeDownloadingModel(model) + }) }, - [ - ignoreSSL, - proxy, - proxyEnabled, - getGpuSettings, - addDownloadingModel, - setDownloadState, - ] + [removeDownloadingModel, addDownloadingModel, setDownloadStates] ) - const abortModelDownload = useCallback(async (model: Model | ModelFile) => { - for (const source of model.sources) { - const path = - 'file_path' in model - ? await joinPath([await dirName(model.file_path), source.filename]) - : await joinPath(['models', model.id, source.filename]) - await abortDownload(path) - } + const abortModelDownload = useCallback(async (model: string) => { + await cancelModelDownload(model) }, []) return { @@ -109,12 +58,12 @@ export default function useDownloadModel() { } } -const localDownloadModel = async ( - model: Model, - ignoreSSL: boolean, - proxy: string, - gpuSettings?: GpuSetting -) => +const downloadLocalModel = async (model: string, id?: string, name?: string) => extensionManager .get(ExtensionTypeEnum.Model) - ?.downloadModel(model, gpuSettings, { ignoreSSL, proxy }) + ?.pullModel(model, id, name) + +const cancelModelDownload = async (model: string) => + extensionManager + .get(ExtensionTypeEnum.Model) + ?.cancelModelPull(model) diff --git a/web/hooks/useDownloadState.ts b/web/hooks/useDownloadState.ts index 03a8883cb..32a9d3255 100644 --- a/web/hooks/useDownloadState.ts +++ b/web/hooks/useDownloadState.ts @@ -10,8 +10,18 @@ import { } from '@/helpers/atoms/Model.atom' // download states + export const modelDownloadStateAtom = atom>({}) +/** + * Remove a download state for a particular model. + */ +export const removeDownloadStateAtom = atom(null, (get, set, id: string) => { + const currentState = { ...get(modelDownloadStateAtom) } + delete currentState[id] + set(modelDownloadStateAtom, currentState) + set(removeDownloadingModelAtom, id) +}) /** * Used to set the download state for a particular model. 
*/ @@ -77,7 +87,7 @@ export const setDownloadStateAtom = atom( } } else { // download in progress - if (state.size.total === 0) { + if (state.size.total === 0 || !currentState[state.modelId]) { // this is initial state, just set the state currentState[state.modelId] = state set(modelDownloadStateAtom, currentState) @@ -108,6 +118,7 @@ export const setDownloadStateAtom = atom( ) modelDownloadState.children = updatedChildren + if (isAnyChildDownloadNotReady) { // just update the children currentState[state.modelId] = modelDownloadState @@ -115,23 +126,18 @@ export const setDownloadStateAtom = atom( return } - const parentTotalSize = modelDownloadState.size.total - if (parentTotalSize === 0) { - // calculate the total size of the parent by sum all children total size - const totalSize = updatedChildren.reduce( - (acc, m) => acc + m.size.total, - 0 - ) - - modelDownloadState.size.total = totalSize - } - + const parentTotalSize = updatedChildren.reduce( + (acc, m) => acc + m.size.total, + 0 + ) // calculate the total transferred size by sum all children transferred size const transferredSize = updatedChildren.reduce( (acc, m) => acc + m.size.transferred, 0 ) + modelDownloadState.size.total = parentTotalSize modelDownloadState.size.transferred = transferredSize + modelDownloadState.percent = parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize currentState[state.modelId] = modelDownloadState diff --git a/web/hooks/useDropModelBinaries.ts b/web/hooks/useDropModelBinaries.ts index d87e96627..7c87355f7 100644 --- a/web/hooks/useDropModelBinaries.ts +++ b/web/hooks/useDropModelBinaries.ts @@ -29,7 +29,7 @@ export default function useDropModelBinaries() { const importingModels: ImportingModel[] = supportedFiles.map((file) => ({ importId: uuidv4(), modelId: undefined, - name: file.name.replace('.gguf', ''), + name: file.name.replace(/ /g, '').replace('.gguf', ''), description: '', path: file.path, tags: [], diff --git a/web/hooks/useGetHFRepoData.test.ts b/web/hooks/useGetHFRepoData.test.ts index eaf86d79a..01055612d 100644 --- a/web/hooks/useGetHFRepoData.test.ts +++ b/web/hooks/useGetHFRepoData.test.ts @@ -1,6 +1,10 @@ +/** + * @jest-environment jsdom + */ import { renderHook, act } from '@testing-library/react' import { useGetHFRepoData } from './useGetHFRepoData' import { extensionManager } from '@/extension' +import * as hf from '@/utils/huggingface' jest.mock('@/extension', () => ({ extensionManager: { @@ -8,6 +12,8 @@ jest.mock('@/extension', () => ({ }, })) +jest.mock('@/utils/huggingface') + describe('useGetHFRepoData', () => { beforeEach(() => { jest.clearAllMocks() @@ -15,10 +21,7 @@ describe('useGetHFRepoData', () => { it('should fetch HF repo data successfully', async () => { const mockData = { name: 'Test Repo', stars: 100 } - const mockFetchHuggingFaceRepoData = jest.fn().mockResolvedValue(mockData) - ;(extensionManager.get as jest.Mock).mockReturnValue({ - fetchHuggingFaceRepoData: mockFetchHuggingFaceRepoData, - }) + ;(hf.fetchHuggingFaceRepoData as jest.Mock).mockReturnValue(mockData) const { result } = renderHook(() => useGetHFRepoData()) @@ -34,6 +37,5 @@ describe('useGetHFRepoData', () => { expect(result.current.error).toBeUndefined() expect(await data).toEqual(mockData) - expect(mockFetchHuggingFaceRepoData).toHaveBeenCalledWith('test-repo') }) }) diff --git a/web/hooks/useGetHFRepoData.ts b/web/hooks/useGetHFRepoData.ts index 3dab2c72e..6f2ec2b57 100644 --- a/web/hooks/useGetHFRepoData.ts +++ b/web/hooks/useGetHFRepoData.ts @@ -1,12 +1,8 @@ import { useCallback, 
useState } from 'react' -import { - ExtensionTypeEnum, - HuggingFaceRepoData, - ModelExtension, -} from '@janhq/core' +import { HuggingFaceRepoData } from '@janhq/core' -import { extensionManager } from '@/extension' +import { fetchHuggingFaceRepoData } from '@/utils/huggingface' export const useGetHFRepoData = () => { const [error, setError] = useState(undefined) @@ -35,7 +31,5 @@ export const useGetHFRepoData = () => { const extensionGetHfRepoData = async ( repoId: string ): Promise => { - return extensionManager - .get(ExtensionTypeEnum.Model) - ?.fetchHuggingFaceRepoData(repoId) + return fetchHuggingFaceRepoData(repoId) } diff --git a/web/hooks/useImportModel.test.ts b/web/hooks/useImportModel.test.ts index 2148f581b..571947903 100644 --- a/web/hooks/useImportModel.test.ts +++ b/web/hooks/useImportModel.test.ts @@ -18,7 +18,7 @@ describe('useImportModel', () => { it('should import models successfully', async () => { const mockImportModels = jest.fn().mockResolvedValue(undefined) const mockExtension = { - importModels: mockImportModels, + importModel: mockImportModels, } as any jest.spyOn(extensionManager, 'get').mockReturnValue(mockExtension) @@ -26,15 +26,16 @@ describe('useImportModel', () => { const { result } = renderHook(() => useImportModel()) const models = [ - { importId: '1', name: 'Model 1', path: '/path/to/model1' }, - { importId: '2', name: 'Model 2', path: '/path/to/model2' }, + { modelId: '1', path: '/path/to/model1' }, + { modelId: '2', path: '/path/to/model2' }, ] as any await act(async () => { - await result.current.importModels(models, 'local' as any) + await result.current.importModels(models, 'copy') }) - expect(mockImportModels).toHaveBeenCalledWith(models, 'local') + expect(mockImportModels).toHaveBeenCalledWith('1', '/path/to/model1', undefined,'copy') + expect(mockImportModels).toHaveBeenCalledWith('2', '/path/to/model2', undefined, 'copy') }) it('should update model info successfully', async () => { @@ -42,7 +43,7 @@ describe('useImportModel', () => { .fn() .mockResolvedValue({ id: 'model-1', name: 'Updated Model' }) const mockExtension = { - updateModelInfo: mockUpdateModelInfo, + updateModel: mockUpdateModelInfo, } as any jest.spyOn(extensionManager, 'get').mockReturnValue(mockExtension) diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts index effc64f86..c49ddb964 100644 --- a/web/hooks/useImportModel.ts +++ b/web/hooks/useImportModel.ts @@ -3,13 +3,16 @@ import { useCallback } from 'react' import { ExtensionTypeEnum, ImportingModel, + LocalImportModelEvent, Model, ModelExtension, OptionType, + events, fs, + baseName, } from '@janhq/core' -import { atom, useSetAtom } from 'jotai' +import { atom, useAtomValue, useSetAtom } from 'jotai' import { v4 as uuidv4 } from 'uuid' @@ -18,7 +21,12 @@ import { snackbar } from '@/containers/Toast' import { FilePathWithSize } from '@/utils/file' import { extensionManager } from '@/extension' -import { importingModelsAtom } from '@/helpers/atoms/Model.atom' +import { + addDownloadingModelAtom, + downloadedModelsAtom, + importingModelsAtom, + removeDownloadingModelAtom, +} from '@/helpers/atoms/Model.atom' export type ImportModelStage = | 'NONE' @@ -49,11 +57,42 @@ export type ModelUpdate = { const useImportModel = () => { const setImportModelStage = useSetAtom(setImportModelStageAtom) const setImportingModels = useSetAtom(importingModelsAtom) + const addDownloadingModel = useSetAtom(addDownloadingModelAtom) + const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom) + const downloadedModels = 
useAtomValue(downloadedModelsAtom) + + const incrementalModelName = useCallback( + (name: string, startIndex: number = 0): string => { + const newModelName = startIndex ? `${name}-${startIndex}` : name + if (downloadedModels.some((model) => model.id === newModelName)) { + return incrementalModelName(name, startIndex + 1) + } else { + return newModelName + } + }, + [downloadedModels] + ) const importModels = useCallback( - (models: ImportingModel[], optionType: OptionType) => - localImportModels(models, optionType), - [] + (models: ImportingModel[], optionType: OptionType) => { + models.map(async (model) => { + const modelId = model.modelId ?? incrementalModelName(model.name) + if (modelId) { + addDownloadingModel(modelId) + extensionManager + .get(ExtensionTypeEnum.Model) + ?.importModel(modelId, model.path, model.name, optionType) + .finally(() => { + removeDownloadingModel(modelId) + events.emit(LocalImportModelEvent.onLocalImportModelSuccess, { + importId: model.importId, + modelId: modelId, + }) + }) + } + }) + }, + [addDownloadingModel, incrementalModelName, removeDownloadingModel] ) const updateModelInfo = useCallback( @@ -75,7 +114,7 @@ const useImportModel = () => { ({ path, name, size }: FilePathWithSize) => ({ importId: uuidv4(), modelId: undefined, - name: name.replace('.gguf', ''), + name: name.replace(/ /g, '').replace('.gguf', ''), description: '', path: path, tags: [], @@ -101,19 +140,11 @@ const useImportModel = () => { return { importModels, updateModelInfo, sanitizeFilePaths } } -const localImportModels = async ( - models: ImportingModel[], - optionType: OptionType -): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.importModels(models, optionType) - const localUpdateModelInfo = async ( modelInfo: Partial ): Promise => extensionManager .get(ExtensionTypeEnum.Model) - ?.updateModelInfo(modelInfo) + ?.updateModel(modelInfo) export default useImportModel diff --git a/web/hooks/useModels.test.ts b/web/hooks/useModels.test.ts index 4c53ffaa7..9b6b898ad 100644 --- a/web/hooks/useModels.test.ts +++ b/web/hooks/useModels.test.ts @@ -1,28 +1,23 @@ // useModels.test.ts - import { renderHook, act } from '@testing-library/react' -import { events, ModelEvent } from '@janhq/core' +import { events, ModelEvent, ModelManager } from '@janhq/core' import { extensionManager } from '@/extension' // Mock dependencies jest.mock('@janhq/core') jest.mock('@/extension') +jest.mock('use-debounce', () => ({ + useDebouncedCallback: jest.fn().mockImplementation((fn) => fn), +})) import useModels from './useModels' // Mock data -const mockDownloadedModels = [ +const models = [ { id: 'model-1', name: 'Model 1' }, { id: 'model-2', name: 'Model 2' }, ] -const mockConfiguredModels = [ - { id: 'model-3', name: 'Model 3' }, - { id: 'model-4', name: 'Model 4' }, -] - -const mockDefaultModel = { id: 'default-model', name: 'Default Model' } - describe('useModels', () => { beforeEach(() => { jest.clearAllMocks() @@ -30,20 +25,28 @@ describe('useModels', () => { it('should fetch and set models on mount', async () => { const mockModelExtension = { - getDownloadedModels: jest.fn().mockResolvedValue(mockDownloadedModels), - getConfiguredModels: jest.fn().mockResolvedValue(mockConfiguredModels), - getDefaultModel: jest.fn().mockResolvedValue(mockDefaultModel), + getModels: jest.fn().mockResolvedValue(models), } as any + ;(ModelManager.instance as jest.Mock).mockReturnValue({ + models: { + values: () => ({ + toArray: () => ({ + filter: () => models, + }), + }), + get: () => undefined, + has: () => 
true, + // set: () => {} + }, + }) jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension) - await act(async () => { + act(() => { renderHook(() => useModels()) }) - expect(mockModelExtension.getDownloadedModels).toHaveBeenCalled() - expect(mockModelExtension.getConfiguredModels).toHaveBeenCalled() - expect(mockModelExtension.getDefaultModel).toHaveBeenCalled() + expect(mockModelExtension.getModels).toHaveBeenCalled() }) it('should remove event listener on unmount', async () => { diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts index 58def79c6..400e02793 100644 --- a/web/hooks/useModels.ts +++ b/web/hooks/useModels.ts @@ -5,16 +5,19 @@ import { Model, ModelEvent, ModelExtension, - ModelFile, events, + ModelManager, } from '@janhq/core' import { useSetAtom } from 'jotai' +import { useDebouncedCallback } from 'use-debounce' + +import { isLocalEngine } from '@/utils/modelEngine' + import { extensionManager } from '@/extension' import { configuredModelsAtom, - defaultModelAtom, downloadedModelsAtom, } from '@/helpers/atoms/Model.atom' @@ -25,62 +28,68 @@ import { */ const useModels = () => { const setDownloadedModels = useSetAtom(downloadedModelsAtom) - const setConfiguredModels = useSetAtom(configuredModelsAtom) - const setDefaultModel = useSetAtom(defaultModelAtom) + const setExtensionModels = useSetAtom(configuredModelsAtom) const getData = useCallback(() => { const getDownloadedModels = async () => { - const models = await getLocalDownloadedModels() - setDownloadedModels(models) + const localModels = (await getModels()).map((e) => ({ + ...e, + name: ModelManager.instance().models.get(e.id)?.name ?? e.id, + metadata: + ModelManager.instance().models.get(e.id)?.metadata ?? e.metadata, + })) + + const remoteModels = ModelManager.instance() + .models.values() + .toArray() + .filter((e) => !isLocalEngine(e.engine)) + const toUpdate = [ + ...localModels, + ...remoteModels.filter( + (e: Model) => !localModels.some((g: Model) => g.id === e.id) + ), + ] + + setDownloadedModels(toUpdate) + + let isUpdated = false + toUpdate.forEach((model) => { + if (!ModelManager.instance().models.has(model.id)) { + ModelManager.instance().models.set(model.id, model) + isUpdated = true + } + }) + if (isUpdated) { + getExtensionModels() + } } - const getConfiguredModels = async () => { - const models = await getLocalConfiguredModels() - setConfiguredModels(models) + const getExtensionModels = () => { + const models = ModelManager.instance().models.values().toArray() + setExtensionModels(models) } - - const getDefaultModel = async () => { - const defaultModel = await getLocalDefaultModel() - setDefaultModel(defaultModel) - } - // Fetch all data - Promise.all([ - getDownloadedModels(), - getConfiguredModels(), - getDefaultModel(), - ]) - }, [setDownloadedModels, setConfiguredModels, setDefaultModel]) + getExtensionModels() + getDownloadedModels() + }, [setDownloadedModels, setExtensionModels]) + + const reloadData = useDebouncedCallback(() => getData(), 300) useEffect(() => { // Try get data on mount - getData() + reloadData() // Listen for model updates - events.on(ModelEvent.OnModelsUpdate, async () => getData()) + events.on(ModelEvent.OnModelsUpdate, async () => reloadData()) return () => { // Remove listener on unmount events.off(ModelEvent.OnModelsUpdate, async () => {}) } - }, [getData]) + }, [getData, reloadData]) } -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalDefaultModel = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - 
?.getDefaultModel() - -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalConfiguredModels = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.getConfiguredModels() ?? [] - -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalDownloadedModels = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.getDownloadedModels() ?? [] +const getModels = async (): Promise => + extensionManager.get(ExtensionTypeEnum.Model)?.getModels() ?? + [] export default useModels diff --git a/web/hooks/useRecommendedModel.ts b/web/hooks/useRecommendedModel.ts index ed56efa55..21a9c69e7 100644 --- a/web/hooks/useRecommendedModel.ts +++ b/web/hooks/useRecommendedModel.ts @@ -1,6 +1,6 @@ import { useCallback, useEffect, useState } from 'react' -import { Model, InferenceEngine, ModelFile } from '@janhq/core' +import { Model, InferenceEngine } from '@janhq/core' import { atom, useAtomValue } from 'jotai' @@ -24,16 +24,12 @@ export const LAST_USED_MODEL_ID = 'last-used-model-id' */ export default function useRecommendedModel() { const activeModel = useAtomValue(activeModelAtom) - const [sortedModels, setSortedModels] = useState([]) - const [recommendedModel, setRecommendedModel] = useState< - ModelFile | undefined - >() + const [sortedModels, setSortedModels] = useState([]) + const [recommendedModel, setRecommendedModel] = useState() const activeThread = useAtomValue(activeThreadAtom) const downloadedModels = useAtomValue(downloadedModelsAtom) - const getAndSortDownloadedModels = useCallback(async (): Promise< - ModelFile[] - > => { + const getAndSortDownloadedModels = useCallback(async (): Promise => { const models = downloadedModels.sort((a, b) => a.engine !== InferenceEngine.nitro && b.engine === InferenceEngine.nitro ? 1 diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts index 1dbd5b45e..cda53b24a 100644 --- a/web/hooks/useSendChatMessage.ts +++ b/web/hooks/useSendChatMessage.ts @@ -12,6 +12,7 @@ import { ToolManager, ChatCompletionMessage, } from '@janhq/core' +import { extractInferenceParams, extractModelLoadParams } from '@janhq/core' import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' import { @@ -23,10 +24,6 @@ import { import { Stack } from '@/utils/Stack' import { compressImage, getBase64 } from '@/utils/base64' import { MessageRequestBuilder } from '@/utils/messageRequestBuilder' -import { - extractInferenceParams, - extractModelLoadParams, -} from '@/utils/modelParam' import { ThreadMessageBuilder } from '@/utils/threadMessageBuilder' @@ -123,65 +120,27 @@ export default function useSendChatMessage() { } const resendChatMessage = async (currentMessage: ThreadMessage) => { - if (!activeThreadRef.current) { - console.error('No active thread') - return - } - updateThreadWaiting(activeThreadRef.current.id, true) + // Delete last response before regenerating + const newConvoData = currentMessages + let toSendMessage = currentMessage - const requestBuilder = new MessageRequestBuilder( - MessageRequestType.Thread, - activeThreadRef.current.assistants[0].model ?? selectedModelRef.current, - activeThreadRef.current, - currentMessages - ) - .addSystemMessage(activeThreadRef.current.assistants[0]?.instructions) - .removeLastAssistantMessage() + do { + deleteMessage(currentMessage.id) + const msg = newConvoData.pop() + if (!msg) break + toSendMessage = msg + deleteMessage(toSendMessage.id ?? 
'') + } while (toSendMessage.role !== ChatCompletionRole.User) - const modelId = - selectedModelRef.current?.id ?? - activeThreadRef.current.assistants[0].model.id - - if (modelRef.current?.id !== modelId) { - const error = await startModel(modelId).catch((error: Error) => error) - if (error) { - updateThreadWaiting(activeThreadRef.current.id, false) - return - } + if (activeThreadRef.current) { + await extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.writeMessages(activeThreadRef.current.id, newConvoData) } - setIsGeneratingResponse(true) - - if (currentMessage.role !== ChatCompletionRole.User) { - // Delete last response before regenerating - deleteMessage(currentMessage.id ?? '') - if (activeThreadRef.current) { - await extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.writeMessages( - activeThreadRef.current.id, - currentMessages.filter((msg) => msg.id !== currentMessage.id) - ) - } - } - // Process message request with Assistants tools - const request = await ToolManager.instance().process( - requestBuilder.build(), - activeThreadRef.current.assistants?.flatMap( - (assistant) => assistant.tools ?? [] - ) ?? [] - ) - - request.messages = normalizeMessages(request.messages ?? []) - - const engine = - requestBuilder.model?.engine ?? selectedModelRef.current?.engine ?? '' - - EngineManager.instance().get(engine)?.inference(request) + sendChatMessage(toSendMessage.content[0]?.text.value) } - // Define interface extending Array prototype - const sendChatMessage = async (message: string) => { if (!message || message.trim().length === 0) return @@ -254,7 +213,7 @@ export default function useSendChatMessage() { ...activeThreadRef.current, updated: newMessage.created, metadata: { - ...(activeThreadRef.current.metadata ?? {}), + ...activeThreadRef.current.metadata, lastMessage: prompt, }, } diff --git a/web/hooks/useUpdateModelParameters.ts b/web/hooks/useUpdateModelParameters.ts index 2af6e3323..6eb7c3c5a 100644 --- a/web/hooks/useUpdateModelParameters.ts +++ b/web/hooks/useUpdateModelParameters.ts @@ -6,15 +6,12 @@ import { InferenceEngine, Thread, ThreadAssistantInfo, + extractInferenceParams, + extractModelLoadParams, } from '@janhq/core' import { useAtom, useAtomValue, useSetAtom } from 'jotai' -import { - extractInferenceParams, - extractModelLoadParams, -} from '@/utils/modelParam' - import { extensionManager } from '@/extension' import { selectedModelAtom } from '@/helpers/atoms/Model.atom' import { diff --git a/web/package.json b/web/package.json index bed94aed4..d3ee82a33 100644 --- a/web/package.json +++ b/web/package.json @@ -38,7 +38,7 @@ "react": "18.2.0", "react-circular-progressbar": "^2.1.0", "react-dom": "18.2.0", - "react-dropzone": "^14.2.3", + "react-dropzone": "14.2.3", "react-hook-form": "^7.47.0", "react-hot-toast": "^2.4.1", "react-icons": "^4.12.0", @@ -52,8 +52,9 @@ "uuid": "^9.0.1", "zod": "^3.22.4", "slate": "latest", - "slate-react": "latest", - "slate-history": "latest" + "slate-dom": "0.111.0", + "slate-react": "0.110.3", + "slate-history": "0.110.3" }, "devDependencies": { "@next/eslint-plugin-next": "^14.0.1", diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx index 44a3fd278..da98e41e3 100644 --- a/web/screens/Hub/ModelList/ModelHeader/index.tsx +++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx @@ -1,6 +1,6 @@ import { useCallback } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { Button, Badge, Tooltip } from '@janhq/joi' 
import { useAtomValue, useSetAtom } from 'jotai' @@ -38,7 +38,7 @@ import { } from '@/helpers/atoms/SystemBar.atom' type Props = { - model: ModelFile + model: Model onClick: () => void open: string } @@ -64,7 +64,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { const assistants = useAtomValue(assistantsAtom) const onDownloadClick = useCallback(() => { - downloadModel(model) + downloadModel(model.sources[0].url, model.id, model.name) }, [model, downloadModel]) const isDownloaded = downloadedModels.find((md) => md.id === model.id) != null @@ -81,7 +81,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { ) - const isDownloading = downloadingModels.some((md) => md.id === model.id) + const isDownloading = downloadingModels.some((md) => md === model.id) const onUseModelClick = useCallback(async () => { if (assistants.length === 0) { @@ -123,17 +123,6 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { className="cursor-pointer rounded-t-md bg-[hsla(var(--app-bg))]" onClick={onClick} > - {/* TODO: @faisal are we still using cover? */} - {/* {model.metadata.cover && imageLoaded && ( -
      - setImageLoaded(false)} - src={model.metadata.cover} - className="h-[250px] w-full object-cover" - alt={`Cover - ${model.id}`} - /> -
      - )} */}
      @@ -144,7 +133,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => {
      - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)}
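Note: this hunk switches ModelHeader from ModelFile to Model, and the downloading state now tracks plain model ids (strings) instead of model objects, so membership checks compare ids directly and downloads start from the model's first source URL. A minimal sketch of the call shapes this implies, written standalone; the parameter names and the optional `name` argument are assumptions inferred from the surrounding hunks, not confirmed signatures:

    import { Model } from '@janhq/core'

    // Assumption: the downloading list now holds model ids, matching the
    // `md === model.id` comparison introduced in this diff.
    const isDownloading = (downloadingModels: string[], model: Model): boolean =>
      downloadingModels.some((id) => id === model.id)

    // Assumption: downloadModel now takes (url, id, name) rather than a ModelFile,
    // mirroring downloadModel(model.sources[0].url, model.id, model.name) above.
    const startDownload = (
      downloadModel: (url: string, id: string, name?: string) => void,
      model: Model
    ): void => downloadModel(model.sources[0].url, model.id, model.name)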
      diff --git a/web/screens/Hub/ModelList/ModelItem/index.tsx b/web/screens/Hub/ModelList/ModelItem/index.tsx index ec9d885a1..a077dbffc 100644 --- a/web/screens/Hub/ModelList/ModelItem/index.tsx +++ b/web/screens/Hub/ModelList/ModelItem/index.tsx @@ -1,6 +1,6 @@ import { useState } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { Badge } from '@janhq/joi' import { twMerge } from 'tailwind-merge' @@ -12,7 +12,7 @@ import ModelItemHeader from '@/screens/Hub/ModelList/ModelHeader' import { toGibibytes } from '@/utils/converter' type Props = { - model: ModelFile + model: Model } const ModelItem: React.FC = ({ model }) => { @@ -34,7 +34,7 @@ const ModelItem: React.FC = ({ model }) => {
      - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)}
      @@ -49,9 +49,9 @@ const ModelItem: React.FC = ({ model }) => { Author

      - {model.metadata.author} + {model.metadata?.author}

      @@ -66,7 +66,7 @@ const ModelItem: React.FC = ({ model }) => {
      Tags
      - {model.metadata.tags.map((tag: string) => ( + {model.metadata?.tags?.map((tag: string) => ( {tag} diff --git a/web/screens/Hub/ModelList/index.tsx b/web/screens/Hub/ModelList/index.tsx index 8fc30d541..0d7865a81 100644 --- a/web/screens/Hub/ModelList/index.tsx +++ b/web/screens/Hub/ModelList/index.tsx @@ -1,6 +1,6 @@ import { useMemo } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { useAtomValue } from 'jotai' @@ -9,16 +9,16 @@ import ModelItem from '@/screens/Hub/ModelList/ModelItem' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' type Props = { - models: ModelFile[] + models: Model[] } const ModelList = ({ models }: Props) => { const downloadedModels = useAtomValue(downloadedModelsAtom) - const sortedModels: ModelFile[] = useMemo(() => { - const featuredModels: ModelFile[] = [] - const remoteModels: ModelFile[] = [] - const localModels: ModelFile[] = [] - const remainingModels: ModelFile[] = [] + const sortedModels: Model[] = useMemo(() => { + const featuredModels: Model[] = [] + const remoteModels: Model[] = [] + const localModels: Model[] = [] + const remainingModels: Model[] = [] models.forEach((m) => { if (m.metadata?.tags?.includes('Featured')) { featuredModels.push(m) @@ -30,9 +30,9 @@ const ModelList = ({ models }: Props) => { remainingModels.push(m) } }) - featuredModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) - localModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) - remainingModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) + featuredModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) + localModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) + remainingModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) remoteModels.sort((m1, m2) => m1.name.localeCompare(m2.name)) return [ ...featuredModels, diff --git a/web/screens/Hub/index.tsx b/web/screens/Hub/index.tsx index 8148a6bb5..382cf5667 100644 --- a/web/screens/Hub/index.tsx +++ b/web/screens/Hub/index.tsx @@ -52,7 +52,7 @@ const HubScreen = () => { } else if (sortSelected === 'featured') { return ( x.name.toLowerCase().includes(searchValue.toLowerCase()) && - x.metadata.tags.includes('Featured') + x.metadata?.tags?.includes('Featured') ) } else { return x.name.toLowerCase().includes(searchValue.toLowerCase()) diff --git a/web/screens/LocalServer/LocalServerRightPanel/index.tsx b/web/screens/LocalServer/LocalServerRightPanel/index.tsx index 13e3cad57..a59e83e7e 100644 --- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx +++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx @@ -1,8 +1,9 @@ import { useCallback, useEffect, useMemo, useState } from 'react' -import { Accordion, AccordionItem } from '@janhq/joi' +import { extractInferenceParams, extractModelLoadParams } from '@janhq/core' +import { Accordion, AccordionItem, Input } from '@janhq/joi' import { useAtomValue, useSetAtom } from 'jotai' -import { AlertTriangleIcon, InfoIcon } from 'lucide-react' +import { AlertTriangleIcon, CheckIcon, CopyIcon, InfoIcon } from 'lucide-react' import EngineSetting from '@/containers/EngineSetting' import { modalTroubleShootingAtom } from '@/containers/ModalTroubleShoot' @@ -12,12 +13,9 @@ import RightPanelContainer from '@/containers/RightPanelContainer' import { loadModelErrorAtom } from '@/hooks/useActiveModel' -import { getConfigurationsData } from '@/utils/componentSettings' +import { useClipboard } from '@/hooks/useClipboard' -import { - extractInferenceParams, - 
extractModelLoadParams, -} from '@/utils/modelParam' +import { getConfigurationsData } from '@/utils/componentSettings' import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' import { selectedModelAtom } from '@/helpers/atoms/Model.atom' @@ -29,6 +27,8 @@ const LocalServerRightPanel = () => { const selectedModel = useAtomValue(selectedModelAtom) + const clipboard = useClipboard({ timeout: 1000 }) + const [currentModelSettingParams, setCurrentModelSettingParams] = useState( extractModelLoadParams(selectedModel?.settings) ) @@ -92,6 +92,30 @@ const LocalServerRightPanel = () => { +
      + { + clipboard.copy(selectedModel?.id) + }} + suffixIcon={ + clipboard.copied ? ( + + ) : ( + + ) + } + /> +
      + {loadModelError && serverEnabled && (
      { * @param gpuId * @returns */ - const handleGPUChange = (gpuId: string) => { + const handleGPUChange = async (gpuId: string) => { let updatedGpusInUse = [...gpusInUse] if (updatedGpusInUse.includes(gpuId)) { updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId) @@ -208,7 +208,8 @@ const Advanced = () => { updatedGpusInUse.push(gpuId) } setGpusInUse(updatedGpusInUse) - saveSettings({ gpusInUse: updatedGpusInUse }) + await saveSettings({ gpusInUse: updatedGpusInUse }) + window.core?.api?.relaunch() } const gpuSelectionPlaceHolder = @@ -305,7 +306,13 @@ const Advanced = () => { }) } // Stop any running model to apply the changes - if (e.target.checked !== gpuEnabled) stopModel() + if (e.target.checked !== gpuEnabled) { + stopModel().finally(() => { + setTimeout(() => { + window.location.reload() + }, 300) + }) + } }} /> } diff --git a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx index c3f09f171..96d8ac04e 100644 --- a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx +++ b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx @@ -1,11 +1,6 @@ -import { useCallback, useMemo } from 'react' +import { useCallback } from 'react' -import { - DownloadState, - HuggingFaceRepoData, - Model, - Quantization, -} from '@janhq/core' +import { DownloadState, HuggingFaceRepoData, Quantization } from '@janhq/core' import { Badge, Button, Progress } from '@janhq/joi' import { useAtomValue, useSetAtom } from 'jotai' @@ -20,13 +15,15 @@ import { modelDownloadStateAtom } from '@/hooks/useDownloadState' import { formatDownloadPercentage, toGibibytes } from '@/utils/converter' +import { normalizeModelId } from '@/utils/model' + import { mainViewStateAtom } from '@/helpers/atoms/App.atom' import { assistantsAtom } from '@/helpers/atoms/Assistant.atom' import { importHuggingFaceModelStageAtom } from '@/helpers/atoms/HuggingFace.atom' import { - defaultModelAtom, downloadedModelsAtom, + getDownloadingModelAtom, } from '@/helpers/atoms/Model.atom' type Props = { @@ -39,7 +36,6 @@ type Props = { } const ModelDownloadRow: React.FC = ({ - repoData, downloadUrl, fileName, fileSize = 0, @@ -49,51 +45,31 @@ const ModelDownloadRow: React.FC = ({ const { downloadModel, abortModelDownload } = useDownloadModel() const allDownloadStates = useAtomValue(modelDownloadStateAtom) const downloadState: DownloadState | undefined = allDownloadStates[fileName] + const downloadingModels = useAtomValue(getDownloadingModelAtom) const { requestCreateNewThread } = useCreateNewThread() const setMainViewState = useSetAtom(mainViewStateAtom) const assistants = useAtomValue(assistantsAtom) const downloadedModel = downloadedModels.find((md) => md.id === fileName) + const isDownloading = downloadingModels.some((md) => md === fileName) const setHfImportingStage = useSetAtom(importHuggingFaceModelStageAtom) - const defaultModel = useAtomValue(defaultModelAtom) - - const model = useMemo(() => { - if (!defaultModel) { - return undefined - } - - const model: Model = { - ...defaultModel, - sources: [ - { - url: downloadUrl, - filename: fileName, - }, - ], - id: fileName, - name: fileName, - created: Date.now(), - metadata: { - author: 'User', - tags: repoData.tags, - size: fileSize, - }, - } - return model - }, [fileName, fileSize, repoData, downloadUrl, defaultModel]) const onAbortDownloadClick = useCallback(() => { - if (model) { - abortModelDownload(model) + if (downloadUrl) { + 
abortModelDownload(normalizeModelId(downloadUrl)) } - }, [model, abortModelDownload]) + }, [downloadUrl, abortModelDownload]) const onDownloadClick = useCallback(async () => { - if (model) { - downloadModel(model) + if (downloadUrl) { + downloadModel( + downloadUrl, + normalizeModelId(downloadUrl), + normalizeModelId(downloadUrl) + ) } - }, [model, downloadModel]) + }, [downloadUrl, downloadModel]) const onUseModelClick = useCallback(async () => { if (assistants.length === 0) { @@ -111,14 +87,14 @@ const ModelDownloadRow: React.FC = ({ setHfImportingStage, ]) - if (!model) { + if (!downloadUrl) { return null } return (
      -
      -
      +
      +
      {quantization && ( {quantization} @@ -126,17 +102,18 @@ const ModelDownloadRow: React.FC = ({ )}

      {fileName}

      - - {toGibibytes(fileSize)} - +
      + + {toGibibytes(fileSize)} + +
      {downloadedModel ? ( @@ -144,11 +121,11 @@ const ModelDownloadRow: React.FC = ({ variant="soft" className="min-w-[98px]" onClick={onUseModelClick} - data-testid={`use-model-btn-${model.id}`} + data-testid={`use-model-btn-${downloadUrl}`} > Use - ) : downloadState != null ? ( + ) : isDownloading ? ( diff --git a/web/screens/Settings/ImportModelOptionModal/index.tsx b/web/screens/Settings/ImportModelOptionModal/index.tsx index 5a2af2335..f185b9015 100644 --- a/web/screens/Settings/ImportModelOptionModal/index.tsx +++ b/web/screens/Settings/ImportModelOptionModal/index.tsx @@ -15,13 +15,13 @@ import { importingModelsAtom } from '@/helpers/atoms/Model.atom' const importOptions: ModelImportOption[] = [ { - type: 'SYMLINK', + type: 'symlink', title: 'Keep Original Files & Symlink', description: 'You maintain your model files outside of Jan. Keeping your files where they are, and Jan will create a smart link to them.', }, { - type: 'MOVE_BINARY_FILE', + type: 'copy', title: 'Move model binary file', description: 'Jan will move your model binary file from your current folder into Jan Data Folder.', diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx index c9ca6e867..2e87f3080 100644 --- a/web/screens/Settings/MyModels/MyModelList/index.tsx +++ b/web/screens/Settings/MyModels/MyModelList/index.tsx @@ -1,6 +1,6 @@ import { memo, useState } from 'react' -import { InferenceEngine, ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { Badge, Button, Tooltip, useClickOutside } from '@janhq/joi' import { useAtom } from 'jotai' import { @@ -21,7 +21,7 @@ import { isLocalEngine } from '@/utils/modelEngine' import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' type Props = { - model: ModelFile + model: Model groupTitle?: string } @@ -54,14 +54,14 @@ const MyModelList = ({ model }: Props) => {
      {model.name}
      - {model.engine === InferenceEngine.nitro && ( + {isLocalEngine(model.engine) && (

      {

      - {toGibibytes(model.metadata.size)} + {model.metadata?.size ? toGibibytes(model.metadata?.size) : '-'}
      {stateModel.loading && stateModel.model?.id === model.id ? ( @@ -99,7 +99,7 @@ const MyModelList = ({ model }: Props) => { Active @@ -107,7 +107,7 @@ const MyModelList = ({ model }: Props) => { ) : ( Inactive diff --git a/web/screens/Settings/MyModels/index.tsx b/web/screens/Settings/MyModels/index.tsx index 547e6153b..218f8cb62 100644 --- a/web/screens/Settings/MyModels/index.tsx +++ b/web/screens/Settings/MyModels/index.tsx @@ -116,7 +116,12 @@ const MyModels = () => { getAllSettings() }, []) - const findByEngine = filteredDownloadedModels.map((x) => x.engine) + const findByEngine = filteredDownloadedModels.map((x) => { + // Legacy engine support - they will be grouped under Cortex LlamaCPP + if (x.engine === InferenceEngine.nitro) + return InferenceEngine.cortex_llamacpp + return x.engine + }) const groupByEngine = findByEngine .filter(function (item, index) { if (findByEngine.indexOf(item) === index) return item @@ -245,7 +250,12 @@ const MyModels = () => {
      {filteredDownloadedModels ? filteredDownloadedModels - .filter((x) => x.engine === engine) + .filter( + (x) => + x.engine === engine || + (x.engine === InferenceEngine.nitro && + engine === InferenceEngine.cortex_llamacpp) + ) .map((model) => { if (!showModel) return null return ( diff --git a/web/screens/Settings/SelectingModelModal/index.tsx b/web/screens/Settings/SelectingModelModal/index.tsx index 6273d0032..9a2f4fe82 100644 --- a/web/screens/Settings/SelectingModelModal/index.tsx +++ b/web/screens/Settings/SelectingModelModal/index.tsx @@ -21,7 +21,7 @@ const SelectingModelModal = () => { const onSelectFileClick = useCallback(async () => { const platform = (await systemInformation()).osInfo?.platform - if (platform === 'win32') { + if (platform !== 'darwin') { setImportModelStage('CHOOSE_WHAT_TO_IMPORT') return } diff --git a/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx b/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx index 3127f1578..647263ffe 100644 --- a/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx +++ b/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx @@ -7,7 +7,13 @@ import { } from '@janhq/core' import { Input } from '@janhq/joi' -import { CopyIcon, EyeIcon, FolderOpenIcon } from 'lucide-react' +import { + CheckIcon, + CopyIcon, + EyeIcon, + EyeOffIcon, + FolderOpenIcon, +} from 'lucide-react' import { Marked, Renderer } from 'marked' type Props = { @@ -34,6 +40,7 @@ const SettingDetailTextInputItem = ({ const { value, type, placeholder, textAlign, inputActions } = settingProps.controllerProps as InputComponentProps const [obscure, setObscure] = useState(type === 'password') + const [copied, setCopied] = useState(false) const description = marked.parse(settingProps.description ?? '', { async: false, @@ -45,6 +52,10 @@ const SettingDetailTextInputItem = ({ const copy = useCallback(() => { navigator.clipboard.writeText(value) + if (value.length > 0) { + setCopied(true) + } + setTimeout(() => setCopied(false), 2000) // Reset icon after 2 seconds }, [value]) const onAction = useCallback( @@ -84,6 +95,8 @@ const SettingDetailTextInputItem = ({ } /> @@ -95,33 +108,51 @@ const SettingDetailTextInputItem = ({ type InputActionProps = { actions: InputAction[] onAction: (action: InputAction) => void + copied: boolean + obscure: boolean } const InputExtraActions: React.FC = ({ actions, onAction, + copied, + obscure, }) => { if (actions.length === 0) return return (
      {actions.map((action) => { switch (action) { case 'copy': - return ( + return copied ? ( + onAction('copy')} + className="text-green-600" + /> + ) : ( onAction(action)} + onClick={() => onAction('copy')} /> ) case 'unobscure': - return ( + return obscure ? ( onAction(action)} + onClick={() => onAction('unobscure')} + /> + ) : ( + onAction('unobscure')} /> ) diff --git a/web/screens/Settings/SettingLeftPanel/SettingItem/index.tsx index 505a2cf40..b8c1994c5 100644 --- a/web/screens/Settings/SettingLeftPanel/SettingItem/index.tsx +++ b/web/screens/Settings/SettingLeftPanel/SettingItem/index.tsx @@ -1,6 +1,5 @@ import { useCallback } from 'react' -import { motion as m } from 'framer-motion' import { useAtom } from 'jotai' import { twMerge } from 'tailwind-merge' @@ -22,23 +21,20 @@ const SettingItem = ({ name, setting }: Props) => { return (
      {name} - {isActive && ( - - )}
      ) } diff --git a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx index 4dab6bfa8..f73efb486 100644 --- a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx @@ -38,20 +38,20 @@ const AssistantSetting: React.FC = ({ componentData }) => { (key === 'chunk_overlap' || key === 'chunk_size') ) { if ( - activeThread.assistants[0].tools[0]?.settings.chunk_size < - activeThread.assistants[0].tools[0]?.settings.chunk_overlap + activeThread.assistants[0].tools[0]?.settings?.chunk_size < + activeThread.assistants[0].tools[0]?.settings?.chunk_overlap ) { activeThread.assistants[0].tools[0].settings.chunk_overlap = activeThread.assistants[0].tools[0].settings.chunk_size } if ( key === 'chunk_size' && - value < activeThread.assistants[0].tools[0].settings.chunk_overlap + value < activeThread.assistants[0].tools[0].settings?.chunk_overlap ) { activeThread.assistants[0].tools[0].settings.chunk_overlap = value } else if ( key === 'chunk_overlap' && - value > activeThread.assistants[0].tools[0].settings.chunk_size + value > activeThread.assistants[0].tools[0].settings?.chunk_size ) { activeThread.assistants[0].tools[0].settings.chunk_size = value } diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx index 6fc05d44b..403370ade 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx @@ -1,6 +1,5 @@ import { memo } from 'react' -import { InferenceEngine } from '@janhq/core' import { Button } from '@janhq/joi' import { useAtomValue, useSetAtom } from 'jotai' @@ -8,6 +7,8 @@ import LogoMark from '@/containers/Brand/Logo/Mark' import { MainViewState } from '@/constants/screens' +import { isLocalEngine } from '@/utils/modelEngine' + import { mainViewStateAtom } from '@/helpers/atoms/App.atom' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' @@ -15,8 +16,7 @@ const EmptyThread = () => { const downloadedModels = useAtomValue(downloadedModelsAtom) const setMainViewState = useSetAtom(mainViewStateAtom) const showOnboardingStep = - downloadedModels.filter((e) => e.engine === InferenceEngine.nitro) - .length === 0 + downloadedModels.filter((e) => isLocalEngine(e.engine)).length === 0 return (
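Note: EmptyThread (like MyModelList earlier in this diff) now gates its onboarding check on isLocalEngine(e.engine) instead of hard-coding InferenceEngine.nitro, which also covers models grouped under the newer cortex_llamacpp engine. A hypothetical sketch of such a helper, assuming only the enum values that appear elsewhere in this diff; the real implementation lives in web/utils/modelEngine.ts and may list more engines:

    import { InferenceEngine } from '@janhq/core'

    // Assumption: engines considered "local" (on-device); the actual list
    // in modelEngine.ts may differ.
    const localEngines: InferenceEngine[] = [
      InferenceEngine.nitro,
      InferenceEngine.cortex_llamacpp,
    ]

    export const isLocalEngine = (engine?: InferenceEngine): boolean =>
      engine != null && localEngines.includes(engine)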
      diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx index 0ef9a9ba1..0433a8688 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx @@ -69,7 +69,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { return x.id === recommendModel[0] || x.id === recommendModel[1] } else { return ( - x.metadata.tags.includes('Featured') && x.metadata.size < 5000000000 + x.metadata?.tags?.includes('Featured') && x.metadata?.size < 5000000000 ) } }) @@ -143,7 +143,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { ) : ( filteredModels.map((model) => { const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) return (
      {
      - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} {!isDownloading ? ( downloadModel(model)} + onClick={() => + downloadModel( + model.sources[0].url, + model.id, + model.name + ) + } /> ) : ( Object.values(downloadStates) @@ -210,24 +216,24 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { {featuredModel.slice(0, 2).map((featModel) => { const isDownloading = downloadingModels.some( - (md) => md.id === featModel.id + (md) => md === featModel.id ) return (
      -
      {featModel.name}
      +
      {featModel.name}
      {isDownloading ? ( -
      +
      {Object.values(downloadStates) .filter((x) => x.modelId === featModel.id) .map((item, i) => (
      {
      ))} + + {toGibibytes(featModel.metadata?.size)} +
      ) : (
      - {toGibibytes(featModel.metadata.size)} + {toGibibytes(featModel.metadata?.size)}
      )} diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx index 6e6beda07..70fecb8a9 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx @@ -5,7 +5,7 @@ import { MessageStatus } from '@janhq/core' import hljs from 'highlight.js' import { useAtom, useAtomValue } from 'jotai' -import { BaseEditor, createEditor, Editor, Element, Transforms } from 'slate' +import { BaseEditor, createEditor, Editor, Transforms } from 'slate' import { withHistory } from 'slate-history' // Import withHistory import { Editable, @@ -69,6 +69,9 @@ const RichTextEditor = ({ }: RichTextEditorProps) => { const [editor] = useState(() => withHistory(withReact(createEditor()))) const currentLanguage = useRef('plaintext') + const hasStartBackticks = useRef(false) + const hasEndBackticks = useRef(false) + const [currentPrompt, setCurrentPrompt] = useAtom(currentPromptAtom) const textareaRef = useRef(null) const activeThreadId = useAtomValue(getActiveThreadIdAtom) @@ -129,57 +132,35 @@ const RichTextEditor = ({ }) } - if (Editor.isBlock(editor, node) && node.type === 'code') { + if (Editor.isBlock(editor, node) && node.type === 'paragraph') { node.children.forEach((child: { text: any }, childIndex: number) => { const text = child.text - // Match code block start and end - const startMatch = text.match(/^```(\w*)$/) - const endMatch = text.match(/^```$/) - const inlineMatch = text.match(/^`([^`]+)`$/) // Match inline code + const codeBlockStartRegex = /```(\w*)/g + const matches = [...currentPrompt.matchAll(codeBlockStartRegex)] - if (startMatch) { + if (matches.length % 2 !== 0) { + hasEndBackticks.current = false + } + + // Match code block start and end + const lang = text.match(/^```(\w*)$/) + const endMatch = text.match(/^```$/) + + if (lang) { // If it's the start of a code block, store the language - currentLanguage.current = startMatch[1] || 'plaintext' + currentLanguage.current = lang[1] || 'plaintext' } else if (endMatch) { // Reset language when code block ends currentLanguage.current = 'plaintext' - } else if (inlineMatch) { - // Apply syntax highlighting to inline code - const codeContent = inlineMatch[1] // Get the content within the backticks - try { - hljs.highlight(codeContent, { - language: - currentLanguage.current.length > 1 - ? currentLanguage.current - : 'plaintext', - }).value - } catch (err) { - hljs.highlight(codeContent, { - language: 'javascript', - }).value - } - - // Calculate the range for the inline code - const length = codeContent.length - ranges.push({ - anchor: { - path: [...path, childIndex], - offset: inlineMatch.index + 1, - }, - focus: { - path: [...path, childIndex], - offset: inlineMatch.index + 1 + length, - }, - type: 'code', - code: true, - language: currentLanguage.current, - className: '', // Specify class name if needed - }) - } else if (currentLanguage.current !== 'plaintext') { + } else if ( + hasStartBackticks.current && + hasEndBackticks.current && + currentLanguage.current !== 'plaintext' + ) { // Highlight entire code line if in a code block - const leadingSpaces = text.match(/^\s*/)?.[0] ?? 
'' // Capture leading spaces - const codeContent = text.trimStart() // Remove leading spaces for highlighting + + const codeContent = text.trim() // Remove leading spaces for highlighting let highlighted = '' highlighted = hljs.highlightAuto(codeContent).value @@ -201,21 +182,9 @@ const RichTextEditor = ({ let slateTextIndex = 0 - // Adjust to include leading spaces in the ranges and preserve formatting - ranges.push({ - anchor: { path: [...path, childIndex], offset: 0 }, - focus: { - path: [...path, childIndex], - offset: leadingSpaces.length, - }, - type: 'code', - code: true, - language: currentLanguage.current, - className: '', // No class for leading spaces - }) - doc.body.childNodes.forEach((childNode) => { const childText = childNode.textContent || '' + const length = childText.length const className = childNode.nodeType === Node.ELEMENT_NODE @@ -225,11 +194,11 @@ const RichTextEditor = ({ ranges.push({ anchor: { path: [...path, childIndex], - offset: slateTextIndex + leadingSpaces.length, + offset: slateTextIndex, }, focus: { path: [...path, childIndex], - offset: slateTextIndex + leadingSpaces.length + length, + offset: slateTextIndex + length, }, type: 'code', code: true, @@ -240,6 +209,7 @@ const RichTextEditor = ({ slateTextIndex += length }) } else { + currentLanguage.current = 'plaintext' ranges.push({ anchor: { path: [...path, childIndex], offset: 0 }, focus: { path: [...path, childIndex], offset: text.length }, @@ -252,7 +222,7 @@ const RichTextEditor = ({ return ranges }, - [editor] + [currentPrompt, editor] ) // RenderLeaf applies the decoration styles @@ -287,10 +257,13 @@ const RichTextEditor = ({ ) useEffect(() => { + if (!ReactEditor.isFocused(editor)) { + ReactEditor.focus(editor) + } if (textareaRef.current) { textareaRef.current.focus() } - }, [activeThreadId]) + }, [activeThreadId, editor]) useEffect(() => { if (textareaRef.current?.clientHeight) { @@ -298,6 +271,10 @@ const RichTextEditor = ({ ? '100px' : '40px' textareaRef.current.style.height = textareaRef.current.scrollHeight + 'px' + textareaRef.current?.scrollTo({ + top: textareaRef.current.scrollHeight, + behavior: 'instant', + }) textareaRef.current.style.overflow = textareaRef.current.clientHeight >= 390 ? 'auto' : 'hidden' } @@ -341,35 +318,6 @@ const RichTextEditor = ({ resetEditor() } else onStopInferenceClick() } - - if (event.key === '`') { - // Determine whether any of the currently selected blocks are code blocks. - const [match] = Editor.nodes(editor, { - match: (n) => - Element.isElement(n) && (n as CustomElement).type === 'code', - }) - // Toggle the block type dependsing on whether there's already a match. - Transforms.setNodes( - editor, - { type: match ? 
'paragraph' : 'code' }, - { match: (n) => Element.isElement(n) && Editor.isBlock(editor, n) } - ) - } - - if (event.key === 'Tab') { - const [match] = Editor.nodes(editor, { - match: (n) => { - return (n as CustomElement).type === 'code' - }, - mode: 'lowest', - }) - - if (match) { - event.preventDefault() - // Insert a tab character - Editor.insertText(editor, ' ') // Insert 2 spaces - } - } }, // eslint-disable-next-line react-hooks/exhaustive-deps [currentPrompt, editor, messages] @@ -390,6 +338,25 @@ const RichTextEditor = ({ .join('\n') setCurrentPrompt(combinedText) + if (combinedText.trim() === '') { + currentLanguage.current = 'plaintext' + } + const hasCodeBlockStart = combinedText.match(/^```(\w*)/m) + const hasCodeBlockEnd = combinedText.match(/^```$/m) + + // Set language to plaintext if no code block with language identifier is found + if (!hasCodeBlockStart) { + currentLanguage.current = 'plaintext' + hasStartBackticks.current = false + } else { + hasStartBackticks.current = true + } + if (!hasCodeBlockEnd) { + currentLanguage.current = 'plaintext' + hasEndBackticks.current = false + } else { + hasEndBackticks.current = true + } }} > {
      {renderPreview(fileUpload)} - { disabled={stateModel.loading || !activeThread} />
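Note: the reworked RichTextEditor decorator infers whether the caret sits inside a fenced code block by counting triple-backtick fences in the current prompt and treating an odd count as an unclosed block (the matches.length % 2 !== 0 check above). A standalone sketch of that parity check; the helper names are illustrative, not taken from the source:

    // Count fence markers such as ``` or ```ts at line starts.
    const countFences = (prompt: string): number =>
      [...prompt.matchAll(/^```(\w*)/gm)].length

    // An odd number of fences means a code block is still open,
    // so full-line highlighting should stay active.
    const isInsideOpenCodeBlock = (prompt: string): boolean =>
      countFences(prompt) % 2 !== 0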