diff --git a/.github/workflows/jan-docs.yml b/.github/workflows/jan-docs.yml index 2993d1525..bc8473687 100644 --- a/.github/workflows/jan-docs.yml +++ b/.github/workflows/jan-docs.yml @@ -6,7 +6,7 @@ on: - main paths: - 'docs/**' - tags: ["v[0-9]+.[0-9]+.[0-9]+-docs"] + - '.github/workflows/jan-docs.yml' pull_request: branches: - main @@ -24,7 +24,7 @@ jobs: CLOUDFLARE_PROJECT_NAME: jan runs-on: ubuntu-latest permissions: - contents: read + contents: write deployments: write pull-requests: write steps: @@ -78,13 +78,13 @@ jobs: Preview URL: ${{ steps.deployCloudflarePages.outputs.url }} - name: Add Custome Domain file - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + if: github.event_name == 'push' && github.event.pull_request.head.repo.full_name != github.repository run: echo "${{ vars.DOCUSAURUS_DOMAIN }}" > ./docs/build/CNAME # Popular action to deploy to GitHub Pages: # Docs: https://github.com/peaceiris/actions-gh-pages#%EF%B8%8F-docusaurus - name: Deploy to GitHub Pages - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + if: github.event_name == 'push' && github.event.pull_request.head.repo.full_name != github.repository uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/jan-electron-build.yml b/.github/workflows/jan-electron-build.yml index db585e3c1..118d6c3a0 100644 --- a/.github/workflows/jan-electron-build.yml +++ b/.github/workflows/jan-electron-build.yml @@ -28,12 +28,7 @@ jobs: - name: Update app version base on tag run: | - if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then - echo "Error: Tag is not valid!" - exit 1 - fi - jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json - mv /tmp/package.json electron/package.json + make update-app-version env: VERSION_TAG: ${{ steps.tag.outputs.tag }} @@ -49,25 +44,9 @@ jobs: p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }} p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }} - - - name: Build uikit - run: | - cd uikit - yarn install - yarn build - - - name: Install yarn dependencies - run: | - yarn build:core - yarn install - yarn build:plugins - env: - APP_PATH: "." - DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }} - - name: Build and publish app run: | - yarn build:publish + make build-and-publish env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} CSC_LINK: "/tmp/codesign.p12" @@ -75,6 +54,8 @@ jobs: CSC_IDENTITY_AUTO_DISCOVERY: "true" APPLE_ID: ${{ secrets.APPLE_ID }} APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} + APP_PATH: "." + DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }} build-windows-x64: runs-on: windows-latest @@ -99,34 +80,13 @@ jobs: - name: Update app version base on tag shell: bash run: | - if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then - echo "Error: Tag is not valid!" - exit 1 - fi - jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json - mv /tmp/package.json electron/package.json + make update-app-version env: VERSION_TAG: ${{ steps.tag.outputs.tag }} - - name: Build uikit - run: | - cd uikit - yarn config set network-timeout 300000 - yarn install - yarn build - - - name: Install yarn dependencies - shell: powershell - run: | - yarn config set network-timeout 300000 - yarn build:core - yarn install - $env:NITRO_VERSION = Get-Content .\plugins\inference-plugin\nitro\version.txt; echo $env:NITRO_VERSION - yarn build:plugins - - name: Build and publish app run: | - yarn build:publish + make build-and-publish env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -158,31 +118,13 @@ jobs: - name: Update app version base on tag run: | - if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then - echo "Error: Tag is not valid!" - exit 1 - fi - jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json - mv /tmp/package.json electron/package.json + make update-app-version env: VERSION_TAG: ${{ steps.tag.outputs.tag }} - - name: Build uikit - run: | - cd uikit - yarn install - yarn build - - - name: Install yarn dependencies - run: | - yarn config set network-timeout 300000 - yarn build:core - yarn install - yarn build:plugins - - name: Build and publish app run: | - yarn build:publish + make build-and-publish env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/jan-electron-linter-and-test.yml b/.github/workflows/jan-electron-linter-and-test.yml index 92f29c368..7717afbd4 100644 --- a/.github/workflows/jan-electron-linter-and-test.yml +++ b/.github/workflows/jan-electron-linter-and-test.yml @@ -43,21 +43,9 @@ jobs: with: node-version: 20 - - name: Build uikit - run: | - cd uikit - yarn install - yarn build - - name: Linter and test run: | - yarn config set network-timeout 300000 - yarn build:core - yarn install - yarn lint - yarn build:plugins - yarn build:test - yarn test + make test env: CSC_IDENTITY_AUTO_DISCOVERY: "false" @@ -81,24 +69,10 @@ jobs: with: node-version: 20 - - name: Build uikit - run: | - yarn config set network-timeout 300000 - cd uikit - yarn install - yarn build - - name: Linter and test shell: powershell run: | - yarn config set network-timeout 300000 - yarn build:core - yarn install - $env:NITRO_VERSION = Get-Content .\plugins\inference-plugin\nitro\version.txt; echo $env:NITRO_VERSION - yarn build:plugins - yarn build:test - $env:CI="e2e" - yarn test + make test test-on-ubuntu: runs-on: [self-hosted, Linux, ubuntu-desktop] @@ -118,19 +92,8 @@ jobs: with: node-version: 20 - - name: Build uikit - run: | - cd uikit - yarn install - yarn build - - name: Linter and test run: | export DISPLAY=$(w -h | awk 'NR==1 {print $2}') echo -e "Display ID: $DISPLAY" - yarn config set network-timeout 300000 - yarn build:core - yarn install - yarn build:plugins - yarn build:test - yarn test + make test diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..3d04b745f --- /dev/null +++ b/Makefile @@ -0,0 +1,62 @@ +# Makefile for Jan Electron App - Build, Lint, Test, and Clean + +# Default target, does nothing +all: + @echo "Specify a target to run" + +# Builds the UI kit +build-uikit: +ifeq ($(OS),Windows_NT) + cd uikit && yarn config set network-timeout 300000 && yarn install && yarn build +else + cd uikit && yarn install && yarn build +endif +# Updates the app version based on the tag +update-app-version: + if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then \ + echo "Error: Tag is not valid!"; \ + exit 1; \ + fi + jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json + mv /tmp/package.json electron/package.json + +# Installs yarn dependencies and builds core and plugins +install-and-build: build-uikit +ifeq ($(OS),Windows_NT) + powershell -Command "yarn config set network-timeout 300000; \ + $$env:NITRO_VERSION = Get-Content .\\plugins\\inference-plugin\\nitro\\version.txt; \ + Write-Output \"Nitro version: $$env:NITRO_VERSION\"; yarn build:core; yarn install; yarn build:plugins" +else + yarn build:core + yarn install + yarn build:plugins +endif + +dev: install-and-build + yarn dev + +# Linting +lint: install-and-build + yarn lint + +# Testing +test: lint + yarn build:test + yarn test + +# Builds and publishes the app +build-and-publish: install-and-build + yarn build:publish + +# Build +build: install-and-build + yarn build + +clean: +ifeq ($(OS),Windows_NT) + powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist -Recurse -Directory | Remove-Item -Recurse -Force" +else + find . -name "node_modules" -type d -prune -exec rm -rf '{}' + + find . -name ".next" -type d -exec rm -rf '{}' + + find . -name "dist" -type d -exec rm -rf '{}' + +endif diff --git a/README.md b/README.md index 8405eb74e..75214d940 100644 --- a/README.md +++ b/README.md @@ -91,11 +91,10 @@ Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) fi - node >= 20.0.0 - yarn >= 1.22.0 +- make >= 3.81 ### Instructions -Note: This instruction is tested on MacOS only. - 1. **Clone the Repository:** ```bash @@ -104,25 +103,10 @@ Note: This instruction is tested on MacOS only. cd jan ``` -2. **Install dependencies:** - -```bash - yarn install - - # Build core module - yarn build:core - - # Packing base plugins - yarn build:plugins - - # Packing uikit - yarn build:uikit -``` - -3. **Run development and Using Jan Desktop** +2. **Run development and Using Jan Desktop** ``` - yarn dev + make dev ``` This will start the development server and open the desktop app. @@ -134,19 +118,9 @@ Note: This instruction is tested on MacOS only. # Do step 1 and 2 in previous section git clone https://github.com/janhq/jan cd jan -yarn install - -# Build core module -yarn build:core - -# Package base plugins -yarn build:plugins - -# Packing uikit -yarn build:uikit # Build the app -yarn build +make build ``` This will build the app MacOS m1/m2 for production (with code signing already done) and put the result in `dist` folder. diff --git a/core/src/plugins/inference.ts b/core/src/plugins/inference.ts index 663d0b258..6fc93ed37 100644 --- a/core/src/plugins/inference.ts +++ b/core/src/plugins/inference.ts @@ -1,4 +1,4 @@ -import { MessageRequest } from "../index"; +import { MessageRequest, ThreadMessage } from "../index"; import { JanPlugin } from "../plugin"; /** @@ -16,10 +16,15 @@ export abstract class InferencePlugin extends JanPlugin { */ abstract stopModel(): Promise; + /** + * Stops the streaming inference. + */ + abstract stopInference(): Promise; + /** * Processes an inference request. * @param data - The data for the inference request. * @returns The result of the inference request. */ - abstract inferenceRequest(data: MessageRequest): Promise; + abstract inferenceRequest(data: MessageRequest): Promise; } diff --git a/docs/README.md b/docs/README.md index 9baac2b29..63d21064b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -43,9 +43,9 @@ If you are using GitHub pages for hosting, this command is a convenient way to b ### Preview URL, Pre-release and Publishing Documentation -When a PR is created, the preview URL will be automatically commented on the PR. For staging or pre-release documentation, use the following domains [https://pre-release.jan.ai/](https://pre-release.jan.ai/) +When a PR is created, the preview URL will be automatically commented on the PR. -To officially publish documentation, create a tag in the format `vx.y.z-docs` (e.g., `v0.1.1-docs`) on the `main` branch. The documentation will then be published to [https://jan.ai/](https://jan.ai/) +The documentation will then be published to [https://jan.ai/](https://jan.ai/) when the PR is merged to `main`. ### Additional Plugins - @docusaurus/theme-live-codeblock diff --git a/docs/docs/specs/messages.md b/docs/docs/specs/messages.md index 66e4eeaa2..dbee33fe4 100644 --- a/docs/docs/specs/messages.md +++ b/docs/docs/specs/messages.md @@ -2,57 +2,114 @@ title: Messages --- -:::warning +:::caution -Draft Specification: functionality has not been implemented yet. - -Feedback: [HackMD: Threads Spec](https://hackmd.io/BM_8o_OCQ-iLCYhunn2Aug) +This is currently under development. ::: -Messages are within `threads` and capture additional metadata. -> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages +## Overview -## Message Object -> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages/object -```json -{ - // Jan specific properties - "updatedAt": "...", // that's it I think +`Messages` capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats). - // OpenAI compatible properties: https://platform.openai.com/docs/api-reference/messages) - "id": "msg_dKYDWyQvtjDBi3tudL1yWKDa", - "object": "thread.message", - "created_at": 1698983503, - "thread_id": "thread_RGUhOuO9b2nrktrmsQ2uSR6I", - "role": "assistant", - "content": [ - { - "type": "text", - "text": { - "value": "Hi! How can I help you today?", - "annotations": [] - } - } - ], - "file_ids": [], - "assistant_id": "asst_ToSF7Gb04YMj8AMMm50ZLLtY", - "run_id": "run_BjylUJgDqYK9bOhy4yjAiMrn", - "metadata": {} -} +- Users and assistants can send multimedia messages. +- An [OpenAI Message API](https://platform.openai.com/docs/api-reference/messages) compatible endpoint at `localhost:3000/v1/messages`. + +## Folder Structure + +Messages are saved in the `/threads/{thread_id}` folder in `messages.jsonl` files + +```sh +jan/ + threads/ + assistant_name_unix_timestamp/ + ... + messages.jsonl + jan_2341243134/ + ... + messages.jsonl ``` -## Messages API -> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages +## `message.jsonl` +Individual messages are saved in `jsonl` format for indexing purposes. + +```json +{...message_2} +{...message_1} +{...message_0} +``` + +### Examples + +Here's a standard example `message` sent from a user. + +```json +"id": "0", // Sequential or UUID +"object": "thread.message", // Defaults to "thread.message" +"created_at": 1698983503, +"thread_id": "thread_asdf", // Defaults to parent thread +"assistant_id": "jan", // Defaults to parent thread +"role": "user", // From either "user" or "assistant" +"content": [ + { + "type": "text", + "text": { + "value": "Hi!?", + "annotations": [] + } + } +], +"metadata": {}, // Defaults to {} +// "run_id": "...", // Rather than `run` id abstraction +// "file_ids": [], +``` + +Here's an example `message` response from an assistant. + +```json +"id": "0", // Sequential or UUID +"object": "thread.message", // Defaults to "thread.message" +"created_at": 1698983503, +"thread_id": "thread_asdf", // Defaults to parent thread +"assistant_id": "jan", // Defaults to parent thread +"role": "assistant", // From either "user" or "assistant" +"content": [ // Usually from Chat Completion obj + { + "type": "text", + "text": { + "value": "Hi! How can I help you today?", + "annotations": [] + } + } +], +"metadata": {}, // Defaults to {} +// "run_id": "...", // KIV +// "file_ids": [], // KIV +// "usage": {} // KIV: saving chat completion properties https://platform.openai.com/docs/api-reference/chat/object +``` + +## API Reference + +Jan's `messages` API is compatible with [OpenAI's Messages API](https://platform.openai.com/docs/api-reference/messages), with additional methods for managing messages locally. + +See [Jan Messages API](https://jan.ai/api-reference#tag/Messages) + + + diff --git a/docs/docs/specs/models.md b/docs/docs/specs/models.md index 851626431..471904bc5 100644 --- a/docs/docs/specs/models.md +++ b/docs/docs/specs/models.md @@ -4,7 +4,7 @@ title: Models :::caution -Draft Specification: functionality has not been implemented yet. +This is currently under development. ::: @@ -46,19 +46,19 @@ jan/ # Jan root folder - `model.json` contains metadata and default parameters used to run a model. - The only required field is `source_url`. -### GGUF Example +### Example Here's a standard example `model.json` for a GGUF model. - `source_url`: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/. ```json -"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf", -"type": "model", // Defaults to "model" -"version": "1", // Defaults to 1 "id": "zephyr-7b" // Defaults to foldername +"object": "model", // Defaults to "model" +"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf", "name": "Zephyr 7B" // Defaults to foldername "owned_by": "you" // Defaults to you +"version": "1", // Defaults to 1 "created": 1231231 // Defaults to file creation time "description": "" "state": enum[null, "downloading", "ready", "starting", "stopping", ...] @@ -95,7 +95,7 @@ See [Jan Models API](https://jan.ai/api-reference#tag/Models) :::caution -This is current under development. +This is currently under development. ::: diff --git a/docs/docs/specs/threads.md b/docs/docs/specs/threads.md index d5bc15c8f..c9d9c9d4c 100644 --- a/docs/docs/specs/threads.md +++ b/docs/docs/specs/threads.md @@ -2,89 +2,101 @@ title: Threads --- -:::warning +:::caution -Draft Specification: functionality has not been implemented yet. - -Feedback: [HackMD: Threads Spec](https://hackmd.io/BM_8o_OCQ-iLCYhunn2Aug) +This is currently under development. ::: -## User Stories +## Overview -_Users can chat with an assistant in a thread_ +`Threads` are conversations between an `assistant` and the user: -- See [Messages Spec](./messages.md) +- Users can tweak `model` params and `assistant` behavior within each thread. +- Users can import and export threads. +- An [OpenAI Thread API](https://platform.openai.com/docs/api-reference/threads) compatible endpoint at `localhost:3000/v1/threads`. -_Users can change assistant and model parameters in a thread_ +## Folder Structure -- Wireframes of - -_Users can delete all thread history_ - -- Wireframes of settings page. - -## Jan Thread Object - -- A `Jan Thread Object` is a "representation of a conversation thread" between an `assistant` and the user -- Objects are defined by `thread-uuid.json` files in `json` format -- Objects are designed to be compatible with `OpenAI Thread Objects` with additional properties needed to run on our infrastructure. -- Objects contain a `models` field, to track when the user overrides the assistant's default model parameters. - -| Property | Type | Description | Validation | -| ---------- | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------ | -| `object` | enum: `model`, `assistant`, `thread`, `message` | The Jan Object type | Defaults to `thread` | -| `assistants` | array | An array of Jan Assistant Objects. Threads can "override" an assistant's parameters. Thread-level model parameters are directly saved in the `thread.models` property! (see Models spec) | Defaults to `assistant.name` | -| `messages` | array | An array of Jan Message Objects. (see Messages spec) | Defaults to `[]` | -| `metadata` | map | Useful for storing additional information about the object in a structured format. | Defaults to `{}` | - -### Generic Example - -```json -// janroot/threads/jan_1700123404.json -"assistants": ["assistant-123"], -"messages": [ - {...message0}, {...message1} -], -"metadata": { - "summary": "funny physics joke", -}, -``` - -## Filesystem - -- `Jan Thread Objects`'s `json` files always has the naming schema: `assistant_uuid` + `unix_time_thread_created_at. See below. -- Threads are all saved in the `janroot/threads` folder in a flat folder structure. -- The folder is standalone and can be easily zipped, exported, and cleared. +- Threads are saved in the `/threads` folder. +- Threads are organized by folders, one for each thread, and can be easily zipped, exported, and cleared. +- Thread folders follow the naming: `assistant_id` + `thread_created_at`. +- Thread folders also contain `messages.jsonl` files. See [messages](/specs/messages). ```sh -janroot/ +jan/ threads/ - jan_1700123404.json - homework_helper_700120003.json + assistant_name_unix_timestamp/ + thread.json + messages.jsonl + jan_2341243134/ + thread.json ``` -## Jan API +## `thread.json` + +- Each `thread` folder contains a `thread.json` file, which is a representation of a thread. +- `thread.json` contains metadata and model parameter overrides. +- There are no required fields. + +### Example + +Here's a standard example `thread.json` for a conversation between the user and the default Jan assistant. + +```json +"id": "thread_....", // Defaults to foldername +"object": "thread", // Defaults to "thread" +"title": "funny physics joke", // Defaults to "" +"assistants": [ + { + "assistant_id": "jan", // Defaults to "jan" + "model": { // Defaults to 1 currently active model (can be changed before thread is begun) + "settings": {}, // Defaults to and overrides assistant.json's "settings" (and if none, then model.json "settings") + "parameters": {}, // Defaults to and overrides assistant.json's "parameters" (and if none, then model.json "parameters") + } + }, +], +"created": 1231231 // Defaults to file creation time +"metadata": {}, // Defaults to {} +``` + +## API Reference + +Jan's Threads API is compatible with [OpenAI's Threads API](https://platform.openai.com/docs/api-reference/threads), with additional methods for managing threads locally. + +See [Jan Threads API](https://jan.ai/api-reference#tag/Threads) + + + Can achieve this goal by calling `Get Thread` API + +-> Can achieve this goal by calling `Get Thread` API --> diff --git a/docs/sidebars.js b/docs/sidebars.js index 83eb50f5f..035e51bd0 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -67,8 +67,8 @@ const sidebars = { items: [ "specs/chats", "specs/models", - // "specs/threads", - // "specs/messages", + "specs/threads", + "specs/messages", // "specs/assistants", // "specs/files", // "specs/jan", diff --git a/docs/src/styles/tweaks.scss b/docs/src/styles/tweaks.scss index 8c5c7a8e9..d5d016436 100644 --- a/docs/src/styles/tweaks.scss +++ b/docs/src/styles/tweaks.scss @@ -97,6 +97,9 @@ @apply leading-loose; line-height: 1.5; } + + div { + margin-top: 12px; + } } } @@ -113,3 +116,11 @@ .table-of-contents { font-size: 14px; } + +.task-list-item { + list-style: none; +} + +blockquote { + margin-bottom: 12px; +} diff --git a/plugins/inference-plugin/nitro/version.txt b/plugins/inference-plugin/nitro/version.txt index 84aa3a7dd..a34eaa5d0 100644 --- a/plugins/inference-plugin/nitro/version.txt +++ b/plugins/inference-plugin/nitro/version.txt @@ -1 +1 @@ -0.1.8 \ No newline at end of file +0.1.11 \ No newline at end of file diff --git a/plugins/inference-plugin/package.json b/plugins/inference-plugin/package.json index 3b7159c3c..f19c864cd 100644 --- a/plugins/inference-plugin/package.json +++ b/plugins/inference-plugin/package.json @@ -36,6 +36,7 @@ "dependencies": { "@janhq/core": "file:../../core", "download-cli": "^1.1.1", + "electron-log": "^5.0.1", "fetch-retry": "^5.0.6", "kill-port": "^2.0.1", "path-browserify": "^1.0.1", @@ -55,6 +56,7 @@ "bundleDependencies": [ "tcp-port-used", "kill-port", - "fetch-retry" + "fetch-retry", + "electron-log" ] } diff --git a/plugins/inference-plugin/src/helpers/sse.ts b/plugins/inference-plugin/src/helpers/sse.ts index f63cc027b..987751221 100644 --- a/plugins/inference-plugin/src/helpers/sse.ts +++ b/plugins/inference-plugin/src/helpers/sse.ts @@ -4,7 +4,7 @@ import { Observable } from "rxjs"; * @param recentMessages - An array of recent messages to use as context for the inference. * @returns An Observable that emits the generated response as a string. */ -export function requestInference(recentMessages: any[]): Observable { +export function requestInference(recentMessages: any[], controller?: AbortController): Observable { return new Observable((subscriber) => { const requestBody = JSON.stringify({ messages: recentMessages, @@ -20,6 +20,7 @@ export function requestInference(recentMessages: any[]): Observable { "Access-Control-Allow-Origin": "*", }, body: requestBody, + signal: controller?.signal }) .then(async (response) => { const stream = response.body; diff --git a/plugins/inference-plugin/src/index.ts b/plugins/inference-plugin/src/index.ts index 8cabf0343..4f358dc56 100644 --- a/plugins/inference-plugin/src/index.ts +++ b/plugins/inference-plugin/src/index.ts @@ -28,6 +28,8 @@ import { fs } from "@janhq/core"; * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ export default class JanInferencePlugin implements InferencePlugin { + controller = new AbortController(); + isCancelled = false; /** * Returns the type of the plugin. * @returns {PluginType} The type of the plugin. @@ -40,7 +42,9 @@ export default class JanInferencePlugin implements InferencePlugin { * Subscribes to events emitted by the @janhq/core package. */ onLoad(): void { - events.on(EventName.OnNewMessageRequest, this.handleMessageRequest); + events.on(EventName.OnNewMessageRequest, (data) => + JanInferencePlugin.handleMessageRequest(data, this) + ); } /** @@ -70,23 +74,32 @@ export default class JanInferencePlugin implements InferencePlugin { return executeOnMain(MODULE, "killSubprocess"); } + /** + * Stops streaming inference. + * @returns {Promise} A promise that resolves when the streaming is stopped. + */ + async stopInference(): Promise { + this.isCancelled = true; + this.controller?.abort(); + } + /** * Makes a single response inference request. * @param {MessageRequest} data - The data for the inference request. * @returns {Promise} A promise that resolves with the inference response. */ - async inferenceRequest(data: MessageRequest): Promise { - const message = { - ...data, - message: "", - user: "assistant", + async inferenceRequest(data: MessageRequest): Promise { + const message: ThreadMessage = { + threadId: data.threadId, + content: "", createdAt: new Date().toISOString(), + status: MessageStatus.Ready, }; return new Promise(async (resolve, reject) => { requestInference(data.messages ?? []).subscribe({ next: (content) => { - message.message = content; + message.content = content; }, complete: async () => { resolve(message); @@ -100,9 +113,14 @@ export default class JanInferencePlugin implements InferencePlugin { /** * Handles a new message request by making an inference request and emitting events. + * Function registered in event manager, should be static to avoid binding issues. + * Pass instance as a reference. * @param {MessageRequest} data - The data for the new message request. */ - private async handleMessageRequest(data: MessageRequest) { + private static async handleMessageRequest( + data: MessageRequest, + instance: JanInferencePlugin + ) { const message: ThreadMessage = { threadId: data.threadId, content: "", @@ -113,7 +131,10 @@ export default class JanInferencePlugin implements InferencePlugin { }; events.emit(EventName.OnNewMessageResponse, message); - requestInference(data.messages).subscribe({ + instance.isCancelled = false; + instance.controller = new AbortController(); + + requestInference(data.messages, instance.controller).subscribe({ next: (content) => { message.content = content; events.emit(EventName.OnMessageResponseUpdate, message); @@ -125,7 +146,8 @@ export default class JanInferencePlugin implements InferencePlugin { }, error: async (err) => { message.content = - message.content.trim() + "\n" + "Error occurred: " + err.message; + message.content.trim() + + (instance.isCancelled ? "" : "\n" + "Error occurred: " + err.message); message.status = MessageStatus.Ready; events.emit(EventName.OnMessageResponseUpdate, message); }, diff --git a/plugins/inference-plugin/src/module.ts b/plugins/inference-plugin/src/module.ts index a9e60e4cd..a1a1d4ea0 100644 --- a/plugins/inference-plugin/src/module.ts +++ b/plugins/inference-plugin/src/module.ts @@ -5,6 +5,8 @@ const { spawn } = require("child_process"); const tcpPortUsed = require("tcp-port-used"); const fetchRetry = require("fetch-retry")(global.fetch); +const log = require("electron-log"); + // The PORT to use for the Nitro subprocess const PORT = 3928; const LOCAL_HOST = "127.0.0.1"; @@ -35,6 +37,7 @@ interface InitModelResponse { function initModel(modelFile: string): Promise { // 1. Check if the model file exists currentModelFile = modelFile; + log.info("Started to load model " + modelFile); return ( // 1. Check if the port is used, if used, attempt to unload model / kill nitro process @@ -42,13 +45,12 @@ function initModel(modelFile: string): Promise { .then(checkAndUnloadNitro) // 2. Spawn the Nitro subprocess .then(spawnNitroProcess) - // 3. Wait until the port is used (Nitro http server is up) - .then(() => tcpPortUsed.waitUntilUsed(PORT, 300, 30000)) // 4. Load the model into the Nitro subprocess (HTTP POST request) .then(loadLLMModel) // 5. Check if the model is loaded successfully .then(validateModelStatus) .catch((err) => { + log.error("error: " + JSON.stringify(err)); return { error: err }; }) ); @@ -63,6 +65,7 @@ function loadLLMModel(): Promise { llama_model_path: currentModelFile, ctx_len: 2048, ngl: 100, + cont_batching: false, embedding: false, // Always enable embedding mode on }; @@ -77,6 +80,7 @@ function loadLLMModel(): Promise { retryDelay: 500, }).catch((err) => { console.error(err); + log.error("error: " + JSON.stringify(err)); // Fetch error, Nitro server might not started properly throw new Error("Model loading failed."); }); @@ -112,7 +116,8 @@ async function validateModelStatus(): Promise { return { error: "Model loading failed" }; }) .catch((err) => { - return { error: `Model loading failed. ${err.message}` }; + log.error("Model loading failed" + err.toString()); + return { error: `Model loading failed.` }; }); } @@ -158,46 +163,53 @@ function checkAndUnloadNitro() { * Using child-process to spawn the process * Should run exactly platform specified Nitro binary version */ -function spawnNitroProcess() { - let binaryFolder = path.join(__dirname, "nitro"); // Current directory by default - let binaryName; +async function spawnNitroProcess(): Promise { + return new Promise((resolve, reject) => { + let binaryFolder = path.join(__dirname, "nitro"); // Current directory by default + let binaryName; - if (process.platform === "win32") { - // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries - binaryName = "win-start.bat"; - } else if (process.platform === "darwin") { - // Mac OS platform - if (process.arch === "arm64") { - binaryFolder = path.join(binaryFolder, "mac-arm64"); + if (process.platform === "win32") { + // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries + binaryName = "win-start.bat"; + } else if (process.platform === "darwin") { + // Mac OS platform + if (process.arch === "arm64") { + binaryFolder = path.join(binaryFolder, "mac-arm64"); + } else { + binaryFolder = path.join(binaryFolder, "mac-x64"); + } + binaryName = "nitro"; } else { - binaryFolder = path.join(binaryFolder, "mac-x64"); + // Linux + // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries + binaryName = "linux-start.sh"; // For other platforms } - binaryName = "nitro"; - } else { - // Linux - // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries - binaryName = "linux-start.sh"; // For other platforms - } - const binaryPath = path.join(binaryFolder, binaryName); + const binaryPath = path.join(binaryFolder, binaryName); - // Execute the binary - subprocess = spawn(binaryPath, [1, "0.0.0.0", PORT], { - cwd: binaryFolder, - }); + // Execute the binary + subprocess = spawn(binaryPath, [1, "0.0.0.0", PORT], { + cwd: binaryFolder, + }); - // Handle subprocess output - subprocess.stdout.on("data", (data) => { - console.log(`stdout: ${data}`); - }); + // Handle subprocess output + subprocess.stdout.on("data", (data) => { + console.log(`stdout: ${data}`); + }); - subprocess.stderr.on("data", (data) => { - console.error(`stderr: ${data}`); - }); + subprocess.stderr.on("data", (data) => { + log.error("subprocess error:" + data.toString()); + console.error(`stderr: ${data}`); + }); - subprocess.on("close", (code) => { - console.log(`child process exited with code ${code}`); - subprocess = null; + subprocess.on("close", (code) => { + console.log(`child process exited with code ${code}`); + subprocess = null; + reject(`Nitro process exited. ${code ?? ""}`); + }); + tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => { + resolve(); + }); }); } @@ -206,11 +218,14 @@ function spawnNitroProcess() { * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. */ function validateModelVersion(): Promise { + log.info("validateModelVersion"); // Read the file return new Promise((resolve, reject) => { fs.open(currentModelFile, "r", (err, fd) => { if (err) { + log.error("validateModelVersion error" + JSON.stringify(err)); console.error(err.message); + reject(err); return; } @@ -220,7 +235,13 @@ function validateModelVersion(): Promise { // Model version will be the 5th byte of the file fs.read(fd, buffer, 0, 1, 4, (err, bytesRead, buffer) => { if (err) { + log.error("validateModelVersion open error" + JSON.stringify(err)); console.error(err.message); + fs.close(fd, (err) => { + log.error("validateModelVersion close error" + JSON.stringify(err)); + if (err) console.error(err.message); + }); + reject(err); } else { // Interpret the byte as ASCII if (buffer[0] === 0x01) { diff --git a/web/containers/ModalCancelDownload/index.tsx b/web/containers/ModalCancelDownload/index.tsx index 3413d02c4..33476fda1 100644 --- a/web/containers/ModalCancelDownload/index.tsx +++ b/web/containers/ModalCancelDownload/index.tsx @@ -16,7 +16,6 @@ import { import { atom, useAtomValue } from 'jotai' import { useDownloadState } from '@/hooks/useDownloadState' -import useGetPerformanceTag from '@/hooks/useGetPerformanceTag' import { formatDownloadPercentage } from '@/utils/converter' @@ -30,7 +29,6 @@ export default function ModalCancelDownload({ isFromList, }: Props) { const { modelDownloadStateAtom } = useDownloadState() - useGetPerformanceTag() const downloadAtom = useMemo( () => atom((get) => get(modelDownloadStateAtom)[suitableModel.name]), // eslint-disable-next-line react-hooks/exhaustive-deps diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 57e3a0118..20b8aec15 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -49,7 +49,7 @@ export function useActiveModel() { console.debug('Init model: ', modelId) const path = join('models', model.name, modelId) const res = await initModel(path) - if (res?.error) { + if (res?.error && (!activeModel?.id || modelId === activeModel?.id)) { const errorMessage = `${res.error}` alert(errorMessage) setStateModel(() => ({ diff --git a/web/hooks/useGetPerformanceTag.ts b/web/hooks/useGetPerformanceTag.ts index 4c9934778..435f82c4c 100644 --- a/web/hooks/useGetPerformanceTag.ts +++ b/web/hooks/useGetPerformanceTag.ts @@ -1,40 +1,37 @@ -import { useState } from 'react' - import { ModelVersion } from '@janhq/core/lib/types' -import { useAtomValue } from 'jotai' import { ModelPerformance, TagType } from '@/constants/tagType' -import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom' - // Recommendation: // `Recommended (green)`: "Max RAM required" is 80% of users max RAM. // `Slow on your device (yellow)`: Max RAM required is 80-100% of users max RAM // `Not enough RAM (red)`: User RAM is below "Max RAM required" export default function useGetPerformanceTag() { - const [performanceTag, setPerformanceTag] = useState() - const totalRam = useAtomValue(totalRamAtom) - - const getPerformanceForModel = async (modelVersion: ModelVersion) => { + async function getPerformanceForModel( + modelVersion: ModelVersion, + totalRam: number + ): Promise<{ title: string; performanceTag: TagType }> { const requiredRam = modelVersion.maxRamRequired - setPerformanceTag(calculateRamPerformance(requiredRam, totalRam)) + const performanceTag = calculateRamPerformance(requiredRam, totalRam) + + let title = '' + + switch (performanceTag) { + case ModelPerformance.PerformancePositive: + title = 'Recommended' + break + case ModelPerformance.PerformanceNeutral: + title = 'Slow on your device' + break + case ModelPerformance.PerformanceNegative: + title = 'Not enough RAM' + break + } + return { title, performanceTag } } - let title = '' - switch (performanceTag) { - case ModelPerformance.PerformancePositive: - title = 'Recommended' - break - case ModelPerformance.PerformanceNeutral: - title = 'Slow on your device' - break - case ModelPerformance.PerformanceNegative: - title = 'Not enough RAM' - break - } - - return { performanceTag, title, getPerformanceForModel } + return { getPerformanceForModel } } const calculateRamPerformance = ( diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts index 12f54abd0..7a3b71ea5 100644 --- a/web/hooks/useSendChatMessage.ts +++ b/web/hooks/useSendChatMessage.ts @@ -63,13 +63,13 @@ export default function useSendChatMessage() { if ( currentConvo && currentConvo.id === newMessage.threadId && - result?.message && - result?.message?.trim().length > 0 && - result.message.split(' ').length <= 10 + result?.content && + result?.content?.trim().length > 0 && + result.content.split(' ').length <= 10 ) { const updatedConv = { ...currentConvo, - summary: result.message, + summary: result.content, } updateConversation(updatedConv) pluginManager diff --git a/web/package.json b/web/package.json index 59d142dbf..16522cace 100644 --- a/web/package.json +++ b/web/package.json @@ -15,8 +15,8 @@ "@headlessui/react": "^1.7.15", "@heroicons/react": "^2.0.18", "@hookform/resolvers": "^3.3.2", - "@janhq/core": "./core", - "@janhq/uikit": "./uikit", + "@janhq/core": "link:./core", + "@janhq/uikit": "link:./uikit", "autoprefixer": "10.4.16", "class-variance-authority": "^0.7.0", "framer-motion": "^10.16.4", diff --git a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx index fe3c9c3e9..31a9933ee 100644 --- a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx +++ b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx @@ -1,5 +1,5 @@ /* eslint-disable react-hooks/exhaustive-deps */ -import { useCallback, useEffect, useMemo } from 'react' +import { useCallback, useEffect, useMemo, useState } from 'react' import { ModelCatalog, ModelVersion } from '@janhq/core/lib/types' import { Badge, Button } from '@janhq/uikit' @@ -20,6 +20,8 @@ import { useMainViewState } from '@/hooks/useMainViewState' import { toGigabytes } from '@/utils/converter' +import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom' + type Props = { suitableModel: ModelVersion exploreModel: ModelCatalog @@ -32,8 +34,12 @@ const ExploreModelItemHeader: React.FC = ({ const { downloadModel } = useDownloadModel() const { downloadedModels } = useGetDownloadedModels() const { modelDownloadStateAtom, downloadStates } = useDownloadState() - const { performanceTag, title, getPerformanceForModel } = - useGetPerformanceTag() + const { getPerformanceForModel } = useGetPerformanceTag() + const [title, setTitle] = useState('Recommended') + const totalRam = useAtomValue(totalRamAtom) + const [performanceTag, setPerformanceTag] = useState( + ModelPerformance.PerformancePositive + ) const downloadAtom = useMemo( () => atom((get) => get(modelDownloadStateAtom)[suitableModel.name]), [suitableModel.name] @@ -41,9 +47,20 @@ const ExploreModelItemHeader: React.FC = ({ const downloadState = useAtomValue(downloadAtom) const { setMainViewState } = useMainViewState() + const calculatePerformance = useCallback( + (suitableModel: ModelVersion) => async () => { + const { title, performanceTag } = await getPerformanceForModel( + suitableModel, + totalRam + ) + setPerformanceTag(performanceTag) + setTitle(title) + }, + [totalRam] + ) + useEffect(() => { - getPerformanceForModel(suitableModel) - // eslint-disable-next-line react-hooks/exhaustive-deps + calculatePerformance(suitableModel) }, [suitableModel]) const onDownloadClick = useCallback(() => {