Merge branch 'main' into docs/install-succinct

This commit is contained in:
Daniel 2023-11-23 18:35:48 +08:00 committed by GitHub
commit 07b01e885e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 547 additions and 418 deletions

View File

@ -6,7 +6,7 @@ on:
- main
paths:
- 'docs/**'
tags: ["v[0-9]+.[0-9]+.[0-9]+-docs"]
- '.github/workflows/jan-docs.yml'
pull_request:
branches:
- main
@ -24,7 +24,7 @@ jobs:
CLOUDFLARE_PROJECT_NAME: jan
runs-on: ubuntu-latest
permissions:
contents: read
contents: write
deployments: write
pull-requests: write
steps:
@ -78,13 +78,13 @@ jobs:
Preview URL: ${{ steps.deployCloudflarePages.outputs.url }}
- name: Add Custome Domain file
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
if: github.event_name == 'push' && github.event.pull_request.head.repo.full_name != github.repository
run: echo "${{ vars.DOCUSAURUS_DOMAIN }}" > ./docs/build/CNAME
# Popular action to deploy to GitHub Pages:
# Docs: https://github.com/peaceiris/actions-gh-pages#%EF%B8%8F-docusaurus
- name: Deploy to GitHub Pages
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
if: github.event_name == 'push' && github.event.pull_request.head.repo.full_name != github.repository
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}

View File

@ -28,12 +28,7 @@ jobs:
- name: Update app version base on tag
run: |
if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "Error: Tag is not valid!"
exit 1
fi
jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json
make update-app-version
env:
VERSION_TAG: ${{ steps.tag.outputs.tag }}
@ -49,25 +44,9 @@ jobs:
p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
- name: Build uikit
run: |
cd uikit
yarn install
yarn build
- name: Install yarn dependencies
run: |
yarn build:core
yarn install
yarn build:plugins
env:
APP_PATH: "."
DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
- name: Build and publish app
run: |
yarn build:publish
make build-and-publish
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CSC_LINK: "/tmp/codesign.p12"
@ -75,6 +54,8 @@ jobs:
CSC_IDENTITY_AUTO_DISCOVERY: "true"
APPLE_ID: ${{ secrets.APPLE_ID }}
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APP_PATH: "."
DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
build-windows-x64:
runs-on: windows-latest
@ -99,34 +80,13 @@ jobs:
- name: Update app version base on tag
shell: bash
run: |
if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "Error: Tag is not valid!"
exit 1
fi
jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json
make update-app-version
env:
VERSION_TAG: ${{ steps.tag.outputs.tag }}
- name: Build uikit
run: |
cd uikit
yarn config set network-timeout 300000
yarn install
yarn build
- name: Install yarn dependencies
shell: powershell
run: |
yarn config set network-timeout 300000
yarn build:core
yarn install
$env:NITRO_VERSION = Get-Content .\plugins\inference-plugin\nitro\version.txt; echo $env:NITRO_VERSION
yarn build:plugins
- name: Build and publish app
run: |
yarn build:publish
make build-and-publish
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@ -158,31 +118,13 @@ jobs:
- name: Update app version base on tag
run: |
if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "Error: Tag is not valid!"
exit 1
fi
jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json
make update-app-version
env:
VERSION_TAG: ${{ steps.tag.outputs.tag }}
- name: Build uikit
run: |
cd uikit
yarn install
yarn build
- name: Install yarn dependencies
run: |
yarn config set network-timeout 300000
yarn build:core
yarn install
yarn build:plugins
- name: Build and publish app
run: |
yarn build:publish
make build-and-publish
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@ -43,21 +43,9 @@ jobs:
with:
node-version: 20
- name: Build uikit
run: |
cd uikit
yarn install
yarn build
- name: Linter and test
run: |
yarn config set network-timeout 300000
yarn build:core
yarn install
yarn lint
yarn build:plugins
yarn build:test
yarn test
make test
env:
CSC_IDENTITY_AUTO_DISCOVERY: "false"
@ -81,24 +69,10 @@ jobs:
with:
node-version: 20
- name: Build uikit
run: |
yarn config set network-timeout 300000
cd uikit
yarn install
yarn build
- name: Linter and test
shell: powershell
run: |
yarn config set network-timeout 300000
yarn build:core
yarn install
$env:NITRO_VERSION = Get-Content .\plugins\inference-plugin\nitro\version.txt; echo $env:NITRO_VERSION
yarn build:plugins
yarn build:test
$env:CI="e2e"
yarn test
make test
test-on-ubuntu:
runs-on: [self-hosted, Linux, ubuntu-desktop]
@ -118,19 +92,8 @@ jobs:
with:
node-version: 20
- name: Build uikit
run: |
cd uikit
yarn install
yarn build
- name: Linter and test
run: |
export DISPLAY=$(w -h | awk 'NR==1 {print $2}')
echo -e "Display ID: $DISPLAY"
yarn config set network-timeout 300000
yarn build:core
yarn install
yarn build:plugins
yarn build:test
yarn test
make test

62
Makefile Normal file
View File

@ -0,0 +1,62 @@
# Makefile for Jan Electron App - Build, Lint, Test, and Clean
# Default target, does nothing
all:
	@echo "Specify a target to run"
# Builds the UI kit
# On Windows, a longer yarn network timeout is set first to avoid flaky installs.
build-uikit:
ifeq ($(OS),Windows_NT)
	cd uikit && yarn config set network-timeout 300000 && yarn install && yarn build
else
	cd uikit && yarn install && yarn build
endif
# Updates the app version based on the tag
# Validates VERSION_TAG matches vX.Y.Z, then rewrites electron/package.json's
# "version" field (tag with the leading "v" stripped) via jq.
# NOTE(review): `[[ ]]` is a bash-ism; the default make SHELL is /bin/sh — confirm
# this runs under bash on all CI runners.
# NOTE(review): `${VERSION_TAG#v}` and `$version` contain single `$` in a recipe;
# make normally expands those itself unless escaped as `$$` — verify these reach
# the shell/jq intact.
update-app-version:
	if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then \
	echo "Error: Tag is not valid!"; \
	exit 1; \
	fi
	jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json
	mv /tmp/package.json electron/package.json
# Installs yarn dependencies and builds core and plugins
# Depends on build-uikit. On Windows the whole sequence runs inside one
# PowerShell invocation so the NITRO_VERSION env var is visible to the builds.
install-and-build: build-uikit
ifeq ($(OS),Windows_NT)
	powershell -Command "yarn config set network-timeout 300000; \
	$$env:NITRO_VERSION = Get-Content .\\plugins\\inference-plugin\\nitro\\version.txt; \
	Write-Output \"Nitro version: $$env:NITRO_VERSION\"; yarn build:core; yarn install; yarn build:plugins"
else
	yarn build:core
	yarn install
	yarn build:plugins
endif
# Starts the development server (builds everything first)
dev: install-and-build
	yarn dev
# Linting
lint: install-and-build
	yarn lint
# Testing (runs lint first, then builds and executes the test suite)
test: lint
	yarn build:test
	yarn test
# Builds and publishes the app
build-and-publish: install-and-build
	yarn build:publish
# Build
build: install-and-build
	yarn build
# Removes generated artifacts (node_modules, .next, dist) across the repo
clean:
ifeq ($(OS),Windows_NT)
	powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist -Recurse -Directory | Remove-Item -Recurse -Force"
else
	find . -name "node_modules" -type d -prune -exec rm -rf '{}' +
	find . -name ".next" -type d -exec rm -rf '{}' +
	find . -name "dist" -type d -exec rm -rf '{}' +
endif

View File

@ -91,11 +91,10 @@ Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) fi
- node >= 20.0.0
- yarn >= 1.22.0
- make >= 3.81
### Instructions
Note: These instructions have been tested on macOS only.
1. **Clone the Repository:**
```bash
@ -104,25 +103,10 @@ Note: This instruction is tested on MacOS only.
cd jan
```
2. **Install dependencies:**
```bash
yarn install
# Build core module
yarn build:core
# Packing base plugins
yarn build:plugins
# Packing uikit
yarn build:uikit
```
3. **Run development and Using Jan Desktop**
2. **Run development and Using Jan Desktop**
```
yarn dev
make dev
```
This will start the development server and open the desktop app.
@ -134,19 +118,9 @@ Note: This instruction is tested on MacOS only.
# Do step 1 and 2 in previous section
git clone https://github.com/janhq/jan
cd jan
yarn install
# Build core module
yarn build:core
# Package base plugins
yarn build:plugins
# Packing uikit
yarn build:uikit
# Build the app
yarn build
make build
```
This will build the app for macOS (M1/M2) for production (with code signing already done) and put the result in the `dist` folder.

View File

@ -1,4 +1,4 @@
import { MessageRequest } from "../index";
import { MessageRequest, ThreadMessage } from "../index";
import { JanPlugin } from "../plugin";
/**
@ -16,10 +16,15 @@ export abstract class InferencePlugin extends JanPlugin {
*/
abstract stopModel(): Promise<void>;
/**
* Stops the streaming inference.
*/
abstract stopInference(): Promise<void>;
/**
* Processes an inference request.
* @param data - The data for the inference request.
* @returns The result of the inference request.
*/
abstract inferenceRequest(data: MessageRequest): Promise<any>;
abstract inferenceRequest(data: MessageRequest): Promise<ThreadMessage>;
}

View File

@ -43,9 +43,9 @@ If you are using GitHub pages for hosting, this command is a convenient way to b
### Preview URL, Pre-release and Publishing Documentation
When a PR is created, the preview URL will be automatically commented on the PR. For staging or pre-release documentation, use the following domains [https://pre-release.jan.ai/](https://pre-release.jan.ai/)
When a PR is created, the preview URL will be automatically commented on the PR.
To officially publish documentation, create a tag in the format `vx.y.z-docs` (e.g., `v0.1.1-docs`) on the `main` branch. The documentation will then be published to [https://jan.ai/](https://jan.ai/)
The documentation will then be published to [https://jan.ai/](https://jan.ai/) when the PR is merged to `main`.
### Additional Plugins
- @docusaurus/theme-live-codeblock

View File

@ -2,31 +2,79 @@
title: Messages
---
:::warning
:::caution
Draft Specification: functionality has not been implemented yet.
Feedback: [HackMD: Threads Spec](https://hackmd.io/BM_8o_OCQ-iLCYhunn2Aug)
This is currently under development.
:::
Messages are within `threads` and capture additional metadata.
> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages
## Overview
`Messages` capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
- Users and assistants can send multimedia messages.
- An [OpenAI Message API](https://platform.openai.com/docs/api-reference/messages) compatible endpoint at `localhost:3000/v1/messages`.
## Folder Structure
Messages are saved in the `/threads/{thread_id}` folder in `messages.jsonl` files
```sh
jan/
threads/
assistant_name_unix_timestamp/
...
messages.jsonl
jan_2341243134/
...
messages.jsonl
```
## `message.jsonl`
Individual messages are saved in `jsonl` format for indexing purposes.
## Message Object
> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages/object
```json
{
// Jan specific properties
"updatedAt": "...", // that's it I think
{...message_2}
{...message_1}
{...message_0}
```
// OpenAI compatible properties: https://platform.openai.com/docs/api-reference/messages)
"id": "msg_dKYDWyQvtjDBi3tudL1yWKDa",
"object": "thread.message",
"created_at": 1698983503,
"thread_id": "thread_RGUhOuO9b2nrktrmsQ2uSR6I",
"role": "assistant",
"content": [
### Examples
Here's a standard example `message` sent from a user.
```json
"id": "0", // Sequential or UUID
"object": "thread.message", // Defaults to "thread.message"
"created_at": 1698983503,
"thread_id": "thread_asdf", // Defaults to parent thread
"assistant_id": "jan", // Defaults to parent thread
"role": "user", // From either "user" or "assistant"
"content": [
{
"type": "text",
"text": {
"value": "Hi!?",
"annotations": []
}
}
],
"metadata": {}, // Defaults to {}
// "run_id": "...", // Rather than `run` id abstraction
// "file_ids": [],
```
Here's an example `message` response from an assistant.
```json
"id": "0", // Sequential or UUID
"object": "thread.message", // Defaults to "thread.message"
"created_at": 1698983503,
"thread_id": "thread_asdf", // Defaults to parent thread
"assistant_id": "jan", // Defaults to parent thread
"role": "assistant", // From either "user" or "assistant"
"content": [ // Usually from Chat Completion obj
{
"type": "text",
"text": {
@ -34,25 +82,34 @@ Messages are within `threads` and capture additional metadata.
"annotations": []
}
}
],
"file_ids": [],
"assistant_id": "asst_ToSF7Gb04YMj8AMMm50ZLLtY",
"run_id": "run_BjylUJgDqYK9bOhy4yjAiMrn",
"metadata": {}
}
],
"metadata": {}, // Defaults to {}
// "run_id": "...", // KIV
// "file_ids": [], // KIV
// "usage": {} // KIV: saving chat completion properties https://platform.openai.com/docs/api-reference/chat/object
```
## Messages API
> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages
## API Reference
Jan's `messages` API is compatible with [OpenAI's Messages API](https://platform.openai.com/docs/api-reference/messages), with additional methods for managing messages locally.
See [Jan Messages API](https://jan.ai/api-reference#tag/Messages)
<!-- TODO clean this part up into API -->
<!--
### Get list message
> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages/getMessage
- Example request
```shell
curl {JAN_URL}/v1/threads/{thread_id}/messages/{message_id} \
-H "Content-Type: application/json"
```
- Example response
```json
{
"id": "msg_abc123",
@ -75,9 +132,13 @@ Messages are within `threads` and capture additional metadata.
"metadata": {}
}
```
### Create message
> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages/createMessage
- Example request
```shell
curl -X POST {JAN_URL}/v1/threads/{thread_id}/messages \
-H "Content-Type: application/json" \
@ -86,9 +147,11 @@ Messages are within `threads` and capture additional metadata.
"content": "How does AI work? Explain it in simple terms."
}'
```
- Example response
```json
{
{
"id": "msg_abc123",
"object": "thread.message",
"created_at": 1699017614,
@ -107,18 +170,24 @@ Messages are within `threads` and capture additional metadata.
"assistant_id": null,
"run_id": null,
"metadata": {}
}
}
```
### Get message
> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/assistants/listAssistants
- Example request
```shell
curl {JAN_URL}/v1/threads/{thread_id}/messages/{message_id} \
-H "Content-Type: application/json"
```
- Example response
```json
{
{
"id": "msg_abc123",
"object": "thread.message",
"created_at": 1699017614,
@ -137,42 +206,53 @@ Messages are within `threads` and capture additional metadata.
"assistant_id": null,
"run_id": null,
"metadata": {}
}
}
```
### Modify message
> Jan: TODO: Do we need to modify message? Or let user create new message?
# Get message file
> OpenAI Equivalent: https://api.openai.com/v1/threads/{thread_id}/messages/{message_id}/files/{file_id}
- Example request
```shell
curl {JAN_URL}/v1/threads/{thread_id}/messages/{message_id}/files/{file_id} \
-H "Content-Type: application/json"
```
- Example response
```json
{
{
"id": "file-abc123",
"object": "thread.message.file",
"created_at": 1699061776,
"message_id": "msg_abc123"
}
}
```
# List message files
> OpenAI Equivalent: https://api.openai.com/v1/threads/{thread_id}/messages/{message_id}/files
```
````
- Example request
```shell
curl {JAN_URL}/v1/threads/{thread_id}/messages/{message_id}/files/{file_id} \
-H "Content-Type: application/json"
```
````
- Example response
```json
{
{
"id": "file-abc123",
"object": "thread.message.file",
"created_at": 1699061776,
"message_id": "msg_abc123"
}
```
}
``` -->

View File

@ -4,7 +4,7 @@ title: Models
:::caution
Draft Specification: functionality has not been implemented yet.
This is currently under development.
:::
@ -46,19 +46,19 @@ jan/ # Jan root folder
- `model.json` contains metadata and default parameters used to run a model.
- The only required field is `source_url`.
### GGUF Example
### Example
Here's a standard example `model.json` for a GGUF model.
- `source_url`: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/.
```json
"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
"type": "model", // Defaults to "model"
"version": "1", // Defaults to 1
"id": "zephyr-7b" // Defaults to foldername
"object": "model", // Defaults to "model"
"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
"name": "Zephyr 7B" // Defaults to foldername
"owned_by": "you" // Defaults to you
"version": "1", // Defaults to 1
"created": 1231231 // Defaults to file creation time
"description": ""
"state": enum[null, "downloading", "ready", "starting", "stopping", ...]
@ -95,7 +95,7 @@ See [Jan Models API](https://jan.ai/api-reference#tag/Models)
:::caution
This is current under development.
This is currently under development.
:::

View File

@ -2,89 +2,101 @@
title: Threads
---
:::warning
:::caution
Draft Specification: functionality has not been implemented yet.
Feedback: [HackMD: Threads Spec](https://hackmd.io/BM_8o_OCQ-iLCYhunn2Aug)
This is currently under development.
:::
## User Stories
## Overview
_Users can chat with an assistant in a thread_
`Threads` are conversations between an `assistant` and the user:
- See [Messages Spec](./messages.md)
- Users can tweak `model` params and `assistant` behavior within each thread.
- Users can import and export threads.
- An [OpenAI Thread API](https://platform.openai.com/docs/api-reference/threads) compatible endpoint at `localhost:3000/v1/threads`.
_Users can change assistant and model parameters in a thread_
## Folder Structure
- Wireframes of
_Users can delete all thread history_
- Wireframes of settings page.
## Jan Thread Object
- A `Jan Thread Object` is a "representation of a conversation thread" between an `assistant` and the user
- Objects are defined by `thread-uuid.json` files in `json` format
- Objects are designed to be compatible with `OpenAI Thread Objects` with additional properties needed to run on our infrastructure.
- Objects contain a `models` field, to track when the user overrides the assistant's default model parameters.
| Property | Type | Description | Validation |
| ---------- | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------ |
| `object` | enum: `model`, `assistant`, `thread`, `message` | The Jan Object type | Defaults to `thread` |
| `assistants` | array | An array of Jan Assistant Objects. Threads can "override" an assistant's parameters. Thread-level model parameters are directly saved in the `thread.models` property! (see Models spec) | Defaults to `assistant.name` |
| `messages` | array | An array of Jan Message Objects. (see Messages spec) | Defaults to `[]` |
| `metadata` | map | Useful for storing additional information about the object in a structured format. | Defaults to `{}` |
### Generic Example
```json
// janroot/threads/jan_1700123404.json
"assistants": ["assistant-123"],
"messages": [
{...message0}, {...message1}
],
"metadata": {
"summary": "funny physics joke",
},
```
## Filesystem
- `Jan Thread Objects`'s `json` files always has the naming schema: `assistant_uuid` + `unix_time_thread_created_at. See below.
- Threads are all saved in the `janroot/threads` folder in a flat folder structure.
- The folder is standalone and can be easily zipped, exported, and cleared.
- Threads are saved in the `/threads` folder.
- Threads are organized by folders, one for each thread, and can be easily zipped, exported, and cleared.
- Thread folders follow the naming: `assistant_id` + `thread_created_at`.
- Thread folders also contain `messages.jsonl` files. See [messages](/specs/messages).
```sh
janroot/
jan/
threads/
jan_1700123404.json
homework_helper_700120003.json
assistant_name_unix_timestamp/
thread.json
messages.jsonl
jan_2341243134/
thread.json
```
## Jan API
## `thread.json`
- Each `thread` folder contains a `thread.json` file, which is a representation of a thread.
- `thread.json` contains metadata and model parameter overrides.
- There are no required fields.
### Example
Here's a standard example `thread.json` for a conversation between the user and the default Jan assistant.
```json
"id": "thread_....", // Defaults to foldername
"object": "thread", // Defaults to "thread"
"title": "funny physics joke", // Defaults to ""
"assistants": [
{
"assistant_id": "jan", // Defaults to "jan"
"model": { // Defaults to 1 currently active model (can be changed before thread is begun)
"settings": {}, // Defaults to and overrides assistant.json's "settings" (and if none, then model.json "settings")
"parameters": {}, // Defaults to and overrides assistant.json's "parameters" (and if none, then model.json "parameters")
}
},
],
"created": 1231231 // Defaults to file creation time
"metadata": {}, // Defaults to {}
```
## API Reference
Jan's Threads API is compatible with [OpenAI's Threads API](https://platform.openai.com/docs/api-reference/threads), with additional methods for managing threads locally.
See [Jan Threads API](https://jan.ai/api-reference#tag/Threads)
<!-- TODO clean this part up into API -->
<!--
### Get thread
> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/threads/getThread
- Example request
```shell
curl {JAN_URL}/v1/threads/{thread_id}
```
- Example response
```json
{
{
"id": "thread_abc123",
"object": "thread",
"created_at": 1699014083,
"assistants": ["assistant-001"],
"metadata": {},
"messages": []
}
}
```
### Create Thread
> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/threads/createThread
- Example request
```shell
curl -X POST {JAN_URL}/v1/threads \
-H "Content-Type: application/json" \
@ -99,18 +111,24 @@ janroot/
}]
}'
```
- Example response
```json
{
"id": 'thread_abc123',
"object": 'thread',
{
"id": "thread_abc123",
"object": "thread",
"created_at": 1699014083,
"metadata": {}
}
}
```
### Modify Thread
> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/threads/modifyThread
- Example request
```shell
curl -X POST {JAN_URL}/v1/threads/{thread_id} \
-H "Content-Type: application/json" \
@ -125,43 +143,55 @@ janroot/
}]
}'
```
- Example response
```json
{
"id": 'thread_abc123',
"object": 'thread',
{
"id": "thread_abc123",
"object": "thread",
"created_at": 1699014083,
"metadata": {}
}
}
```
- https://platform.openai.com/docs/api-reference/threads/modifyThread
### Delete Thread
> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/threads/deleteThread
- Example request
```shell
curl -X DELETE {JAN_URL}/v1/threads/{thread_id}
```
- Example response
```json
{
{
"id": "thread_abc123",
"object": "thread.deleted",
"deleted": true
}
}
```
### List Threads
> This is a Jan-only endpoint, not supported by OAI yet.
- Example request
```shell
curl {JAN_URL}/v1/threads \
-H "Content-Type: application/json" \
```
- Example response
```json
[
[
{
"id": "thread_abc123",
"object": "thread",
@ -175,19 +205,23 @@ janroot/
"object": "thread",
"created_at": 1699014083,
"assistants": ["assistant-002", "assistant-002"],
"metadata": {},
"metadata": {}
}
]
]
```
### Get & Modify `Thread.Assistants`
-> Can achieve this goal by calling `Modify Thread` API
#### `GET v1/threads/{thread_id}/assistants`
-> Can achieve this goal by calling `Get Thread` API
#### `POST v1/threads/{thread_id}/assistants/{assistant_id}`
-> Can achieve this goal by calling `Modify Assistant` API with `thread.assistant[]`
### List `Thread.Messages`
-> Can achieve this goal by calling `Get Thread` API
-> Can achieve this goal by calling `Get Thread` API -->

View File

@ -67,8 +67,8 @@ const sidebars = {
items: [
"specs/chats",
"specs/models",
// "specs/threads",
// "specs/messages",
"specs/threads",
"specs/messages",
// "specs/assistants",
// "specs/files",
// "specs/jan",

View File

@ -97,6 +97,9 @@
@apply leading-loose;
line-height: 1.5;
}
+ div {
margin-top: 12px;
}
}
}
@ -113,3 +116,11 @@
.table-of-contents {
font-size: 14px;
}
.task-list-item {
list-style: none;
}
blockquote {
margin-bottom: 12px;
}

View File

@ -1 +1 @@
0.1.8
0.1.11

View File

@ -36,6 +36,7 @@
"dependencies": {
"@janhq/core": "file:../../core",
"download-cli": "^1.1.1",
"electron-log": "^5.0.1",
"fetch-retry": "^5.0.6",
"kill-port": "^2.0.1",
"path-browserify": "^1.0.1",
@ -55,6 +56,7 @@
"bundleDependencies": [
"tcp-port-used",
"kill-port",
"fetch-retry"
"fetch-retry",
"electron-log"
]
}

View File

@ -4,7 +4,7 @@ import { Observable } from "rxjs";
* @param recentMessages - An array of recent messages to use as context for the inference.
* @returns An Observable that emits the generated response as a string.
*/
export function requestInference(recentMessages: any[]): Observable<string> {
export function requestInference(recentMessages: any[], controller?: AbortController): Observable<string> {
return new Observable((subscriber) => {
const requestBody = JSON.stringify({
messages: recentMessages,
@ -20,6 +20,7 @@ export function requestInference(recentMessages: any[]): Observable<string> {
"Access-Control-Allow-Origin": "*",
},
body: requestBody,
signal: controller?.signal
})
.then(async (response) => {
const stream = response.body;

View File

@ -28,6 +28,8 @@ import { fs } from "@janhq/core";
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferencePlugin implements InferencePlugin {
controller = new AbortController();
isCancelled = false;
/**
* Returns the type of the plugin.
* @returns {PluginType} The type of the plugin.
@ -40,7 +42,9 @@ export default class JanInferencePlugin implements InferencePlugin {
* Subscribes to events emitted by the @janhq/core package.
*/
onLoad(): void {
events.on(EventName.OnNewMessageRequest, this.handleMessageRequest);
events.on(EventName.OnNewMessageRequest, (data) =>
JanInferencePlugin.handleMessageRequest(data, this)
);
}
/**
@ -70,23 +74,32 @@ export default class JanInferencePlugin implements InferencePlugin {
return executeOnMain(MODULE, "killSubprocess");
}
/**
* Stops streaming inference.
* @returns {Promise<void>} A promise that resolves when the streaming is stopped.
*/
async stopInference(): Promise<void> {
this.isCancelled = true;
this.controller?.abort();
}
/**
* Makes a single response inference request.
* @param {MessageRequest} data - The data for the inference request.
* @returns {Promise<any>} A promise that resolves with the inference response.
*/
async inferenceRequest(data: MessageRequest): Promise<any> {
const message = {
...data,
message: "",
user: "assistant",
async inferenceRequest(data: MessageRequest): Promise<ThreadMessage> {
const message: ThreadMessage = {
threadId: data.threadId,
content: "",
createdAt: new Date().toISOString(),
status: MessageStatus.Ready,
};
return new Promise(async (resolve, reject) => {
requestInference(data.messages ?? []).subscribe({
next: (content) => {
message.message = content;
message.content = content;
},
complete: async () => {
resolve(message);
@ -100,9 +113,14 @@ export default class JanInferencePlugin implements InferencePlugin {
/**
* Handles a new message request by making an inference request and emitting events.
* Function registered in event manager, should be static to avoid binding issues.
* Pass instance as a reference.
* @param {MessageRequest} data - The data for the new message request.
*/
private async handleMessageRequest(data: MessageRequest) {
private static async handleMessageRequest(
data: MessageRequest,
instance: JanInferencePlugin
) {
const message: ThreadMessage = {
threadId: data.threadId,
content: "",
@ -113,7 +131,10 @@ export default class JanInferencePlugin implements InferencePlugin {
};
events.emit(EventName.OnNewMessageResponse, message);
requestInference(data.messages).subscribe({
instance.isCancelled = false;
instance.controller = new AbortController();
requestInference(data.messages, instance.controller).subscribe({
next: (content) => {
message.content = content;
events.emit(EventName.OnMessageResponseUpdate, message);
@ -125,7 +146,8 @@ export default class JanInferencePlugin implements InferencePlugin {
},
error: async (err) => {
message.content =
message.content.trim() + "\n" + "Error occurred: " + err.message;
message.content.trim() +
(instance.isCancelled ? "" : "\n" + "Error occurred: " + err.message);
message.status = MessageStatus.Ready;
events.emit(EventName.OnMessageResponseUpdate, message);
},

View File

@ -5,6 +5,8 @@ const { spawn } = require("child_process");
const tcpPortUsed = require("tcp-port-used");
const fetchRetry = require("fetch-retry")(global.fetch);
const log = require("electron-log");
// The PORT to use for the Nitro subprocess
const PORT = 3928;
const LOCAL_HOST = "127.0.0.1";
@ -35,6 +37,7 @@ interface InitModelResponse {
function initModel(modelFile: string): Promise<InitModelResponse> {
// 1. Check if the model file exists
currentModelFile = modelFile;
log.info("Started to load model " + modelFile);
return (
// 1. Check if the port is used, if used, attempt to unload model / kill nitro process
@ -42,13 +45,12 @@ function initModel(modelFile: string): Promise<InitModelResponse> {
.then(checkAndUnloadNitro)
// 2. Spawn the Nitro subprocess
.then(spawnNitroProcess)
// 3. Wait until the port is used (Nitro http server is up)
.then(() => tcpPortUsed.waitUntilUsed(PORT, 300, 30000))
// 4. Load the model into the Nitro subprocess (HTTP POST request)
.then(loadLLMModel)
// 5. Check if the model is loaded successfully
.then(validateModelStatus)
.catch((err) => {
log.error("error: " + JSON.stringify(err));
return { error: err };
})
);
@ -63,6 +65,7 @@ function loadLLMModel(): Promise<Response> {
llama_model_path: currentModelFile,
ctx_len: 2048,
ngl: 100,
cont_batching: false,
embedding: false, // Always enable embedding mode on
};
@ -77,6 +80,7 @@ function loadLLMModel(): Promise<Response> {
retryDelay: 500,
}).catch((err) => {
console.error(err);
log.error("error: " + JSON.stringify(err));
// Fetch error, Nitro server might not started properly
throw new Error("Model loading failed.");
});
@ -112,7 +116,8 @@ async function validateModelStatus(): Promise<InitModelResponse> {
return { error: "Model loading failed" };
})
.catch((err) => {
return { error: `Model loading failed. ${err.message}` };
log.error("Model loading failed" + err.toString());
return { error: `Model loading failed.` };
});
}
@ -158,7 +163,8 @@ function checkAndUnloadNitro() {
* Using child-process to spawn the process
* Should run exactly platform specified Nitro binary version
*/
function spawnNitroProcess() {
async function spawnNitroProcess(): Promise<void> {
return new Promise((resolve, reject) => {
let binaryFolder = path.join(__dirname, "nitro"); // Current directory by default
let binaryName;
@ -192,12 +198,18 @@ function spawnNitroProcess() {
});
subprocess.stderr.on("data", (data) => {
log.error("subprocess error:" + data.toString());
console.error(`stderr: ${data}`);
});
subprocess.on("close", (code) => {
console.log(`child process exited with code ${code}`);
subprocess = null;
reject(`Nitro process exited. ${code ?? ""}`);
});
tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
resolve();
});
});
}
@ -206,11 +218,14 @@ function spawnNitroProcess() {
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
*/
function validateModelVersion(): Promise<void> {
log.info("validateModelVersion");
// Read the file
return new Promise((resolve, reject) => {
fs.open(currentModelFile, "r", (err, fd) => {
if (err) {
log.error("validateModelVersion error" + JSON.stringify(err));
console.error(err.message);
reject(err);
return;
}
@ -220,7 +235,13 @@ function validateModelVersion(): Promise<void> {
// Model version will be the 5th byte of the file
fs.read(fd, buffer, 0, 1, 4, (err, bytesRead, buffer) => {
if (err) {
log.error("validateModelVersion open error" + JSON.stringify(err));
console.error(err.message);
fs.close(fd, (err) => {
log.error("validateModelVersion close error" + JSON.stringify(err));
if (err) console.error(err.message);
});
reject(err);
} else {
// Interpret the byte as ASCII
if (buffer[0] === 0x01) {

View File

@ -16,7 +16,6 @@ import {
import { atom, useAtomValue } from 'jotai'
import { useDownloadState } from '@/hooks/useDownloadState'
import useGetPerformanceTag from '@/hooks/useGetPerformanceTag'
import { formatDownloadPercentage } from '@/utils/converter'
@ -30,7 +29,6 @@ export default function ModalCancelDownload({
isFromList,
}: Props) {
const { modelDownloadStateAtom } = useDownloadState()
useGetPerformanceTag()
const downloadAtom = useMemo(
() => atom((get) => get(modelDownloadStateAtom)[suitableModel.name]),
// eslint-disable-next-line react-hooks/exhaustive-deps

View File

@ -49,7 +49,7 @@ export function useActiveModel() {
console.debug('Init model: ', modelId)
const path = join('models', model.name, modelId)
const res = await initModel(path)
if (res?.error) {
if (res?.error && (!activeModel?.id || modelId === activeModel?.id)) {
const errorMessage = `${res.error}`
alert(errorMessage)
setStateModel(() => ({

View File

@ -1,27 +1,22 @@
import { useState } from 'react'
import { ModelVersion } from '@janhq/core/lib/types'
import { useAtomValue } from 'jotai'
import { ModelPerformance, TagType } from '@/constants/tagType'
import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
// Recommendation:
// `Recommended (green)`: "Max RAM required" is at most 80% of the user's max RAM.
// `Slow on your device (yellow)`: "Max RAM required" is 80-100% of the user's max RAM.
// `Not enough RAM (red)`: the user's RAM is below "Max RAM required".
export default function useGetPerformanceTag() {
const [performanceTag, setPerformanceTag] = useState<TagType | undefined>()
const totalRam = useAtomValue(totalRamAtom)
const getPerformanceForModel = async (modelVersion: ModelVersion) => {
async function getPerformanceForModel(
modelVersion: ModelVersion,
totalRam: number
): Promise<{ title: string; performanceTag: TagType }> {
const requiredRam = modelVersion.maxRamRequired
setPerformanceTag(calculateRamPerformance(requiredRam, totalRam))
}
const performanceTag = calculateRamPerformance(requiredRam, totalRam)
let title = ''
switch (performanceTag) {
case ModelPerformance.PerformancePositive:
title = 'Recommended'
@ -33,8 +28,10 @@ export default function useGetPerformanceTag() {
title = 'Not enough RAM'
break
}
return { title, performanceTag }
}
return { performanceTag, title, getPerformanceForModel }
return { getPerformanceForModel }
}
const calculateRamPerformance = (

View File

@ -63,13 +63,13 @@ export default function useSendChatMessage() {
if (
currentConvo &&
currentConvo.id === newMessage.threadId &&
result?.message &&
result?.message?.trim().length > 0 &&
result.message.split(' ').length <= 10
result?.content &&
result?.content?.trim().length > 0 &&
result.content.split(' ').length <= 10
) {
const updatedConv = {
...currentConvo,
summary: result.message,
summary: result.content,
}
updateConversation(updatedConv)
pluginManager

View File

@ -15,8 +15,8 @@
"@headlessui/react": "^1.7.15",
"@heroicons/react": "^2.0.18",
"@hookform/resolvers": "^3.3.2",
"@janhq/core": "./core",
"@janhq/uikit": "./uikit",
"@janhq/core": "link:./core",
"@janhq/uikit": "link:./uikit",
"autoprefixer": "10.4.16",
"class-variance-authority": "^0.7.0",
"framer-motion": "^10.16.4",

View File

@ -1,5 +1,5 @@
/* eslint-disable react-hooks/exhaustive-deps */
import { useCallback, useEffect, useMemo } from 'react'
import { useCallback, useEffect, useMemo, useState } from 'react'
import { ModelCatalog, ModelVersion } from '@janhq/core/lib/types'
import { Badge, Button } from '@janhq/uikit'
@ -20,6 +20,8 @@ import { useMainViewState } from '@/hooks/useMainViewState'
import { toGigabytes } from '@/utils/converter'
import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
type Props = {
suitableModel: ModelVersion
exploreModel: ModelCatalog
@ -32,8 +34,12 @@ const ExploreModelItemHeader: React.FC<Props> = ({
const { downloadModel } = useDownloadModel()
const { downloadedModels } = useGetDownloadedModels()
const { modelDownloadStateAtom, downloadStates } = useDownloadState()
const { performanceTag, title, getPerformanceForModel } =
useGetPerformanceTag()
const { getPerformanceForModel } = useGetPerformanceTag()
const [title, setTitle] = useState<string>('Recommended')
const totalRam = useAtomValue(totalRamAtom)
const [performanceTag, setPerformanceTag] = useState<TagType>(
ModelPerformance.PerformancePositive
)
const downloadAtom = useMemo(
() => atom((get) => get(modelDownloadStateAtom)[suitableModel.name]),
[suitableModel.name]
@ -41,9 +47,20 @@ const ExploreModelItemHeader: React.FC<Props> = ({
const downloadState = useAtomValue(downloadAtom)
const { setMainViewState } = useMainViewState()
const calculatePerformance = useCallback(
(suitableModel: ModelVersion) => async () => {
const { title, performanceTag } = await getPerformanceForModel(
suitableModel,
totalRam
)
setPerformanceTag(performanceTag)
setTitle(title)
},
[totalRam]
)
useEffect(() => {
getPerformanceForModel(suitableModel)
// eslint-disable-next-line react-hooks/exhaustive-deps
calculatePerformance(suitableModel)
}, [suitableModel])
const onDownloadClick = useCallback(() => {