Resolved conflicts by keeping HEAD changes
parent 19274f7e69
commit a8abc9f9aa
@@ -1,2 +0,0 @@
bin
!version.txt
@@ -1,75 +0,0 @@
# Create a Jan Extension using Typescript

Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀

## Create Your Own Extension

To create your own extension, you can use this repository as a template! Just follow the instructions below:

1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository

## Initial Setup

After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.

> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!

1. :hammer_and_wrench: Install the dependencies

   ```bash
   npm install
   ```

1. :building_construction: Package the TypeScript for distribution

   ```bash
   npm run bundle
   ```

1. :white_check_mark: Check your artifact

   There will be a `.tgz` file in your extension directory now.

## Update the Extension Metadata

The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.

When you copy this repository, update `package.json` with the name and description of your extension.
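As a minimal sketch, these are the fields you would typically edit — the values below are placeholders for illustration, not the ones shipped with any particular extension:

```json
{
  "name": "@your-scope/your-extension",
  "productName": "Your Extension",
  "version": "0.1.0",
  "description": "A short description of what your extension does.",
  "main": "dist/index.js"
}
```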
## Update the Extension Code

The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.

There are a few things to keep in mind when writing your extension code:

- Most Jan Extension functions are processed asynchronously.
  In `index.ts`, you will see that the extension function will return a `Promise<any>`.

  ```typescript
  import { events, MessageEvent, MessageRequest } from '@janhq/core'

  function onStart(): Promise<any> {
    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.inference(data)
    )
  }
  ```

For more information about the Jan Extension Core module, see the
[documentation](https://github.com/menloresearch/jan/blob/main/core/README.md).
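As a complement, here is a minimal sketch of the other direction — emitting an event back to the app when work completes. `MessageEvent.OnMessageUpdate` and the `ThreadMessage` type are assumptions about the core module's exports; verify them against the version of `@janhq/core` you depend on:

```typescript
import { events, MessageEvent, ThreadMessage } from '@janhq/core'

// Hypothetical completion handler: notifies the app that a message has
// changed. The event name is an assumption; check the MessageEvent enum.
function onInferenceDone(message: ThreadMessage): void {
  events.emit(MessageEvent.OnMessageUpdate, message)
}
```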
So, what are you waiting for? Go ahead and start customizing your extension!
@@ -1 +0,0 @@
1.0.13-rc9
@@ -1,40 +0,0 @@
@echo off
set BIN_PATH=./bin
set SHARED_PATH=./../../electron/shared
set /p CORTEX_VERSION=<./bin/version.txt
set ENGINE_VERSION=b5509

@REM Download llama.cpp binaries
set DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
set DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
set CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%
set SUBFOLDERS=win-noavx-cuda-cu12.0-x64 win-noavx-cuda-cu11.7-x64 win-avx2-cuda-cu12.0-x64 win-avx2-cuda-cu11.7-x64 win-noavx-x64 win-avx-x64 win-avx2-x64 win-avx512-x64 win-vulkan-x64

call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/menloresearch/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-cu12.0-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-cuda-cu12.0-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-cu11.7-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-cuda-cu11.7-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-cu12.0-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-cuda-cu12.0-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-cu11.7-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-cuda-cu11.7-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx512-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_GGML_URL%-vulkan-x64.zip -e --strip 1 -o %SHARED_PATH%/engines/llama.cpp/win-vulkan-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu12.0-x64.tar.gz -e --strip 1 -o %BIN_PATH%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu11.7-x64.tar.gz -e --strip 1 -o %BIN_PATH%

move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe
del %BIN_PATH%\cortex-beta.exe
del %BIN_PATH%\cortex.exe

@REM Loop through each folder and move DLLs
for %%F in (%SUBFOLDERS%) do (
    echo Processing folder: %SHARED_PATH%\engines\llama.cpp\%%F\%ENGINE_VERSION%

    @REM Move cu*.dll files
    for %%D in (%SHARED_PATH%\engines\llama.cpp\%%F\%ENGINE_VERSION%\cu*.dll) do (
        move "%%D" "%BIN_PATH%"
    )
)

echo DLL files moved successfully.
@@ -1,50 +0,0 @@
#!/bin/bash

# Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt)
ENGINE_VERSION=b5509
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
BIN_PATH=./bin
SHARED_PATH="../../electron/shared"
# Detect platform
OS_TYPE=$(uname)

if [ "$OS_TYPE" == "Linux" ]; then
  # Linux downloads
  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
  mv ./bin/cortex-server-beta ./bin/cortex-server
  rm -rf ./bin/cortex
  rm -rf ./bin/cortex-beta
  chmod +x "./bin/cortex-server"

  # Download engines for Linux
  download "${ENGINE_DOWNLOAD_URL}-linux-noavx-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx2-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx512-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx512-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu12.0-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu12.0-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu11.7-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu11.7-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu12.0-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu12.0-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu11.7-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu11.7-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-vulkan-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-vulkan-x64/${ENGINE_VERSION}" 1
  download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu12.0-x64.tar.gz" -e --strip 1 -o "${BIN_PATH}" 1
  download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu11.7-x64.tar.gz" -e --strip 1 -o "${BIN_PATH}" 1

elif [ "$OS_TYPE" == "Darwin" ]; then
  # macOS downloads
  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
  mv ./bin/cortex-server-beta ./bin/cortex-server
  rm -rf ./bin/cortex
  rm -rf ./bin/cortex-beta
  chmod +x "./bin/cortex-server"

  # Download engines for macOS
  download "${ENGINE_DOWNLOAD_URL}-macos-arm64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/macos-arm64/${ENGINE_VERSION}"
  download "${ENGINE_DOWNLOAD_URL}-macos-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/macos-x64/${ENGINE_VERSION}"

else
  echo "Unsupported operating system: $OS_TYPE"
  exit 1
fi
@@ -1,67 +0,0 @@
{
  "name": "@janhq/inference-cortex-extension",
  "productName": "Cortex Inference Engine",
  "version": "1.0.25",
  "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
  "main": "dist/index.js",
  "node": "dist/node/index.cjs.js",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "scripts": {
    "test": "vitest run",
    "build": "rolldown -c rolldown.config.mjs",
    "downloadcortex:linux:darwin": "./download.sh",
    "downloadcortex:win32": "download.bat",
    "downloadcortex": "run-script-os",
    "build:publish:darwin": "rimraf *.tgz --glob || true && yarn build && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
    "build:publish:win32:linux": "rimraf *.tgz --glob || true && yarn build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
    "build:publish": "run-script-os"
  },
  "exports": {
    ".": "./dist/index.js",
    "./main": "./dist/node/index.cjs.js"
  },
  "devDependencies": {
    "@jest/globals": "^29.7.0",
    "@types/decompress": "^4.2.7",
    "@types/jest": "^29.5.12",
    "@types/node": "^20.11.4",
    "@types/os-utils": "^0.0.4",
    "@types/tcp-port-used": "^1.0.4",
    "cpx": "^1.5.0",
    "download-cli": "^1.1.1",
    "jest": "^29.7.0",
    "rimraf": "^3.0.2",
    "rolldown": "1.0.0-beta.1",
    "run-script-os": "^1.1.6",
    "ts-jest": "^29.1.2",
    "typescript": "^5.3.3",
    "vitest": "^3.0.8"
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "fetch-retry": "^5.0.6",
    "ky": "^1.7.2",
    "p-queue": "^8.0.1",
    "rxjs": "^7.8.1",
    "ulidx": "^2.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "tcp-port-used",
    "fetch-retry",
    "@janhq/core",
    "decompress"
  ],
  "installConfig": {
    "hoistingLimits": "workspaces"
  },
  "packageManager": "yarn@4.5.3"
}
@@ -1,126 +0,0 @@
[
  {
    "key": "auto_unload_models",
    "title": "Auto-Unload Old Models",
    "description": "Automatically unloads models that are not in use to free up memory. Ensure only one model is loaded at a time.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "context_shift",
    "title": "Context Shift",
    "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": false
    }
  },
  {
    "key": "cont_batching",
    "title": "Continuous Batching",
    "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": false
    }
  },
  {
    "key": "n_parallel",
    "title": "Parallel Operations",
    "description": "Number of prompts that can be processed simultaneously by the model.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "1",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cpu_threads",
    "title": "CPU Threads",
    "description": "Number of CPU cores used for model processing when running without GPU.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "-1 (auto-detect)",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "threads_batch",
    "title": "Threads (Batch)",
    "description": "Number of threads for batch and prompt processing (default: same as Threads).",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "-1 (same as Threads)",
      "type": "number"
    }
  },
  {
    "key": "flash_attn",
    "title": "Flash Attention",
    "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "caching_enabled",
    "title": "Caching",
    "description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "cache_type",
    "title": "KV Cache Type",
    "description": "Controls memory usage and precision trade-off.",
    "controllerType": "dropdown",
    "controllerProps": {
      "value": "q8_0",
      "options": [
        {
          "value": "q4_0",
          "name": "q4_0"
        },
        {
          "value": "q8_0",
          "name": "q8_0"
        },
        {
          "value": "f16",
          "name": "f16"
        }
      ]
    }
  },
  {
    "key": "use_mmap",
    "title": "mmap",
    "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "hugging-face-access-token",
    "title": "Hugging Face Access Token",
    "description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub, allowing applications to perform specific actions specified by the scope of permissions granted.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "hf_**********************************",
      "type": "password",
      "inputActions": ["unobscure", "copy"]
    }
  }
]
@@ -1,44 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import defaultSettingJson from './resources/default_settings.json' with { type: 'json' }

export default defineConfig([
  {
    input: 'src/index.ts',
    output: {
      format: 'esm',
      file: 'dist/index.js',
    },
    platform: 'browser',
    define: {
      NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      SETTINGS: JSON.stringify(defaultSettingJson),
      CORTEX_API_URL: JSON.stringify(
        `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
      CORTEX_SOCKET_URL: JSON.stringify(
        `ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
      CORTEX_ENGINE_VERSION: JSON.stringify('b5509'),
    },
  },
  {
    input: 'src/node/index.ts',
    external: ['@janhq/core/node'],
    output: {
      format: 'cjs',
      file: 'dist/node/index.cjs.js',
      sourcemap: false,
      inlineDynamicImports: true,
    },
    resolve: {
      extensions: ['.js', '.ts', '.json'],
    },
    define: {
      CORTEX_API_URL: JSON.stringify(
        `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
    },
    platform: 'node',
  },
])
@@ -1,5 +0,0 @@
declare const NODE: string
declare const CORTEX_API_URL: string
declare const CORTEX_SOCKET_URL: string
declare const CORTEX_ENGINE_VERSION: string
declare const SETTINGS: any
@@ -1,452 +0,0 @@
import { describe, beforeEach, it, expect, vi, afterEach } from 'vitest'

// Must mock before imports
vi.mock('@janhq/core', () => {
  return {
    executeOnMain: vi.fn().mockResolvedValue({}),
    events: {
      emit: vi.fn(),
    },
    extractModelLoadParams: vi.fn().mockReturnValue({}),
    ModelEvent: {
      OnModelsUpdate: 'OnModelsUpdate',
      OnModelStopped: 'OnModelStopped',
    },
    EngineEvent: {
      OnEngineUpdate: 'OnEngineUpdate',
    },
    InferenceEngine: {
      cortex: 'cortex',
      nitro: 'nitro',
      cortex_llamacpp: 'cortex_llamacpp',
    },
    LocalOAIEngine: class LocalOAIEngine {
      onLoad() {}
      onUnload() {}
    },
  }
})

import JanInferenceCortexExtension, { Settings } from './index'
import { InferenceEngine, ModelEvent, EngineEvent, executeOnMain, events } from '@janhq/core'
import ky from 'ky'

// Mock global variables
const CORTEX_API_URL = 'http://localhost:3000'
const CORTEX_SOCKET_URL = 'ws://localhost:3000'
const SETTINGS = [
  { id: 'n_parallel', name: 'Parallel Execution', description: 'Number of parallel executions', type: 'number', value: '4' },
  { id: 'cont_batching', name: 'Continuous Batching', description: 'Enable continuous batching', type: 'boolean', value: true },
  { id: 'caching_enabled', name: 'Caching', description: 'Enable caching', type: 'boolean', value: true },
  { id: 'flash_attn', name: 'Flash Attention', description: 'Enable flash attention', type: 'boolean', value: true },
  { id: 'cache_type', name: 'Cache Type', description: 'Type of cache to use', type: 'string', value: 'f16' },
  { id: 'use_mmap', name: 'Use Memory Map', description: 'Use memory mapping', type: 'boolean', value: true },
  { id: 'cpu_threads', name: 'CPU Threads', description: 'Number of CPU threads', type: 'number', value: '' },
]
const NODE = 'node'

// Mock globals
vi.stubGlobal('CORTEX_API_URL', CORTEX_API_URL)
vi.stubGlobal('CORTEX_SOCKET_URL', CORTEX_SOCKET_URL)
vi.stubGlobal('SETTINGS', SETTINGS)
vi.stubGlobal('NODE', NODE)
vi.stubGlobal('window', {
  addEventListener: vi.fn(),
})

// Mock WebSocket
class MockWebSocket {
  url: string
  listeners: Record<string, (data: any) => void>
  onclose?: (event: { code: number }) => void

  constructor(url: string) {
    this.url = url
    this.listeners = {}
  }

  addEventListener(event: string, listener: (data: any) => void) {
    this.listeners[event] = listener
  }

  emit(event: string, data: any) {
    if (this.listeners[event]) {
      this.listeners[event](data)
    }
  }

  close() {
    if (this.onclose) {
      this.onclose({ code: 1000 })
    }
  }
}

// Mock global WebSocket
// @ts-ignore
global.WebSocket = vi.fn().mockImplementation((url) => new MockWebSocket(url))

describe('JanInferenceCortexExtension', () => {
  let extension

  beforeEach(() => {
    // Reset mocks
    vi.clearAllMocks()

    // Create a new instance for each test
    extension = new JanInferenceCortexExtension()

    // Mock the getSetting method
    extension.getSetting = vi.fn().mockImplementation((key, defaultValue) => {
      switch (key) {
        case Settings.n_parallel:
          return '4'
        case Settings.cont_batching:
          return true
        case Settings.caching_enabled:
          return true
        case Settings.flash_attn:
          return true
        case Settings.cache_type:
          return 'f16'
        case Settings.use_mmap:
          return true
        case Settings.cpu_threads:
          return ''
        default:
          return defaultValue
      }
    })

    // Mock methods
    extension.registerSettings = vi.fn()
    extension.onLoad = vi.fn()
    extension.clean = vi.fn().mockResolvedValue({})
    extension.healthz = vi.fn().mockResolvedValue({})
    extension.subscribeToEvents = vi.fn()
  })

  describe('onSettingUpdate', () => {
    it('should update n_parallel setting correctly', () => {
      extension.onSettingUpdate(Settings.n_parallel, '8')
      expect(extension.n_parallel).toBe(8)
    })

    it('should update cont_batching setting correctly', () => {
      extension.onSettingUpdate(Settings.cont_batching, false)
      expect(extension.cont_batching).toBe(false)
    })

    it('should update caching_enabled setting correctly', () => {
      extension.onSettingUpdate(Settings.caching_enabled, false)
      expect(extension.caching_enabled).toBe(false)
    })

    it('should update flash_attn setting correctly', () => {
      extension.onSettingUpdate(Settings.flash_attn, false)
      expect(extension.flash_attn).toBe(false)
    })

    it('should update cache_type setting correctly', () => {
      extension.onSettingUpdate(Settings.cache_type, 'f32')
      expect(extension.cache_type).toBe('f32')
    })

    it('should update use_mmap setting correctly', () => {
      extension.onSettingUpdate(Settings.use_mmap, false)
      expect(extension.use_mmap).toBe(false)
    })

    it('should update cpu_threads setting correctly', () => {
      extension.onSettingUpdate(Settings.cpu_threads, '4')
      expect(extension.cpu_threads).toBe(4)
    })

    it('should not update cpu_threads when value is not a number', () => {
      extension.cpu_threads = undefined
      extension.onSettingUpdate(Settings.cpu_threads, 'not-a-number')
      expect(extension.cpu_threads).toBeUndefined()
    })
  })

  describe('onUnload', () => {
    it('should clean up resources correctly', async () => {
      extension.shouldReconnect = true

      await extension.onUnload()

      expect(extension.shouldReconnect).toBe(false)
      expect(extension.clean).toHaveBeenCalled()
      expect(executeOnMain).toHaveBeenCalledWith(NODE, 'dispose')
    })
  })

  describe('loadModel', () => {
    it('should remove llama_model_path and mmproj from settings', async () => {
      // Setup
      const model = {
        id: 'test-model',
        settings: {
          llama_model_path: '/path/to/model',
          mmproj: '/path/to/mmproj',
          some_setting: 'value',
        },
        engine: InferenceEngine.cortex_llamacpp,
      }

      // Mock ky.post
      vi.spyOn(ky, 'post').mockImplementation(() => ({
        // @ts-ignore
        json: () => Promise.resolve({}),
        catch: () => ({
          finally: () => ({
            // @ts-ignore
            then: () => Promise.resolve({}),
          }),
        }),
      }))

      // Setup queue for testing
      extension.queue = { add: vi.fn((fn) => fn()) }

      // Execute
      await extension.loadModel(model)

      // Verify settings were filtered
      expect(model.settings).not.toHaveProperty('llama_model_path')
      expect(model.settings).not.toHaveProperty('mmproj')
      expect(model.settings).toHaveProperty('some_setting')
    })

    it('should convert nitro to cortex_llamacpp engine', async () => {
      // Setup
      const model = {
        id: 'test-model',
        settings: {},
        engine: InferenceEngine.nitro,
      }

      // Mock ky.post
      const mockKyPost = vi.spyOn(ky, 'post').mockImplementation(() => ({
        // @ts-ignore
        json: () => Promise.resolve({}),
        catch: () => ({
          finally: () => ({
            // @ts-ignore
            then: () => Promise.resolve({}),
          }),
        }),
      }))

      // Setup queue for testing
      extension.queue = { add: vi.fn((fn) => fn()) }

      // Execute
      await extension.loadModel(model)

      // Verify API call
      expect(mockKyPost).toHaveBeenCalledWith(
        `${CORTEX_API_URL}/v1/models/start`,
        expect.objectContaining({
          json: expect.objectContaining({
            engine: InferenceEngine.cortex_llamacpp,
          }),
        })
      )
    })
  })

  describe('unloadModel', () => {
    it('should call the correct API endpoint and abort loading if in progress', async () => {
      // Setup
      const model = { id: 'test-model' }
      const mockAbort = vi.fn()
      extension.abortControllers.set(model.id, { abort: mockAbort })

      // Mock ky.post
      const mockKyPost = vi.spyOn(ky, 'post').mockImplementation(() => ({
        // @ts-ignore
        json: () => Promise.resolve({}),
        finally: () => ({
          // @ts-ignore
          then: () => Promise.resolve({}),
        }),
      }))

      // Execute
      await extension.unloadModel(model)

      // Verify API call
      expect(mockKyPost).toHaveBeenCalledWith(
        `${CORTEX_API_URL}/v1/models/stop`,
        expect.objectContaining({
          json: { model: model.id },
        })
      )

      // Verify abort controller was called
      expect(mockAbort).toHaveBeenCalled()
    })
  })

  describe('clean', () => {
    it('should make a DELETE request to destroy process manager', async () => {
      // Mock the ky.delete function directly
      const mockDelete = vi.fn().mockReturnValue({
        catch: vi.fn().mockReturnValue(Promise.resolve({})),
      })

      // Replace the original implementation
      vi.spyOn(ky, 'delete').mockImplementation(mockDelete)

      // Override the clean method to use the real implementation
      // @ts-ignore
      extension.clean = JanInferenceCortexExtension.prototype.clean

      // Call the method
      await extension.clean()

      // Verify the correct API call was made
      expect(mockDelete).toHaveBeenCalledWith(
        `${CORTEX_API_URL}/processmanager/destroy`,
        expect.objectContaining({
          timeout: 2000,
          retry: expect.objectContaining({
            limit: 0,
          }),
        })
      )
    })
  })

  describe('WebSocket events', () => {
    it('should handle WebSocket events correctly', () => {
      // Create a mock implementation for subscribeToEvents that stores the socket
      let messageHandler
      let closeHandler

      // Override the private method
      extension.subscribeToEvents = function () {
        this.socket = new MockWebSocket('ws://localhost:3000/events')
        this.socket.addEventListener('message', (event) => {
          const data = JSON.parse(event.data)

          // Store for testing
          messageHandler = data

          const transferred = data.task.items.reduce(
            (acc, cur) => acc + cur.downloadedBytes,
            0
          )
          const total = data.task.items.reduce(
            (acc, cur) => acc + cur.bytes,
            0
          )
          const percent = total > 0 ? transferred / total : 0

          events.emit(
            data.type === 'DownloadUpdated' ? 'onFileDownloadUpdate' :
            data.type === 'DownloadSuccess' ? 'onFileDownloadSuccess' :
            data.type,
            {
              modelId: data.task.id,
              percent: percent,
              size: {
                transferred: transferred,
                total: total,
              },
              downloadType: data.task.type,
            }
          )

          if (data.task.type === 'Engine') {
            events.emit(EngineEvent.OnEngineUpdate, {
              type: data.type,
              percent: percent,
              id: data.task.id,
            })
          } else if (data.type === 'DownloadSuccess') {
            setTimeout(() => {
              events.emit(ModelEvent.OnModelsUpdate, {
                fetch: true,
              })
            }, 500)
          }
        })

        this.socket.onclose = (event) => {
          closeHandler = event
          // Notify app to update model running state
          events.emit(ModelEvent.OnModelStopped, {})
        }
      }

      // Setup queue
      extension.queue = {
        add: vi.fn((fn) => fn()),
      }

      // Execute the method
      extension.subscribeToEvents()

      // Simulate a message event
      extension.socket.listeners.message({
        data: JSON.stringify({
          type: 'DownloadUpdated',
          task: {
            id: 'test-model',
            type: 'Model',
            items: [
              { downloadedBytes: 50, bytes: 100 },
            ],
          },
        }),
      })

      // Verify event emission
      expect(events.emit).toHaveBeenCalledWith(
        'onFileDownloadUpdate',
        expect.objectContaining({
          modelId: 'test-model',
          percent: 0.5,
        })
      )

      // Simulate a download success event
      vi.useFakeTimers()
      extension.socket.listeners.message({
        data: JSON.stringify({
          type: 'DownloadSuccess',
          task: {
            id: 'test-model',
            type: 'Model',
            items: [
              { downloadedBytes: 100, bytes: 100 },
            ],
          },
        }),
      })

      // Fast-forward time to trigger the timeout
      vi.advanceTimersByTime(500)

      // Verify the ModelEvent.OnModelsUpdate event was emitted
      expect(events.emit).toHaveBeenCalledWith(
        ModelEvent.OnModelsUpdate,
        { fetch: true }
      )

      vi.useRealTimers()

      // Trigger websocket close
      extension.socket.onclose({ code: 1000 })

      // Verify OnModelStopped event was emitted
      expect(events.emit).toHaveBeenCalledWith(
        ModelEvent.OnModelStopped,
        {}
      )
    })
  })
})
@@ -1,435 +0,0 @@
/**
 * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module inference-extension/src/index
 */

import {
  Model,
  EngineEvent,
  LocalOAIEngine,
  extractModelLoadParams,
  events,
  ModelEvent,
} from '@janhq/core'
import ky, { KyInstance } from 'ky'

/**
 * Event subscription types of Downloader
 */
enum DownloadTypes {
  DownloadUpdated = 'onFileDownloadUpdate',
  DownloadError = 'onFileDownloadError',
  DownloadSuccess = 'onFileDownloadSuccess',
  DownloadStopped = 'onFileDownloadStopped',
  DownloadStarted = 'onFileDownloadStarted',
}

export enum Settings {
  n_parallel = 'n_parallel',
  cont_batching = 'cont_batching',
  caching_enabled = 'caching_enabled',
  flash_attn = 'flash_attn',
  cache_type = 'cache_type',
  use_mmap = 'use_mmap',
  cpu_threads = 'cpu_threads',
  huggingfaceToken = 'hugging-face-access-token',
  auto_unload_models = 'auto_unload_models',
  context_shift = 'context_shift',
}

type LoadedModelResponse = { data: { engine: string; id: string }[] }

/**
 * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
export default class JanInferenceCortexExtension extends LocalOAIEngine {
  nodeModule: string = 'node'

  provider: string = 'cortex'

  shouldReconnect = true

  /** Default Engine model load settings */
  n_parallel?: number
  cont_batching: boolean = false
  caching_enabled: boolean = true
  flash_attn: boolean = true
  use_mmap: boolean = true
  cache_type: string = 'q8'
  cpu_threads?: number
  auto_unload_models: boolean = true
  reasoning_budget = -1 // Default reasoning budget; -1 means unrestricted
  context_shift = false

  /**
   * The URL for making inference requests.
   */
  inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`

  /**
   * Socket instance of events subscription
   */
  socket?: WebSocket = undefined

  abortControllers = new Map<string, AbortController>()

  api?: KyInstance

  /**
   * Get the API instance
   * @returns
   */
  async apiInstance(): Promise<KyInstance> {
    if (this.api) return this.api
    const apiKey = await window.core?.api.appToken()
    this.api = ky.extend({
      prefixUrl: CORTEX_API_URL,
      headers: apiKey
        ? {
            Authorization: `Bearer ${apiKey}`,
          }
        : {},
      retry: 10,
    })
    return this.api
  }

  /**
   * Authorization headers for the API requests.
   * @returns
   */
  headers(): Promise<HeadersInit> {
    return window.core?.api.appToken().then((token: string) => ({
      Authorization: `Bearer ${token}`,
    }))
  }

  /**
   * Called when the extension is loaded.
   */
  async onLoad() {
    super.onLoad()

    // Register Settings
    this.registerSettings(SETTINGS)

    const numParallel = await this.getSetting<string>(Settings.n_parallel, '')
    if (numParallel.length > 0 && parseInt(numParallel) > 0) {
      this.n_parallel = parseInt(numParallel)
    }
    if (this.n_parallel && this.n_parallel > 1)
      this.cont_batching = await this.getSetting<boolean>(
        Settings.cont_batching,
        false
      )
    this.caching_enabled = await this.getSetting<boolean>(
      Settings.caching_enabled,
      true
    )
    this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
    this.context_shift = await this.getSetting<boolean>(
      Settings.context_shift,
      false
    )
    this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
    if (this.caching_enabled)
      this.cache_type = await this.getSetting<string>(Settings.cache_type, 'q8')
    this.auto_unload_models = await this.getSetting<boolean>(
      Settings.auto_unload_models,
      true
    )
    const threads_number = Number(
      await this.getSetting<string>(Settings.cpu_threads, '')
    )

    if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number

    const huggingfaceToken = await this.getSetting<string>(
      Settings.huggingfaceToken,
      ''
    )
    if (huggingfaceToken) {
      this.updateCortexConfig({ huggingface_token: huggingfaceToken })
    }
    this.subscribeToEvents()

    window.addEventListener('beforeunload', () => {
      this.clean()
    })

    // Migrate configs
    if (!localStorage.getItem('cortex_migration_completed')) {
      const config = await this.getCortexConfig()
      console.log('Start cortex.cpp migration', config)
      if (config && config.huggingface_token) {
        this.updateSettings([
          {
            key: Settings.huggingfaceToken,
            controllerProps: {
              value: config.huggingface_token,
            },
          },
        ])
        this.updateCortexConfig({
          huggingface_token: config.huggingface_token,
        })
        localStorage.setItem('cortex_migration_completed', 'true')
      }
    }
  }

  async onUnload() {
    console.log('Clean up cortex.cpp services')
    this.shouldReconnect = false
    this.clean()
    super.onUnload()
  }

  /**
   * Subscribe to settings update and make change accordingly
   * @param key
   * @param value
   */
  onSettingUpdate<T>(key: string, value: T): void {
    if (key === Settings.n_parallel && typeof value === 'string') {
      if (value.length > 0 && parseInt(value) > 0) {
        this.n_parallel = parseInt(value)
      }
    } else if (key === Settings.cont_batching && typeof value === 'boolean') {
      this.cont_batching = value as boolean
    } else if (key === Settings.caching_enabled && typeof value === 'boolean') {
      this.caching_enabled = value as boolean
    } else if (key === Settings.flash_attn && typeof value === 'boolean') {
      this.flash_attn = value as boolean
    } else if (key === Settings.cache_type && typeof value === 'string') {
      this.cache_type = value as string
    } else if (key === Settings.use_mmap && typeof value === 'boolean') {
      this.use_mmap = value as boolean
    } else if (key === Settings.cpu_threads && typeof value === 'string') {
      const threads_number = Number(value)
      if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
    } else if (key === Settings.huggingfaceToken) {
      this.updateCortexConfig({ huggingface_token: value })
    } else if (key === Settings.auto_unload_models) {
      this.auto_unload_models = value as boolean
    } else if (key === Settings.context_shift && typeof value === 'boolean') {
      this.context_shift = value
    }
  }

  override async loadModel(
    model: Partial<Model> & {
      id: string
      settings?: object
      file_path?: string
    },
    abortController: AbortController
  ): Promise<void> {
    // Cortex will handle these settings
    const { llama_model_path, mmproj, ...settings } = model.settings ?? {}
    model.settings = settings

    const controller = abortController ?? new AbortController()
    const { signal } = controller

    this.abortControllers.set(model.id, controller)

    const loadedModels = await this.activeModels()

    // This is to avoid loading the same model multiple times
    if (loadedModels.some((e: { id: string }) => e.id === model.id)) {
      console.log(`Model ${model.id} already loaded`)
      return
    }
    if (this.auto_unload_models) {
      // Unload the last used model if it is not the same as the current one
      for (const lastUsedModel of loadedModels) {
        if (lastUsedModel.id !== model.id) {
          console.log(`Unloading last used model: ${lastUsedModel.id}`)
          await this.unloadModel(lastUsedModel as Model)
        }
      }
    }
    const modelSettings = extractModelLoadParams(model.settings)
    return await this.apiInstance().then((api) =>
      api
        .post('v1/models/start', {
          json: {
            ...modelSettings,
            model: model.id,
            engine:
              model.engine === 'nitro' // Legacy model cache
                ? 'llama-cpp'
                : model.engine,
            ...(this.n_parallel ? { n_parallel: this.n_parallel } : {}),
            ...(this.use_mmap ? { use_mmap: true } : {}),
            ...(this.caching_enabled ? { caching_enabled: true } : {}),
            ...(this.flash_attn ? { flash_attn: true } : {}),
            ...(this.caching_enabled && this.cache_type
              ? { cache_type: this.cache_type }
              : {}),
            ...(this.cpu_threads && this.cpu_threads > 0
              ? { cpu_threads: this.cpu_threads }
              : {}),
            ...(this.cont_batching && this.n_parallel && this.n_parallel > 1
              ? { cont_batching: this.cont_batching }
              : {}),
            ...(model.id.toLowerCase().includes('jan-nano')
              ? { reasoning_budget: 0 }
              : { reasoning_budget: this.reasoning_budget }),
            ...(this.context_shift !== true // explicit true required to enable context shift
              ? { 'no-context-shift': true }
              : {}),
            ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined
              ? { ngl: 100 }
              : {}),
          },
          timeout: false,
          signal,
        })
        .json()
        .catch(async (e) => {
          throw (await e.response?.json()) ?? e
        })
        .finally(() => this.abortControllers.delete(model.id))
        .then()
    )
  }

  override async unloadModel(model: Model): Promise<void> {
    return this.apiInstance().then((api) =>
      api
        .post('v1/models/stop', {
          json: { model: model.id },
          retry: {
            limit: 0,
          },
        })
        .json()
        .finally(() => {
          this.abortControllers.get(model.id)?.abort()
        })
        .then()
    )
  }

  async activeModels(): Promise<(object & { id: string })[]> {
    return await this.apiInstance()
      .then((e) =>
        e.get('inferences/server/models', {
          retry: {
            limit: 0, // Do not retry
          },
        })
      )
      .then((e) => e.json())
      .then((e) => (e as LoadedModelResponse).data ?? [])
      .catch(() => [])
  }

  /**
   * Clean cortex processes
   * @returns
   */
  private async clean(): Promise<any> {
    return this.apiInstance()
      .then((api) =>
        api.delete('processmanager/destroy', {
          timeout: 2000, // maximum 2 seconds
          retry: {
            limit: 0,
          },
        })
      )
      .catch(() => {
        // Do nothing
      })
  }

  /**
   * Update cortex config
   * @param body
   */
  private async updateCortexConfig(body: {
    [key: string]: any
  }): Promise<void> {
    return this.apiInstance()
      .then((api) => api.patch('v1/configs', { json: body }).then(() => {}))
      .catch((e) => console.debug(e))
  }

  /**
   * Get cortex config
   */
  private async getCortexConfig(): Promise<any> {
    return this.apiInstance()
      .then((api) => api.get('v1/configs').json())
      .catch((e) => console.debug(e))
  }

  /**
   * Subscribe to cortex.cpp websocket events
   */
  private subscribeToEvents() {
    this.socket = new WebSocket(`${CORTEX_SOCKET_URL}/events`)

    this.socket.addEventListener('message', (event) => {
      const data = JSON.parse(event.data)

      const transferred = data.task.items.reduce(
        (acc: number, cur: any) => acc + cur.downloadedBytes,
        0
      )
      const total = data.task.items.reduce(
        (acc: number, cur: any) => acc + cur.bytes,
        0
      )
      const percent = total > 0 ? transferred / total : 0

      events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], {
        modelId: data.task.id,
        percent: percent,
        size: {
          transferred: transferred,
          total: total,
        },
        downloadType: data.task.type,
      })

      if (data.task.type === 'Engine') {
        events.emit(EngineEvent.OnEngineUpdate, {
          type: DownloadTypes[data.type as keyof typeof DownloadTypes],
          percent: percent,
          id: data.task.id,
        })
      } else {
        if (data.type === DownloadTypes.DownloadSuccess) {
          // Delay for the state update from cortex.cpp
          // Just to be sure
          setTimeout(() => {
            events.emit(ModelEvent.OnModelsUpdate, {
              fetch: true,
            })
          }, 500)
        }
      }
    })

    /**
     * This is to handle the server segfault issue
     */
    this.socket.onclose = (event) => {
      // Notify app to update model running state
      events.emit(ModelEvent.OnModelStopped, {})

      // Reconnect to the /events websocket
      if (this.shouldReconnect) {
        setTimeout(() => this.subscribeToEvents(), 1000)
      }
    }
  }
}
@@ -1,144 +0,0 @@
import { describe, it, expect, vi } from 'vitest'

// Mocks
const CORTEX_API_URL = 'http://localhost:3000'
vi.stubGlobal('CORTEX_API_URL', CORTEX_API_URL)

vi.mock('@janhq/core/node', (actual) => ({
  ...actual(),
  getJanDataFolderPath: () => '',
  appResourcePath: () => '/mock/path',
  log: vi.fn(),
  getSystemResourceInfo: () => {
    return {
      cpu: {
        cores: 1,
        logicalCores: 1,
        threads: 1,
        model: 'model',
        speed: 1,
      },
      memory: {
        total: 1,
        free: 1,
      },
      gpu: {
        model: 'model',
        memory: 1,
        cuda: {
          version: 'version',
          devices: 'devices',
        },
        vulkan: {
          version: 'version',
          devices: 'devices',
        },
      },
    }
  },
}))

vi.mock('fs', () => ({
  default: {
    readdirSync: () => [],
  },
}))

vi.mock('./watchdog', () => {
  return {
    ProcessWatchdog: vi.fn().mockImplementation(() => {
      return {
        start: vi.fn(),
        terminate: vi.fn(),
      }
    }),
  }
})

vi.mock('child_process', () => ({
  exec: () => {
    return {
      stdout: { on: vi.fn() },
      stderr: { on: vi.fn() },
      on: vi.fn(),
    }
  },
  spawn: () => {
    return {
      stdout: { on: vi.fn() },
      stderr: { on: vi.fn() },
      on: vi.fn(),
      pid: '111',
    }
  },
}))

import index from './index'

describe('Cortex extension node interface', () => {
  describe('run', () => {
    it('should start the cortex subprocess on macOS', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'darwin',
      })

      const result = await index.run()
      expect(result).toBeUndefined()
    })

    it('should start the cortex subprocess on Windows', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'win32',
      })

      const result = await index.run()
      expect(result).toBeUndefined()
    })

    it('should set the proper environment variables based on platform', async () => {
      // Test for Windows
      Object.defineProperty(process, 'platform', {
        value: 'win32',
      })
      process.env.PATH = '/original/path'

      await index.run()
      expect(process.env.PATH).toContain('/original/path')

      // Test for non-Windows (macOS/Linux)
      Object.defineProperty(process, 'platform', {
        value: 'darwin',
      })
      process.env.LD_LIBRARY_PATH = '/original/ld/path'

      await index.run()
      expect(process.env.LD_LIBRARY_PATH).toContain('/original/ld/path')
    })
  })

  describe('dispose', () => {
    it('should dispose a model successfully on Mac', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'darwin',
      })

      // Call the dispose function
      const result = index.dispose()

      // Assert that the result is as expected
      expect(result).toBeUndefined()
    })

    it('should kill the subprocess successfully on Windows', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'win32',
      })

      // Call the dispose function
      const result = index.dispose()

      // Assert that the result is as expected
      expect(result).toBeUndefined()
    })
  })
})
@@ -1,103 +0,0 @@
import path from 'path'
import { appResourcePath, getJanDataFolderPath, log } from '@janhq/core/node'
import { ProcessWatchdog } from './watchdog'

let watchdog: ProcessWatchdog | undefined = undefined

/**
 * Spawns the cortex subprocess.
 * @returns A promise that resolves when the cortex subprocess is started.
 */
function run(): Promise<any> {
  log(`[CORTEX]:: Spawning cortex subprocess...`)

  return new Promise<void>(async (resolve, reject) => {
    // let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
    let binaryName = `cortex-server${
      process.platform === 'win32' ? '.exe' : ''
    }`
    const binPath = path.join(__dirname, '..', 'bin')

    const executablePath = path.join(binPath, binaryName)

    addEnvPaths(binPath)

    const sharedPath = path.join(appResourcePath(), 'shared')
    // Execute the binary
    log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)

    const dataFolderPath = getJanDataFolderPath()
    if (watchdog) {
      watchdog.terminate()
    }

    // The port the cortex subprocess listens on, derived from CORTEX_API_URL
    const LOCAL_PORT = CORTEX_API_URL.split(':').pop() ?? '39291'

    watchdog = new ProcessWatchdog(
      executablePath,
      [
        '--start-server',
        '--port',
        LOCAL_PORT.toString(),
        '--config_file_path',
        `${path.join(dataFolderPath, '.janrc')}`,
        '--data_folder_path',
        dataFolderPath,
        'config',
        '--api_keys',
        process.env.appToken ?? 'cortex.cpp',
      ],
      {
        env: {
          ...process.env,
          // CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
          // // Vulkan - Support 1 device at a time for now
          // ...(gpuVisibleDevices?.length > 0 && {
          //   GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
          // }),
        },
        cwd: sharedPath,
      }
    )
    watchdog.start()
    resolve()
  })
}

/**
 * Every module should have a dispose function
 * This will be called when the extension is unloaded and should clean up any resources
 * Also called when app is closed
 */
function dispose() {
  watchdog?.terminate()
}

/**
 * Set the environment paths for the cortex subprocess
 * @param dest
 */
function addEnvPaths(dest: string) {
  // Add engine path to the PATH and LD_LIBRARY_PATH
  if (process.platform === 'win32') {
    process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
  } else {
    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
      path.delimiter,
      dest
    )
  }
}

/**
 * Cortex process info
 */
export interface CortexProcessInfo {
  isRunning: boolean
}

export default {
  run,
  dispose,
}
|
|
||||||
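For context, this module's default export is driven by the extension host roughly as follows. This is a minimal sketch, not part of the diff, and assumes the host imports the file above and that `CORTEX_API_URL` is defined in scope:

```typescript
// Hypothetical lifecycle of the node module above (removed by this commit).
import cortex from './index'

await cortex.run() // spawns cortex-server under the ProcessWatchdog
// ... serve requests against CORTEX_API_URL ...
cortex.dispose() // terminates the watchdog and its child process
```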
@ -1,84 +0,0 @@
import { log } from '@janhq/core/node'
import { spawn, ChildProcess } from 'child_process'
import { EventEmitter } from 'events'

interface WatchdogOptions {
  cwd?: string
  restartDelay?: number
  maxRestarts?: number
  env?: NodeJS.ProcessEnv
}

export class ProcessWatchdog extends EventEmitter {
  private command: string
  private args: string[]
  private options: WatchdogOptions
  private process: ChildProcess | null
  private restartDelay: number
  private maxRestarts: number
  private restartCount: number
  private isTerminating: boolean

  constructor(command: string, args: string[], options: WatchdogOptions = {}) {
    super()
    this.command = command
    this.args = args
    this.options = options
    this.process = null
    this.restartDelay = options.restartDelay || 5000
    this.maxRestarts = options.maxRestarts || 5
    this.restartCount = 0
    this.isTerminating = false
  }

  start(): void {
    this.spawnProcess()
  }

  private spawnProcess(): void {
    if (this.isTerminating) return

    log(`Starting process: ${this.command} ${this.args.join(' ')}`)
    this.process = spawn(this.command, this.args, this.options)

    this.process.stdout?.on('data', (data: Buffer) => {
      log(`Process output: ${data}`)
      this.emit('output', data.toString())
    })

    this.process.stderr?.on('data', (data: Buffer) => {
      log(`Process error: ${data}`)
      this.emit('error', data.toString())
    })

    this.process.on('close', (code: number | null) => {
      log(`Process exited with code ${code}`)
      this.emit('close', code)
      if (!this.isTerminating) {
        this.restartProcess()
      }
    })
  }

  private restartProcess(): void {
    if (this.restartCount < this.maxRestarts) {
      this.restartCount++
      log(
        `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})`
      )
      setTimeout(() => this.spawnProcess(), this.restartDelay)
    } else {
      log('Max restart attempts reached. Exiting watchdog.')
      this.emit('maxRestartsReached')
    }
  }

  terminate(): void {
    this.isTerminating = true
    if (this.process) {
      log('Terminating watched process...')
      this.process.kill()
    }
    this.emit('terminated')
  }
}
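`ProcessWatchdog` is a small self-restarting wrapper around `child_process.spawn`. A sketch of how a caller wires it up (hypothetical binary path and args; the event names match the `emit` calls in the class above):

```typescript
// Illustrative consumer of ProcessWatchdog (hypothetical paths/args).
const dog = new ProcessWatchdog('/path/to/cortex-server', ['--start-server'], {
  restartDelay: 2000, // retry every 2s...
  maxRestarts: 3, // ...at most 3 times before 'maxRestartsReached'
})

dog.on('output', (line: string) => console.log(line))
// Note: 'error' here carries stderr text; attaching a listener also prevents
// EventEmitter from throwing when the event fires with no handler.
dog.on('error', (line: string) => console.error(line))
dog.on('close', (code: number | null) => console.log(`exited: ${code}`))
dog.on('maxRestartsReached', () => console.error('giving up'))

dog.start()
// ...later, on shutdown:
dog.terminate()
```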
@ -1,15 +0,0 @@
{
  "compilerOptions": {
    "moduleResolution": "node",
    "target": "es2016",
    "module": "esnext",
    "strict": true,
    "sourceMap": true,
    "esModuleInterop": true,
    "outDir": "dist",
    "importHelpers": true,
    "typeRoots": ["node_modules/@types"]
  },
  "include": ["src"],
  "exclude": ["src/**/*.test.ts"]
}
@ -21,6 +21,7 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@janhq/core": "../../core/package.tgz",
|
"@janhq/core": "../../core/package.tgz",
|
||||||
|
"@tauri-apps/api": "^1.4.0",
|
||||||
"fetch-retry": "^5.0.6",
|
"fetch-retry": "^5.0.6",
|
||||||
"ulidx": "^2.3.0"
|
"ulidx": "^2.3.0"
|
||||||
},
|
},
|
||||||
|
|||||||
@ -6,10 +6,55 @@
 * @module llamacpp-extension/src/index
 */

-import { RemoteOAIEngine, getJanDataFolderPath, fs, ModelCapability, Model } from '@janhq/core'
-
-export enum Settings {
-  port = 'port',
-}
+import {
+  AIEngine,
+  localProvider,
+  getJanDataFolderPath,
+  fs,
+  Model,
+} from '@janhq/core'
+
+import { invoke } from '@tauri-apps/api/tauri'
+import {
+  LocalProvider,
+  ModelInfo,
+  ListOptions,
+  ListResult,
+  PullOptions,
+  PullResult,
+  LoadOptions,
+  SessionInfo,
+  UnloadOptions,
+  UnloadResult,
+  ChatOptions,
+  ChatCompletion,
+  ChatCompletionChunk,
+  DeleteOptions,
+  DeleteResult,
+  ImportOptions,
+  ImportResult,
+  AbortPullOptions,
+  AbortPullResult,
+  ChatCompletionRequest,
+} from './types'
+
+/**
+ * Helper to convert GGUF model filename to a more structured ID/name
+ * Example: "mistral-7b-instruct-v0.2.Q4_K_M.gguf" -> { baseModelId: "mistral-7b-instruct-v0.2", quant: "Q4_K_M" }
+ **/
+function parseGGUFFileName(filename: string): {
+  baseModelId: string
+  quant?: string
+} {
+  const nameWithoutExt = filename.replace(/\.gguf$/i, '')
+  // Try to split by common quantization patterns (e.g., .Q4_K_M, -IQ2_XS)
+  const match = nameWithoutExt.match(
+    /^(.*?)[-_]([QqIiFf]\w{1,3}_\w{1,3}|[Qq]\d+_[KkSsMmXxLl\d]+|[IiQq]\d+_[XxSsMm]+|[Qq]\d+)$/
+  )
+  if (match && match[1] && match[2]) {
+    return { baseModelId: match[1], quant: match[2] }
+  }
+  return { baseModelId: nameWithoutExt }
+}

 /**
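As a quick sanity check of the helper added above (illustrative calls, not part of the diff): the separator class in the regex is `[-_]`, so the dot-separated name shown in the doc comment actually falls through to the no-quant branch:

```typescript
// Illustrative calls against parseGGUFFileName (not part of the diff).
parseGGUFFileName('mistral-7b-instruct-v0.2-Q4_K_M.gguf')
// -> { baseModelId: 'mistral-7b-instruct-v0.2', quant: 'Q4_K_M' }

parseGGUFFileName('mistral-7b-instruct-v0.2.Q4_K_M.gguf')
// -> { baseModelId: 'mistral-7b-instruct-v0.2.Q4_K_M' }
// '.' is not in the [-_] separator class, so no quant is detected here.
```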
@ -17,99 +62,246 @@ export enum Settings {
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
-export default class LlamacppProvider extends RemoteOAIEngine {
-  inferenceUrl: string = ''
-  baseURL: string = ''
-  provider: string = ENGINE
-
-  override async onLoad(): Promise<void> {
-    super.onLoad()
-
-    // Register Settings
-    this.registerSettings(SETTINGS)
-
-    // register models
-    const models = await this.listModels()
-    this.registerModels(models)
-
-    // NOTE: port 0 may mean request free port from OS. we may want
-    // to take advantage of this. llama-server --port 0 on macOS works.
-    const port = await this.getSetting<number>(Settings.port, 0)
-    this.updateBaseUrl(port)
-  }
-
-  // onSettingUpdate<T>(key: string, value: T): void {
-  //   if (key === Settings.apiKey) {
-  //     this.apiKey = value as string
-  //   } else if (key === Settings.baseUrl) {
-  //     if (typeof value !== 'string') return
-  //     this.updateBaseUrl(value)
-  //   }
-  // }
-
-  updateBaseUrl(value: number): void {
-    if (value == 0) {
-      // set to default value
-      SETTINGS.forEach((setting) => {
-        if (setting.key === Settings.port) {
-          value = setting.controllerProps.value as number
-        }
-      })
-    }
-    this.baseURL = `http://127.0.0.1:${value}`
-    this.inferenceUrl = `${this.baseURL}/chat/completions`
-  }
-
-  async listModels(): Promise<Model[]> {
-    let modelIds = []
-
-    const modelsFolder = `${await getJanDataFolderPath()}/models`
-
-    // cortexso models
-    const cortexsoFolder = `${modelsFolder}/cortex.so`
-    const modelDirs = await fs.readdirSync(cortexsoFolder)
-    for (const modelDir of modelDirs) {
-      const modelName = modelDir.split('/').pop()
-
-      // TODO: try removing this check
-      // skip files start with . e.g. .DS_store
-      if (!modelName || modelName.startsWith('.')) continue
-
-      const variantDirs = await fs.readdirSync(modelDir)
-      for (const variantDir of variantDirs) {
-        // NOTE: we can't detect unfinished download here
-        const ggufPath = `${variantDir}/model.gguf`
-
-        if (await fs.existsSync(ggufPath)) {
-          const variantName = variantDir.split('/').pop()
-          modelIds.push(`${modelName}/${variantName}`)
-        }
-      }
-    }
-
-    // TODO: list models under huggingface.co
-
-    const models = modelIds.map((modelId) => {
-      return {
-        sources: [],
-        object: 'model',
-        version: '1.0',
-        format: 'api',
-        id: modelId,
-        name: modelId,
-        created: 0,
-        description: '',
-        settings: {},
-        parameters: {},
-        metadata: {
-          author: '',
-          tags: [],
-          size: 0,
-        },
-        engine: this.provider,
-        capabilities: [ModelCapability.completion],
-      }
-    })
-    return models
+export default class inference_llamacpp_extension
+  extends AIEngine
+  implements localProvider
+{
+  provider: string = 'llamacpp'
+  readonly providerId: string = 'llamcpp'
+
+  private activeSessions: Map<string, SessionInfo> = new Map()
+
+  private modelsBasePath!: string
+
+  override async onLoad(): Promise<void> {
+    super.onLoad() // Calls registerEngine() from AIEngine
+    this.registerSettings(SETTINGS_DEFINITIONS)
+
+    const customPath = await this.getSetting<string>(
+      LlamaCppSettings.ModelsPath,
+      ''
+    )
+    if (customPath && (await fs.exists(customPath))) {
+      this.modelsBasePath = customPath
+    } else {
+      this.modelsBasePath = await path.join(
+        await getJanDataFolderPath(),
+        'models',
+        ENGINE_ID
+      )
+    }
+    await fs.createDirAll(this.modelsBasePath)
+
+    console.log(
+      `${this.providerId} provider loaded. Models path: ${this.modelsBasePath}`
+    )
+
+    // Optionally, list and register models with the core system if AIEngine expects it
+    // const models = await this.listModels({ providerId: this.providerId });
+    // this.registerModels(this.mapModelInfoToCoreModel(models)); // mapModelInfoToCoreModel would be a helper
+  }
+
+  async getModelsPath(): Promise<string> {
+    // Ensure modelsBasePath is initialized
+    if (!this.modelsBasePath) {
+      const customPath = await this.getSetting<string>(
+        LlamaCppSettings.ModelsPath,
+        ''
+      )
+      if (customPath && (await fs.exists(customPath))) {
+        this.modelsBasePath = customPath
+      } else {
+        this.modelsBasePath = await path.join(
+          await getJanDataFolderPath(),
+          'models',
+          ENGINE_ID
+        )
+      }
+      await fs.createDirAll(this.modelsBasePath)
+    }
+    return this.modelsBasePath
+  }
+
+  async listModels(_opts: ListOptions): Promise<ListResult> {
+    const modelsDir = await this.getModelsPath()
+    const result: ModelInfo[] = []
+
+    try {
+      if (!(await fs.exists(modelsDir))) {
+        await fs.createDirAll(modelsDir)
+        return []
+      }
+
+      const entries = await fs.readDir(modelsDir)
+      for (const entry of entries) {
+        if (entry.name?.endsWith('.gguf') && entry.isFile) {
+          const modelPath = await path.join(modelsDir, entry.name)
+          const stats = await fs.stat(modelPath) // Tauri's fs.stat or Node's fs.statSync
+          const parsedName = parseGGUFFileName(entry.name)
+
+          result.push({
+            id: `${parsedName.baseModelId}${parsedName.quant ? `/${parsedName.quant}` : ''}`, // e.g., "mistral-7b/Q4_0"
+            name: entry.name.replace('.gguf', ''), // Or a more human-friendly name
+            quant_type: parsedName.quant,
+            providerId: this.providerId,
+            sizeBytes: stats.size,
+            path: modelPath,
+            tags: [this.providerId, parsedName.quant || 'unknown_quant'].filter(
+              Boolean
+            ) as string[],
+          })
+        }
+      }
+    } catch (error) {
+      console.error(`[${this.providerId}] Error listing models:`, error)
+      // Depending on desired behavior, either throw or return empty/partial list
+    }
+    return result
+  }
+
+  // pullModel
+  async pullModel(opts: PullOptions): Promise<PullResult> {
+    // TODO: Implement pullModel
+    return 0;
+  }
+
+  // abortPull
+  async abortPull(opts: AbortPullOptions): Promise<AbortPullResult> {
+    // TODO: implement abortPull
+  }
+
+  async loadModel(opts: LoadOptions): Promise<SessionInfo> {
+    if (opts.providerId !== this.providerId) {
+      throw new Error('Invalid providerId for LlamaCppProvider.loadModel')
+    }
+
+    const sessionId = uuidv4()
+    const loadParams = {
+      model_path: opts.modelPath,
+      session_id: sessionId, // Pass sessionId to Rust for tracking
+      // Default llama.cpp server options, can be overridden by opts.options
+      port: opts.options?.port ?? 0, // 0 for dynamic port assignment by OS
+      n_gpu_layers:
+        opts.options?.n_gpu_layers ??
+        (await this.getSetting(LlamaCppSettings.DefaultNGpuLayers, -1)),
+      n_ctx:
+        opts.options?.n_ctx ??
+        (await this.getSetting(LlamaCppSettings.DefaultNContext, 2048)),
+      // Spread any other options from opts.options
+      ...(opts.options || {}),
+    }
+
+    try {
+      console.log(
+        `[${this.providerId}] Requesting to load model: ${opts.modelPath} with options:`,
+        loadParams
+      )
+      // This matches the Rust handler: core::utils::extensions::inference_llamacpp_extension::server::load
+      const rustResponse: {
+        session_id: string
+        port: number
+        model_path: string
+        settings: Record<string, unknown>
+      } = await invoke('plugin:llamacpp|load', { params: loadParams }) // Adjust namespace if needed
+
+      if (!rustResponse || !rustResponse.port) {
+        throw new Error(
+          'Rust load function did not return expected port or session info.'
+        )
+      }
+
+      const sessionInfo: SessionInfo = {
+        sessionId: rustResponse.session_id, // Use sessionId from Rust if it regenerates/confirms it
+        port: rustResponse.port,
+        modelPath: rustResponse.model_path,
+        providerId: this.providerId,
+        settings: rustResponse.settings, // Settings actually used by the server
+      }
+
+      this.activeSessions.set(sessionInfo.sessionId, sessionInfo)
+      console.log(
+        `[${this.providerId}] Model loaded: ${sessionInfo.modelPath} on port ${sessionInfo.port}, session: ${sessionInfo.sessionId}`
+      )
+      return sessionInfo
+    } catch (error) {
+      console.error(
+        `[${this.providerId}] Error loading model ${opts.modelPath}:`,
+        error
+      )
+      throw error // Re-throw to be handled by the caller
+    }
+  }
+
+  async unloadModel(opts: UnloadOptions): Promise<UnloadResult> {
+    if (opts.providerId !== this.providerId) {
+      return { success: false, error: 'Invalid providerId' }
+    }
+    const session = this.activeSessions.get(opts.sessionId)
+    if (!session) {
+      return {
+        success: false,
+        error: `No active session found for id: ${opts.sessionId}`,
+      }
+    }
+
+    try {
+      console.log(
+        `[${this.providerId}] Requesting to unload model for session: ${opts.sessionId}`
+      )
+      // Matches: core::utils::extensions::inference_llamacpp_extension::server::unload
+      const rustResponse: { success: boolean; error?: string } = await invoke(
+        'plugin:llamacpp|unload',
+        { sessionId: opts.sessionId }
+      )
+
+      if (rustResponse.success) {
+        this.activeSessions.delete(opts.sessionId)
+        console.log(
+          `[${this.providerId}] Session ${opts.sessionId} unloaded successfully.`
+        )
+        return { success: true }
+      } else {
+        console.error(
+          `[${this.providerId}] Failed to unload session ${opts.sessionId}: ${rustResponse.error}`
+        )
+        return {
+          success: false,
+          error: rustResponse.error || 'Unknown error during unload',
+        }
+      }
+    } catch (error: any) {
+      console.error(
+        `[${this.providerId}] Error invoking unload for session ${opts.sessionId}:`,
+        error
+      )
+      return { success: false, error: error.message || String(error) }
+    }
+  }
+
+  async chat(
+    opts: ChatOptions
+  ): Promise<ChatCompletion | AsyncIterable<ChatCompletionChunk>> {}
+
+  async deleteModel(opts: DeleteOptions): Promise<DeleteResult> {}
+
+  async importModel(opts: ImportOptions): Promise<ImportResult> {}
+
+  override async loadModel(model: Model): Promise<any> {
+    if (model.engine?.toString() !== this.provider) return Promise.resolve()
+    console.log(
+      `[${this.providerId} AIEngine] Received OnModelInit for:`,
+      model.id
+    )
+    return super.loadModel(model)
+  }
+
+  override async unloadModel(model?: Model): Promise<any> {
+    if (model?.engine && model.engine.toString() !== this.provider)
+      return Promise.resolve()
+    console.log(
+      `[${this.providerId} AIEngine] Received OnModelStop for:`,
+      model?.id || 'all models'
+    )
+    return super.unloadModel(model)
   }
 }
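Taken together, the new session-based flow is list, load, then unload by session id. A minimal sketch of host-side usage (hypothetical wiring, not part of the diff; assumes the extension host constructs the class, `onLoad` has run, and the `plugin:llamacpp` Tauri plugin is registered):

```typescript
// Hypothetical host-side usage of the provider above.
const provider = new inference_llamacpp_extension()
await provider.onLoad()

const models = await provider.listModels({ providerId: provider.providerId })
if (models.length > 0 && models[0].path) {
  // Asks the Rust side (plugin:llamacpp|load) to spawn a llama-server.
  const session = await provider.loadModel({
    providerId: provider.providerId,
    modelPath: models[0].path,
    options: { n_ctx: 4096 }, // overrides DefaultNContext for this session
  })

  // The OpenAI-compatible server now listens on 127.0.0.1:<session.port>.
  console.log(`llama-server ready on port ${session.port}`)

  await provider.unloadModel({
    providerId: provider.providerId,
    sessionId: session.sessionId,
  })
}
```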
extensions/llamacpp-extension/src/types.ts (new file, 199 lines)
@ -0,0 +1,199 @@
// src/providers/local/types.ts

// --- Re-using OpenAI types (minimal definitions for this example) ---
// In a real project, you'd import these from 'openai' or a shared types package.
export interface ChatCompletionRequestMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string | null;
  name?: string;
  tool_calls?: any[]; // Simplified
  tool_call_id?: string;
}

export interface ChatCompletionRequest {
  model: string; // Model ID, though for local it might be implicit via sessionId
  messages: ChatCompletionRequestMessage[];
  temperature?: number | null;
  top_p?: number | null;
  n?: number | null;
  stream?: boolean | null;
  stop?: string | string[] | null;
  max_tokens?: number;
  presence_penalty?: number | null;
  frequency_penalty?: number | null;
  logit_bias?: Record<string, number> | null;
  user?: string;
  // ... TODO: other OpenAI params
}

export interface ChatCompletionChunkChoiceDelta {
  content?: string | null;
  role?: 'system' | 'user' | 'assistant' | 'tool';
  tool_calls?: any[]; // Simplified
}

export interface ChatCompletionChunkChoice {
  index: number;
  delta: ChatCompletionChunkChoiceDelta;
  finish_reason?: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null;
}

export interface ChatCompletionChunk {
  id: string;
  object: 'chat.completion.chunk';
  created: number;
  model: string;
  choices: ChatCompletionChunkChoice[];
  system_fingerprint?: string;
}

export interface ChatCompletionChoice {
  index: number;
  message: ChatCompletionRequestMessage; // Response message
  finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call';
  logprobs?: any; // Simplified
}

export interface ChatCompletion {
  id: string;
  object: 'chat.completion';
  created: number;
  model: string; // Model ID used
  choices: ChatCompletionChoice[];
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
  system_fingerprint?: string;
}
// --- End OpenAI types ---

// Shared model metadata
export interface ModelInfo {
  id: string; // e.g. "qwen3-4B" or "org/model/quant"
  name: string; // human-readable, e.g., "Qwen3 4B Q4_0"
  quant_type?: string; // q4_0 (optional as it might be part of ID or name)
  providerId: string; // e.g. "llama.cpp"
  sizeBytes: number;
  tags?: string[];
  path?: string; // Absolute path to the model file, if applicable
  // Additional provider-specific metadata can be added here
  [key: string]: any;
}

// 1. /list
export interface ListOptions {
  providerId: string; // To specify which provider if a central manager calls this
}
export type ListResult = ModelInfo[];

// 2. /pull
export interface PullOptions {
  providerId: string;
  modelId: string; // Identifier for the model to pull (e.g., from a known registry)
  downloadUrl: string; // URL to download the model from
  /** optional callback to receive download progress */
  onProgress?: (progress: { percent: number; downloadedBytes: number; totalBytes?: number; }) => void;
}
export interface PullResult {
  success: boolean;
  path?: string; // local file path to the pulled model
  error?: string;
  modelInfo?: ModelInfo; // Info of the pulled model
}

// 3. /load
export interface LoadOptions {
  providerId: string;
  modelPath: string;
  /** any provider-specific tuning options for llama.cpp server */
  options?: {
    port?: number; // 0 means dynamic port
    n_gpu_layers?: number;
    n_ctx?: number; // context size
    // ... other llama-cpp-python or llama.cpp server flags
    [key: string]: any;
  };
}

export interface SessionInfo {
  sessionId: string; // opaque handle for unload/chat
  port: number; // llama-server output port (corrected from portid)
  modelPath: string; // path of the loaded model
  providerId: string;
  settings: Record<string, unknown>; // The actual settings used to load
}

// 4. /unload
export interface UnloadOptions {
  providerId: string;
  sessionId: string;
}
export interface UnloadResult {
  success: boolean;
  error?: string;
}

// 5. /chat
export interface ChatOptions {
  providerId: string;
  sessionId: string;
  /** Full OpenAI ChatCompletionRequest payload */
  payload: ChatCompletionRequest;
}
// Output for /chat will be Promise<ChatCompletion> for non-streaming
// or Promise<AsyncIterable<ChatCompletionChunk>> for streaming

// 6. /delete
export interface DeleteOptions {
  providerId: string;
  modelId: string; // The ID of the model to delete (implies finding its path)
  modelPath?: string; // Optionally, direct path can be provided
}
export interface DeleteResult {
  success: boolean;
  error?: string;
}

// 7. /import
export interface ImportOptions {
  providerId: string;
  sourcePath: string; // Path to the local model file to import
  desiredModelId?: string; // Optional: if user wants to name it specifically
}
export interface ImportResult {
  success: boolean;
  modelInfo?: ModelInfo;
  error?: string;
}

// 8. /abortPull
export interface AbortPullOptions {
  providerId: string;
  modelId: string; // The modelId whose download is to be aborted
}
export interface AbortPullResult {
  success: boolean;
  error?: string;
}

// The interface for any local provider
export interface LocalProvider {
  readonly providerId: string;

  listModels(opts: ListOptions): Promise<ListResult>;
  pullModel(opts: PullOptions): Promise<PullResult>;
  loadModel(opts: LoadOptions): Promise<SessionInfo>;
  unloadModel(opts: UnloadOptions): Promise<UnloadResult>;
  chat(opts: ChatOptions): Promise<ChatCompletion | AsyncIterable<ChatCompletionChunk>>;
  deleteModel(opts: DeleteOptions): Promise<DeleteResult>;
  importModel(opts: ImportOptions): Promise<ImportResult>;
  abortPull(opts: AbortPullOptions): Promise<AbortPullResult>;

  // Optional: for direct access to underlying client if needed for specific streaming cases
  getChatClient?(sessionId: string): any; // e.g., an OpenAI client instance configured for the session
}
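The chat contract returns either a complete `ChatCompletion` or an async iterable of `ChatCompletionChunk`s. A sketch of how a caller can branch on that union (hypothetical provider instance, not part of the diff):

```typescript
// Sketch: consuming the ChatCompletion | AsyncIterable<ChatCompletionChunk> union.
async function runChat(provider: LocalProvider, sessionId: string) {
  const result = await provider.chat({
    providerId: provider.providerId,
    sessionId,
    payload: {
      model: 'local', // for local providers the model is implicit via sessionId
      messages: [{ role: 'user', content: 'Hello!' }],
      stream: true,
    },
  })

  if (Symbol.asyncIterator in (result as any)) {
    // Streaming path: deltas arrive incrementally.
    for await (const chunk of result as AsyncIterable<ChatCompletionChunk>) {
      process.stdout.write(chunk.choices[0]?.delta.content ?? '')
    }
  } else {
    // Non-streaming path: one complete response.
    console.log((result as ChatCompletion).choices[0].message.content)
  }
}
```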
extensions/yarn.lock (3700 lines): diff suppressed because it is too large
@ -3,12 +3,9 @@ use std::{
     fs::{self, File},
     io::Read,
     path::PathBuf,
-    sync::Arc,
 };
 use tar::Archive;
-use tauri::{App, Emitter, Listener, Manager};
-use tauri_plugin_shell::process::{CommandChild, CommandEvent};
-use tauri_plugin_shell::ShellExt;
+use tauri::{App, Emitter, Manager};
 use tauri_plugin_store::StoreExt;
 use tokio::sync::Mutex;
 use tokio::time::{sleep, Duration}; // Using tokio::sync::Mutex
@ -200,22 +197,18 @@ pub fn setup_mcp(app: &App) {
     let state = app.state::<AppState>();
     let servers = state.mcp_servers.clone();
     let app_handle: tauri::AppHandle = app.handle().clone();

     // Setup kill-mcp-servers event listener (similar to cortex kill-sidecar)
     let app_handle_for_kill = app_handle.clone();
     app_handle.listen("kill-mcp-servers", move |_event| {
         let app_handle = app_handle_for_kill.clone();
         tauri::async_runtime::spawn(async move {
             log::info!("Received kill-mcp-servers event - cleaning up MCP servers");

             let app_state = app_handle.state::<AppState>();

             // Stop all running MCP servers
             if let Err(e) = super::mcp::stop_mcp_servers(app_state.mcp_servers.clone()).await {
                 log::error!("Failed to stop MCP servers: {}", e);
                 return;
             }

             // Clear active servers and restart counts
             {
                 let mut active_servers = app_state.mcp_active_servers.lock().await;
@ -225,11 +218,9 @@ pub fn setup_mcp(app: &App) {
                 let mut restart_counts = app_state.mcp_restart_counts.lock().await;
                 restart_counts.clear();
             }

             log::info!("MCP servers cleaned up successfully");
         });
     });

     tauri::async_runtime::spawn(async move {
         if let Err(e) = run_mcp_commands(&app_handle, servers).await {
             log::error!("Failed to run mcp commands: {}", e);
@ -471,65 +462,22 @@ pub fn setup_sidecar(app: &App) -> Result<(), String> {
     Ok(())
 }

-//
-// Clean up function to kill the sidecar process
-//
-pub fn clean_up() {
-    #[cfg(windows)]
-    {
-        use std::os::windows::process::CommandExt;
-        let _ = std::process::Command::new("taskkill")
-            .args(["-f", "-im", "llama-server.exe"])
-            .creation_flags(0x08000000)
-            .spawn();
-        let _ = std::process::Command::new("taskkill")
-            .args(["-f", "-im", "cortex-server.exe"])
-            .creation_flags(0x08000000)
-            .spawn();
-    }
-    #[cfg(unix)]
-    {
-        let _ = std::process::Command::new("pkill")
-            .args(["-f", "llama-server"])
-            .spawn();
-        let _ = std::process::Command::new("pkill")
-            .args(["-f", "cortex-server"])
-            .spawn();
-    }
-    log::info!("Clean up function executed, sidecar processes killed.");
-}
-
-fn copy_dir_all(src: PathBuf, dst: PathBuf) -> Result<(), String> {
-    fs::create_dir_all(&dst).map_err(|e| e.to_string())?;
-    log::info!("Copying from {:?} to {:?}", src, dst);
-    for entry in fs::read_dir(src).map_err(|e| e.to_string())? {
-        let entry = entry.map_err(|e| e.to_string())?;
-        let ty = entry.file_type().map_err(|e| e.to_string())?;
-        if ty.is_dir() {
-            copy_dir_all(entry.path(), dst.join(entry.file_name())).map_err(|e| e.to_string())?;
-        } else {
-            fs::copy(entry.path(), dst.join(entry.file_name())).map_err(|e| e.to_string())?;
-        }
-    }
-    Ok(())
-}
-
-pub fn setup_engine_binaries(app: &App) -> Result<(), String> {
-    // Copy engine binaries to app_data
-    let app_data_dir = get_jan_data_folder_path(app.handle().clone());
-    let binaries_dir = app.handle().path().resource_dir().unwrap().join("binaries");
-    let resources_dir = app
-        .handle()
-        .path()
-        .resource_dir()
-        .unwrap()
-        .join("resources");
-
-    if let Err(e) = copy_dir_all(binaries_dir, app_data_dir.clone()) {
-        log::error!("Failed to copy binaries: {}", e);
-    }
-    if let Err(e) = copy_dir_all(resources_dir, app_data_dir.clone()) {
-        log::error!("Failed to copy resources: {}", e);
-    }
-    Ok(())
-}
+//pub fn setup_engine_binaries(app: &App) -> Result<(), String> {
+//    // Copy engine binaries to app_data
+//    let app_data_dir = app.handle().path().app_data_dir().unwrap();
+//    let binaries_dir = app.handle().path().resource_dir().unwrap().join("binaries");
+//    let themes_dir = app
+//        .handle()
+//        .path()
+//        .resource_dir()
+//        .unwrap()
+//        .join("resources");
+//
+//    if let Err(e) = copy_dir_all(binaries_dir, app_data_dir.clone()) {
+//        log::error!("Failed to copy binaries: {}", e);
+//    }
+//    if let Err(e) = copy_dir_all(themes_dir, app_data_dir.clone()) {
+//        log::error!("Failed to copy themes: {}", e);
+//    }
+//    Ok(())
+//}
@ -10,8 +10,8 @@ use crate::core::state::AppState;
 pub enum ServerError {
     #[error("Server is already running")]
     AlreadyRunning,
-    #[error("Server is not running")]
-    NotRunning,
+    // #[error("Server is not running")]
+    // NotRunning,
     #[error("Failed to locate server binary: {0}")]
     BinaryNotFound(String),
     #[error("Failed to determine resource path: {0}")]
@ -1,16 +1,14 @@
 mod core;
 use core::{
     cmd::get_jan_data_folder_path,
-    setup::{self, setup_engine_binaries, setup_mcp, setup_sidecar},
+    setup::{self, setup_mcp},
     state::{generate_app_token, AppState},
     utils::download::DownloadManagerState,
 };
 use std::{collections::HashMap, sync::Arc};

-use tauri::Emitter;
 use tokio::sync::Mutex;

-use crate::core::setup::clean_up;

 #[cfg_attr(mobile, tauri::mobile_entry_point)]
 pub fn run() {
@ -122,17 +120,17 @@ pub fn run() {
             log::error!("Failed to install extensions: {}", e);
         }
         setup_mcp(app);
-        setup_sidecar(app).expect("Failed to setup sidecar");
-        setup_engine_binaries(app).expect("Failed to setup engine binaries");
         Ok(())
     })
     .on_window_event(|window, event| match event {
         tauri::WindowEvent::CloseRequested { .. } => {
             if window.label() == "main" {
-                window.emit("kill-sidecar", ()).unwrap();
                 window.emit("kill-mcp-servers", ()).unwrap();
                 clean_up();
             }
+            let client = Client::new();
+            let url = "http://127.0.0.1:39291/processManager/destroy";
+            let _ = client.delete(url).send();
         }
         _ => {}
     })