Resolved conflicts by keeping HEAD changes

parent 19274f7e69
commit a8abc9f9aa
@@ -1,2 +0,0 @@
bin
!version.txt
@@ -1,75 +0,0 @@
# Create a Jan Extension using TypeScript

Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀

## Create Your Own Extension

To create your own extension, you can use this repository as a template! Just follow the instructions below:

1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository

## Initial Setup

After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.

> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!

1. :hammer_and_wrench: Install the dependencies

   ```bash
   npm install
   ```

1. :building_construction: Package the TypeScript for distribution

   ```bash
   npm run bundle
   ```

1. :white_check_mark: Check your artifact

   A `.tgz` file will now be present in your extension directory.

## Update the Extension Metadata

The [`package.json`](package.json) file defines metadata about your extension, such as
the extension name, main entry point, description, and version.

When you copy this repository, update `package.json` with the name and description of your extension.

## Update the Extension Code

The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.

There are a few things to keep in mind when writing your extension code:

- Most Jan Extension functions are processed asynchronously.
  In `index.ts`, you will see that the extension function returns a `Promise<any>`.

  ```typescript
  import { events, MessageEvent, MessageRequest } from '@janhq/core'

  function onStart(): Promise<any> {
    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.inference(data)
    )
  }
  ```
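
  The handler itself can be asynchronous. As a minimal sketch, the `inference` helper referenced above might look like this (the body shown is hypothetical):

  ```typescript
  import { MessageRequest } from '@janhq/core'

  async function inference(data: MessageRequest): Promise<void> {
    // Run any asynchronous work (model calls, file I/O, ...) here.
    console.log('Received message request:', data)
  }
  ```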

For more information about the Jan Extension Core module, see the
[documentation](https://github.com/menloresearch/jan/blob/main/core/README.md).

So, what are you waiting for? Go ahead and start customizing your extension!
@@ -1 +0,0 @@
1.0.13-rc9
@@ -1,40 +0,0 @@
@echo off
set BIN_PATH=./bin
set SHARED_PATH=./../../electron/shared
set /p CORTEX_VERSION=<./bin/version.txt
set ENGINE_VERSION=b5509

@REM Download llama.cpp binaries
set DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
set DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
set CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%
set SUBFOLDERS=win-noavx-cuda-cu12.0-x64 win-noavx-cuda-cu11.7-x64 win-avx2-cuda-cu12.0-x64 win-avx2-cuda-cu11.7-x64 win-noavx-x64 win-avx-x64 win-avx2-x64 win-avx512-x64 win-vulkan-x64

call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/menloresearch/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-cu12.0-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-cuda-cu12.0-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-cu11.7-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-cuda-cu11.7-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-cu12.0-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-cuda-cu12.0-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-cu11.7-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-cuda-cu11.7-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx512-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_GGML_URL%-vulkan-x64.zip -e --strip 1 -o %SHARED_PATH%/engines/llama.cpp/win-vulkan-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu12.0-x64.tar.gz -e --strip 1 -o %BIN_PATH%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu11.7-x64.tar.gz -e --strip 1 -o %BIN_PATH%

move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe
del %BIN_PATH%\cortex-beta.exe
del %BIN_PATH%\cortex.exe

@REM Loop through each folder and move DLLs
for %%F in (%SUBFOLDERS%) do (
  echo Processing folder: %SHARED_PATH%\engines\llama.cpp\%%F\%ENGINE_VERSION%

  @REM Move cu*.dll files
  for %%D in (%SHARED_PATH%\engines\llama.cpp\%%F\%ENGINE_VERSION%\cu*.dll) do (
    move "%%D" "%BIN_PATH%"
  )
)

echo DLL files moved successfully.
@@ -1,50 +0,0 @@
#!/bin/bash

# Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt)
ENGINE_VERSION=b5509
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
BIN_PATH=./bin
SHARED_PATH="../../electron/shared"
# Detect platform
OS_TYPE=$(uname)

if [ "$OS_TYPE" == "Linux" ]; then
  # Linux downloads
  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
  mv ./bin/cortex-server-beta ./bin/cortex-server
  rm -rf ./bin/cortex
  rm -rf ./bin/cortex-beta
  chmod +x "./bin/cortex-server"

  # Download engines for Linux
  download "${ENGINE_DOWNLOAD_URL}-linux-noavx-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx2-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx512-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx512-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu12.0-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu12.0-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu11.7-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu11.7-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu12.0-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu12.0-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu11.7-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu11.7-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-vulkan-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-vulkan-x64/${ENGINE_VERSION}" 1
  download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu12.0-x64.tar.gz" -e --strip 1 -o "${BIN_PATH}" 1
  download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu11.7-x64.tar.gz" -e --strip 1 -o "${BIN_PATH}" 1

elif [ "$OS_TYPE" == "Darwin" ]; then
  # macOS downloads
  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
  mv ./bin/cortex-server-beta ./bin/cortex-server
  rm -rf ./bin/cortex
  rm -rf ./bin/cortex-beta
  chmod +x "./bin/cortex-server"

  # Download engines for macOS
  download "${ENGINE_DOWNLOAD_URL}-macos-arm64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/macos-arm64/${ENGINE_VERSION}"
  download "${ENGINE_DOWNLOAD_URL}-macos-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/macos-x64/${ENGINE_VERSION}"

else
  echo "Unsupported operating system: $OS_TYPE"
  exit 1
fi
@@ -1,67 +0,0 @@
{
  "name": "@janhq/inference-cortex-extension",
  "productName": "Cortex Inference Engine",
  "version": "1.0.25",
  "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
  "main": "dist/index.js",
  "node": "dist/node/index.cjs.js",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "scripts": {
    "test": "vitest run",
    "build": "rolldown -c rolldown.config.mjs",
    "downloadcortex:linux:darwin": "./download.sh",
    "downloadcortex:win32": "download.bat",
    "downloadcortex": "run-script-os",
    "build:publish:darwin": "rimraf *.tgz --glob || true && yarn build && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
    "build:publish:win32:linux": "rimraf *.tgz --glob || true && yarn build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
    "build:publish": "run-script-os"
  },
  "exports": {
    ".": "./dist/index.js",
    "./main": "./dist/node/index.cjs.js"
  },
  "devDependencies": {
    "@jest/globals": "^29.7.0",
    "@types/decompress": "^4.2.7",
    "@types/jest": "^29.5.12",
    "@types/node": "^20.11.4",
    "@types/os-utils": "^0.0.4",
    "@types/tcp-port-used": "^1.0.4",
    "cpx": "^1.5.0",
    "download-cli": "^1.1.1",
    "jest": "^29.7.0",
    "rimraf": "^3.0.2",
    "rolldown": "1.0.0-beta.1",
    "run-script-os": "^1.1.6",
    "ts-jest": "^29.1.2",
    "typescript": "^5.3.3",
    "vitest": "^3.0.8"
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "fetch-retry": "^5.0.6",
    "ky": "^1.7.2",
    "p-queue": "^8.0.1",
    "rxjs": "^7.8.1",
    "ulidx": "^2.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "tcp-port-used",
    "fetch-retry",
    "@janhq/core",
    "decompress"
  ],
  "installConfig": {
    "hoistingLimits": "workspaces"
  },
  "packageManager": "yarn@4.5.3"
}
@@ -1,126 +0,0 @@
[
  {
    "key": "auto_unload_models",
    "title": "Auto-Unload Old Models",
    "description": "Automatically unloads models that are not in use to free up memory. Ensure only one model is loaded at a time.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "context_shift",
    "title": "Context Shift",
    "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": false
    }
  },
  {
    "key": "cont_batching",
    "title": "Continuous Batching",
    "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": ""
    }
  },
  {
    "key": "n_parallel",
    "title": "Parallel Operations",
    "description": "Number of prompts that can be processed simultaneously by the model.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "1",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cpu_threads",
    "title": "CPU Threads",
    "description": "Number of CPU cores used for model processing when running without GPU.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "-1 (auto-detect)",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "threads_batch",
    "title": "Threads (Batch)",
    "description": "Number of threads for batch and prompt processing (default: same as Threads).",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "-1 (same as Threads)",
      "type": "number"
    }
  },
  {
    "key": "flash_attn",
    "title": "Flash Attention",
    "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "caching_enabled",
    "title": "Caching",
    "description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "cache_type",
    "title": "KV Cache Type",
    "description": "Controls memory usage and precision trade-off.",
    "controllerType": "dropdown",
    "controllerProps": {
      "value": "q8_0",
      "options": [
        {
          "value": "q4_0",
          "name": "q4_0"
        },
        {
          "value": "q8_0",
          "name": "q8_0"
        },
        {
          "value": "f16",
          "name": "f16"
        }
      ]
    }
  },
  {
    "key": "use_mmap",
    "title": "mmap",
    "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "hugging-face-access-token",
    "title": "Hugging Face Access Token",
    "description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub, allowing applications to perform specific actions specified by the scope of permissions granted.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "hf_**********************************",
      "type": "password",
      "inputActions": ["unobscure", "copy"]
    }
  }
]
@@ -1,44 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import defaultSettingJson from './resources/default_settings.json' with { type: 'json' }

export default defineConfig([
  {
    input: 'src/index.ts',
    output: {
      format: 'esm',
      file: 'dist/index.js',
    },
    platform: 'browser',
    define: {
      NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      SETTINGS: JSON.stringify(defaultSettingJson),
      CORTEX_API_URL: JSON.stringify(
        `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
      CORTEX_SOCKET_URL: JSON.stringify(
        `ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
      CORTEX_ENGINE_VERSION: JSON.stringify('b5509'),
    },
  },
  {
    input: 'src/node/index.ts',
    external: ['@janhq/core/node'],
    output: {
      format: 'cjs',
      file: 'dist/node/index.cjs.js',
      sourcemap: false,
      inlineDynamicImports: true,
    },
    resolve: {
      extensions: ['.js', '.ts', '.json'],
    },
    define: {
      CORTEX_API_URL: JSON.stringify(
        `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
    },
    platform: 'node',
  },
])
@@ -1,5 +0,0 @@
declare const NODE: string
declare const CORTEX_API_URL: string
declare const CORTEX_SOCKET_URL: string
declare const CORTEX_ENGINE_VERSION: string
declare const SETTINGS: any
@@ -1,452 +0,0 @@
import { describe, beforeEach, it, expect, vi, afterEach } from 'vitest'

// Must mock before imports
vi.mock('@janhq/core', () => {
  return {
    executeOnMain: vi.fn().mockResolvedValue({}),
    events: {
      emit: vi.fn()
    },
    extractModelLoadParams: vi.fn().mockReturnValue({}),
    ModelEvent: {
      OnModelsUpdate: 'OnModelsUpdate',
      OnModelStopped: 'OnModelStopped'
    },
    EngineEvent: {
      OnEngineUpdate: 'OnEngineUpdate'
    },
    InferenceEngine: {
      cortex: 'cortex',
      nitro: 'nitro',
      cortex_llamacpp: 'cortex_llamacpp'
    },
    LocalOAIEngine: class LocalOAIEngine {
      onLoad() {}
      onUnload() {}
    }
  }
})

import JanInferenceCortexExtension, { Settings } from './index'
import { InferenceEngine, ModelEvent, EngineEvent, executeOnMain, events } from '@janhq/core'
import ky from 'ky'

// Mock global variables
const CORTEX_API_URL = 'http://localhost:3000'
const CORTEX_SOCKET_URL = 'ws://localhost:3000'
const SETTINGS = [
  { id: 'n_parallel', name: 'Parallel Execution', description: 'Number of parallel executions', type: 'number', value: '4' },
  { id: 'cont_batching', name: 'Continuous Batching', description: 'Enable continuous batching', type: 'boolean', value: true },
  { id: 'caching_enabled', name: 'Caching', description: 'Enable caching', type: 'boolean', value: true },
  { id: 'flash_attn', name: 'Flash Attention', description: 'Enable flash attention', type: 'boolean', value: true },
  { id: 'cache_type', name: 'Cache Type', description: 'Type of cache to use', type: 'string', value: 'f16' },
  { id: 'use_mmap', name: 'Use Memory Map', description: 'Use memory mapping', type: 'boolean', value: true },
  { id: 'cpu_threads', name: 'CPU Threads', description: 'Number of CPU threads', type: 'number', value: '' }
]
const NODE = 'node'

// Mock globals
vi.stubGlobal('CORTEX_API_URL', CORTEX_API_URL)
vi.stubGlobal('CORTEX_SOCKET_URL', CORTEX_SOCKET_URL)
vi.stubGlobal('SETTINGS', SETTINGS)
vi.stubGlobal('NODE', NODE)
vi.stubGlobal('window', {
  addEventListener: vi.fn()
})

// Mock WebSocket
class MockWebSocket {
  url: string
  listeners: {}
  onclose: Function

  constructor(url) {
    this.url = url
    this.listeners = {}
  }

  addEventListener(event, listener) {
    this.listeners[event] = listener
  }

  emit(event, data) {
    if (this.listeners[event]) {
      this.listeners[event](data)
    }
  }

  close() {
    if (this.onclose) {
      this.onclose({ code: 1000 })
    }
  }
}

// Mock global WebSocket
// @ts-ignore
global.WebSocket = vi.fn().mockImplementation((url) => new MockWebSocket(url))

describe('JanInferenceCortexExtension', () => {
  let extension

  beforeEach(() => {
    // Reset mocks
    vi.clearAllMocks()

    // Create a new instance for each test
    extension = new JanInferenceCortexExtension()

    // Mock the getSetting method
    extension.getSetting = vi.fn().mockImplementation((key, defaultValue) => {
      switch (key) {
        case Settings.n_parallel:
          return '4'
        case Settings.cont_batching:
          return true
        case Settings.caching_enabled:
          return true
        case Settings.flash_attn:
          return true
        case Settings.cache_type:
          return 'f16'
        case Settings.use_mmap:
          return true
        case Settings.cpu_threads:
          return ''
        default:
          return defaultValue
      }
    })

    // Mock methods
    extension.registerSettings = vi.fn()
    extension.onLoad = vi.fn()
    extension.clean = vi.fn().mockResolvedValue({})
    extension.healthz = vi.fn().mockResolvedValue({})
    extension.subscribeToEvents = vi.fn()
  })

  describe('onSettingUpdate', () => {
    it('should update n_parallel setting correctly', () => {
      extension.onSettingUpdate(Settings.n_parallel, '8')
      expect(extension.n_parallel).toBe(8)
    })

    it('should update cont_batching setting correctly', () => {
      extension.onSettingUpdate(Settings.cont_batching, false)
      expect(extension.cont_batching).toBe(false)
    })

    it('should update caching_enabled setting correctly', () => {
      extension.onSettingUpdate(Settings.caching_enabled, false)
      expect(extension.caching_enabled).toBe(false)
    })

    it('should update flash_attn setting correctly', () => {
      extension.onSettingUpdate(Settings.flash_attn, false)
      expect(extension.flash_attn).toBe(false)
    })

    it('should update cache_type setting correctly', () => {
      extension.onSettingUpdate(Settings.cache_type, 'f32')
      expect(extension.cache_type).toBe('f32')
    })

    it('should update use_mmap setting correctly', () => {
      extension.onSettingUpdate(Settings.use_mmap, false)
      expect(extension.use_mmap).toBe(false)
    })

    it('should update cpu_threads setting correctly', () => {
      extension.onSettingUpdate(Settings.cpu_threads, '4')
      expect(extension.cpu_threads).toBe(4)
    })

    it('should not update cpu_threads when value is not a number', () => {
      extension.cpu_threads = undefined
      extension.onSettingUpdate(Settings.cpu_threads, 'not-a-number')
      expect(extension.cpu_threads).toBeUndefined()
    })
  })

  describe('onUnload', () => {
    it('should clean up resources correctly', async () => {
      extension.shouldReconnect = true

      await extension.onUnload()

      expect(extension.shouldReconnect).toBe(false)
      expect(extension.clean).toHaveBeenCalled()
      expect(executeOnMain).toHaveBeenCalledWith(NODE, 'dispose')
    })
  })

  describe('loadModel', () => {
    it('should remove llama_model_path and mmproj from settings', async () => {
      // Setup
      const model = {
        id: 'test-model',
        settings: {
          llama_model_path: '/path/to/model',
          mmproj: '/path/to/mmproj',
          some_setting: 'value'
        },
        engine: InferenceEngine.cortex_llamacpp
      }

      // Mock ky.post
      vi.spyOn(ky, 'post').mockImplementation(() => ({
        // @ts-ignore
        json: () => Promise.resolve({}),
        catch: () => ({
          finally: () => ({
            // @ts-ignore
            then: () => Promise.resolve({})
          })
        })
      }))

      // Setup queue for testing
      extension.queue = { add: vi.fn(fn => fn()) }

      // Execute
      await extension.loadModel(model)

      // Verify settings were filtered
      expect(model.settings).not.toHaveProperty('llama_model_path')
      expect(model.settings).not.toHaveProperty('mmproj')
      expect(model.settings).toHaveProperty('some_setting')
    })

    it('should convert nitro to cortex_llamacpp engine', async () => {
      // Setup
      const model = {
        id: 'test-model',
        settings: {},
        engine: InferenceEngine.nitro
      }

      // Mock ky.post
      const mockKyPost = vi.spyOn(ky, 'post').mockImplementation(() => ({
        // @ts-ignore
        json: () => Promise.resolve({}),
        catch: () => ({
          finally: () => ({
            // @ts-ignore
            then: () => Promise.resolve({})
          })
        })
      }))

      // Setup queue for testing
      extension.queue = { add: vi.fn(fn => fn()) }

      // Execute
      await extension.loadModel(model)

      // Verify API call
      expect(mockKyPost).toHaveBeenCalledWith(
        `${CORTEX_API_URL}/v1/models/start`,
        expect.objectContaining({
          json: expect.objectContaining({
            engine: InferenceEngine.cortex_llamacpp
          })
        })
      )
    })
  })

  describe('unloadModel', () => {
    it('should call the correct API endpoint and abort loading if in progress', async () => {
      // Setup
      const model = { id: 'test-model' }
      const mockAbort = vi.fn()
      extension.abortControllers.set(model.id, { abort: mockAbort })

      // Mock ky.post
      const mockKyPost = vi.spyOn(ky, 'post').mockImplementation(() => ({
        // @ts-ignore
        json: () => Promise.resolve({}),
        finally: () => ({
          // @ts-ignore
          then: () => Promise.resolve({})
        })
      }))

      // Execute
      await extension.unloadModel(model)

      // Verify API call
      expect(mockKyPost).toHaveBeenCalledWith(
        `${CORTEX_API_URL}/v1/models/stop`,
        expect.objectContaining({
          json: { model: model.id }
        })
      )

      // Verify abort controller was called
      expect(mockAbort).toHaveBeenCalled()
    })
  })

  describe('clean', () => {
    it('should make a DELETE request to destroy process manager', async () => {
      // Mock the ky.delete function directly
      const mockDelete = vi.fn().mockReturnValue({
        catch: vi.fn().mockReturnValue(Promise.resolve({}))
      })

      // Replace the original implementation
      vi.spyOn(ky, 'delete').mockImplementation(mockDelete)

      // Override the clean method to use the real implementation
      // @ts-ignore
      extension.clean = JanInferenceCortexExtension.prototype.clean

      // Call the method
      await extension.clean()

      // Verify the correct API call was made
      expect(mockDelete).toHaveBeenCalledWith(
        `${CORTEX_API_URL}/processmanager/destroy`,
        expect.objectContaining({
          timeout: 2000,
          retry: expect.objectContaining({
            limit: 0
          })
        })
      )
    })
  })

  describe('WebSocket events', () => {
    it('should handle WebSocket events correctly', () => {
      // Create a mock implementation for subscribeToEvents that stores the socket
      let messageHandler;
      let closeHandler;

      // Override the private method
      extension.subscribeToEvents = function() {
        this.socket = new MockWebSocket('ws://localhost:3000/events');
        this.socket.addEventListener('message', (event) => {
          const data = JSON.parse(event.data);

          // Store for testing
          messageHandler = data;

          const transferred = data.task.items.reduce(
            (acc, cur) => acc + cur.downloadedBytes,
            0
          );
          const total = data.task.items.reduce(
            (acc, cur) => acc + cur.bytes,
            0
          );
          const percent = total > 0 ? transferred / total : 0;

          events.emit(
            data.type === 'DownloadUpdated' ? 'onFileDownloadUpdate' :
            data.type === 'DownloadSuccess' ? 'onFileDownloadSuccess' :
            data.type,
            {
              modelId: data.task.id,
              percent: percent,
              size: {
                transferred: transferred,
                total: total,
              },
              downloadType: data.task.type,
            }
          );

          if (data.task.type === 'Engine') {
            events.emit(EngineEvent.OnEngineUpdate, {
              type: data.type,
              percent: percent,
              id: data.task.id,
            });
          }
          else if (data.type === 'DownloadSuccess') {
            setTimeout(() => {
              events.emit(ModelEvent.OnModelsUpdate, {
                fetch: true,
              });
            }, 500);
          }
        });

        this.socket.onclose = (event) => {
          closeHandler = event;
          // Notify app to update model running state
          events.emit(ModelEvent.OnModelStopped, {});
        };
      };

      // Setup queue
      extension.queue = {
        add: vi.fn(fn => fn())
      };

      // Execute the method
      extension.subscribeToEvents();

      // Simulate a message event
      extension.socket.listeners.message({
        data: JSON.stringify({
          type: 'DownloadUpdated',
          task: {
            id: 'test-model',
            type: 'Model',
            items: [
              { downloadedBytes: 50, bytes: 100 }
            ]
          }
        })
      });

      // Verify event emission
      expect(events.emit).toHaveBeenCalledWith(
        'onFileDownloadUpdate',
        expect.objectContaining({
          modelId: 'test-model',
          percent: 0.5
        })
      );

      // Simulate a download success event
      vi.useFakeTimers();
      extension.socket.listeners.message({
        data: JSON.stringify({
          type: 'DownloadSuccess',
          task: {
            id: 'test-model',
            type: 'Model',
            items: [
              { downloadedBytes: 100, bytes: 100 }
            ]
          }
        })
      });

      // Fast-forward time to trigger the timeout
      vi.advanceTimersByTime(500);

      // Verify the ModelEvent.OnModelsUpdate event was emitted
      expect(events.emit).toHaveBeenCalledWith(
        ModelEvent.OnModelsUpdate,
        { fetch: true }
      );

      vi.useRealTimers();

      // Trigger websocket close
      extension.socket.onclose({ code: 1000 });

      // Verify OnModelStopped event was emitted
      expect(events.emit).toHaveBeenCalledWith(
        ModelEvent.OnModelStopped,
        {}
      );
    });
  })
})
@@ -1,435 +0,0 @@
/**
 * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module inference-extension/src/index
 */

import {
  Model,
  EngineEvent,
  LocalOAIEngine,
  extractModelLoadParams,
  events,
  ModelEvent,
} from '@janhq/core'
import ky, { KyInstance } from 'ky'

/**
 * Event subscription types of Downloader
 */
enum DownloadTypes {
  DownloadUpdated = 'onFileDownloadUpdate',
  DownloadError = 'onFileDownloadError',
  DownloadSuccess = 'onFileDownloadSuccess',
  DownloadStopped = 'onFileDownloadStopped',
  DownloadStarted = 'onFileDownloadStarted',
}

enum Settings {
  n_parallel = 'n_parallel',
  cont_batching = 'cont_batching',
  caching_enabled = 'caching_enabled',
  flash_attn = 'flash_attn',
  cache_type = 'cache_type',
  use_mmap = 'use_mmap',
  cpu_threads = 'cpu_threads',
  huggingfaceToken = 'hugging-face-access-token',
  auto_unload_models = 'auto_unload_models',
  context_shift = 'context_shift',
}

type LoadedModelResponse = { data: { engine: string; id: string }[] }

/**
 * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
export default class JanInferenceCortexExtension extends LocalOAIEngine {
  nodeModule: string = 'node'

  provider: string = 'cortex'

  shouldReconnect = true

  /** Default Engine model load settings */
  n_parallel?: number
  cont_batching: boolean = false
  caching_enabled: boolean = true
  flash_attn: boolean = true
  use_mmap: boolean = true
  cache_type: string = 'q8'
  cpu_threads?: number
  auto_unload_models: boolean = true
  reasoning_budget = -1 // Default reasoning budget in seconds
  context_shift = false
  /**
   * The URL for making inference requests.
   */
  inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`

  /**
   * Socket instance of events subscription
   */
  socket?: WebSocket = undefined

  abortControllers = new Map<string, AbortController>()

  api?: KyInstance
  /**
   * Get the API instance
   * @returns
   */
  async apiInstance(): Promise<KyInstance> {
    if (this.api) return this.api
    const apiKey = await window.core?.api.appToken()
    this.api = ky.extend({
      prefixUrl: CORTEX_API_URL,
      headers: apiKey
        ? {
            Authorization: `Bearer ${apiKey}`,
          }
        : {},
      retry: 10,
    })
    return this.api
  }

  /**
   * Authorization headers for the API requests.
   * @returns
   */
  headers(): Promise<HeadersInit> {
    return window.core?.api.appToken().then((token: string) => ({
      Authorization: `Bearer ${token}`,
    }))
  }

  /**
   * Called when the extension is loaded.
   */
  async onLoad() {
    super.onLoad()

    // Register Settings
    this.registerSettings(SETTINGS)

    const numParallel = await this.getSetting<string>(Settings.n_parallel, '')
    if (numParallel.length > 0 && parseInt(numParallel) > 0) {
      this.n_parallel = parseInt(numParallel)
    }
    if (this.n_parallel && this.n_parallel > 1)
      this.cont_batching = await this.getSetting<boolean>(
        Settings.cont_batching,
        false
      )
    this.caching_enabled = await this.getSetting<boolean>(
      Settings.caching_enabled,
      true
    )
    this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
    this.context_shift = await this.getSetting<boolean>(
      Settings.context_shift,
      false
    )
    this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
    if (this.caching_enabled)
      this.cache_type = await this.getSetting<string>(Settings.cache_type, 'q8')
    this.auto_unload_models = await this.getSetting<boolean>(
      Settings.auto_unload_models,
      true
    )
    const threads_number = Number(
      await this.getSetting<string>(Settings.cpu_threads, '')
    )

    if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number

    const huggingfaceToken = await this.getSetting<string>(
      Settings.huggingfaceToken,
      ''
    )
    if (huggingfaceToken) {
      this.updateCortexConfig({ huggingface_token: huggingfaceToken })
    }
    this.subscribeToEvents()

    window.addEventListener('beforeunload', () => {
      this.clean()
    })

    // Migrate configs
    if (!localStorage.getItem('cortex_migration_completed')) {
      const config = await this.getCortexConfig()
      console.log('Start cortex.cpp migration', config)
      if (config && config.huggingface_token) {
        this.updateSettings([
          {
            key: Settings.huggingfaceToken,
            controllerProps: {
              value: config.huggingface_token,
            },
          },
        ])
        this.updateCortexConfig({
          huggingface_token: config.huggingface_token,
        })
        localStorage.setItem('cortex_migration_completed', 'true')
      }
    }
  }

  async onUnload() {
    console.log('Clean up cortex.cpp services')
    this.shouldReconnect = false
    this.clean()
    super.onUnload()
  }

  /**
   * Subscribe to settings update and make change accordingly
   * @param key
   * @param value
   */
  onSettingUpdate<T>(key: string, value: T): void {
    if (key === Settings.n_parallel && typeof value === 'string') {
      if (value.length > 0 && parseInt(value) > 0) {
        this.n_parallel = parseInt(value)
      }
    } else if (key === Settings.cont_batching && typeof value === 'boolean') {
      this.cont_batching = value as boolean
    } else if (key === Settings.caching_enabled && typeof value === 'boolean') {
      this.caching_enabled = value as boolean
    } else if (key === Settings.flash_attn && typeof value === 'boolean') {
      this.flash_attn = value as boolean
    } else if (key === Settings.cache_type && typeof value === 'string') {
      this.cache_type = value as string
    } else if (key === Settings.use_mmap && typeof value === 'boolean') {
      this.use_mmap = value as boolean
    } else if (key === Settings.cpu_threads && typeof value === 'string') {
      const threads_number = Number(value)
      if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
    } else if (key === Settings.huggingfaceToken) {
      this.updateCortexConfig({ huggingface_token: value })
    } else if (key === Settings.auto_unload_models) {
      this.auto_unload_models = value as boolean
    } else if (key === Settings.context_shift && typeof value === 'boolean') {
      this.context_shift = value
    }
  }

  override async loadModel(
    model: Partial<Model> & {
      id: string
      settings?: object
      file_path?: string
    },
    abortController: AbortController
  ): Promise<void> {
    // Cortex will handle these settings
    const { llama_model_path, mmproj, ...settings } = model.settings ?? {}
    model.settings = settings

    const controller = abortController ?? new AbortController()
    const { signal } = controller

    this.abortControllers.set(model.id, controller)

    const loadedModels = await this.activeModels()

    // This is to avoid loading the same model multiple times
    if (loadedModels.some((e: { id: string }) => e.id === model.id)) {
      console.log(`Model ${model.id} already loaded`)
      return
    }
    if (this.auto_unload_models) {
      // Unload the last used model if it is not the same as the current one
      for (const lastUsedModel of loadedModels) {
        if (lastUsedModel.id !== model.id) {
          console.log(`Unloading last used model: ${lastUsedModel.id}`)
          await this.unloadModel(lastUsedModel as Model)
        }
      }
    }
    const modelSettings = extractModelLoadParams(model.settings)
    return await this.apiInstance().then((api) =>
      api
        .post('v1/models/start', {
          json: {
            ...modelSettings,
            model: model.id,
            engine:
              model.engine === 'nitro' // Legacy model cache
                ? 'llama-cpp'
                : model.engine,
            ...(this.n_parallel ? { n_parallel: this.n_parallel } : {}),
            ...(this.use_mmap ? { use_mmap: true } : {}),
            ...(this.caching_enabled ? { caching_enabled: true } : {}),
            ...(this.flash_attn ? { flash_attn: true } : {}),
            ...(this.caching_enabled && this.cache_type
              ? { cache_type: this.cache_type }
              : {}),
            ...(this.cpu_threads && this.cpu_threads > 0
              ? { cpu_threads: this.cpu_threads }
              : {}),
            ...(this.cont_batching && this.n_parallel && this.n_parallel > 1
              ? { cont_batching: this.cont_batching }
              : {}),
            ...(model.id.toLowerCase().includes('jan-nano')
              ? { reasoning_budget: 0 }
              : { reasoning_budget: this.reasoning_budget }),
            ...(this.context_shift !== true // explicit true required to enable context shift
              ? { 'no-context-shift': true }
              : {}),
            ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined
              ? { ngl: 100 }
              : {}),
          },
          timeout: false,
          signal,
        })
        .json()
        .catch(async (e) => {
          throw (await e.response?.json()) ?? e
        })
        .finally(() => this.abortControllers.delete(model.id))
        .then()
    )
  }

  override async unloadModel(model: Model): Promise<void> {
    return this.apiInstance().then((api) =>
      api
        .post('v1/models/stop', {
          json: { model: model.id },
          retry: {
            limit: 0,
          },
        })
        .json()
        .finally(() => {
          this.abortControllers.get(model.id)?.abort()
        })
        .then()
    )
  }

  async activeModels(): Promise<(object & { id: string })[]> {
    return await this.apiInstance()
      .then((e) =>
        e.get('inferences/server/models', {
          retry: {
            limit: 0, // Do not retry
          },
        })
      )
      .then((e) => e.json())
      .then((e) => (e as LoadedModelResponse).data ?? [])
      .catch(() => [])
  }

  /**
   * Clean cortex processes
   * @returns
   */
  private async clean(): Promise<any> {
    return this.apiInstance()
      .then((api) =>
        api.delete('processmanager/destroy', {
          timeout: 2000, // maximum 2 seconds
          retry: {
            limit: 0,
          },
        })
      )
      .catch(() => {
        // Do nothing
      })
  }

  /**
   * Update cortex config
   * @param body
   */
  private async updateCortexConfig(body: {
    [key: string]: any
  }): Promise<void> {
    return this.apiInstance()
      .then((api) => api.patch('v1/configs', { json: body }).then(() => {}))
      .catch((e) => console.debug(e))
  }

  /**
   * Get cortex config
   * @param body
   */
  private async getCortexConfig(): Promise<any> {
    return this.apiInstance()
      .then((api) => api.get('v1/configs').json())
      .catch((e) => console.debug(e))
  }

  /**
   * Subscribe to cortex.cpp websocket events
   */
  private subscribeToEvents() {
    this.socket = new WebSocket(`${CORTEX_SOCKET_URL}/events`)

    this.socket.addEventListener('message', (event) => {
      const data = JSON.parse(event.data)

      const transferred = data.task.items.reduce(
        (acc: number, cur: any) => acc + cur.downloadedBytes,
        0
      )
      const total = data.task.items.reduce(
        (acc: number, cur: any) => acc + cur.bytes,
        0
      )
      const percent = total > 0 ? transferred / total : 0
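      // Worked example of the math above (it mirrors the unit test): a task
      // with items [{ downloadedBytes: 50, bytes: 100 }] gives
      // transferred = 50, total = 100, and therefore percent = 0.5.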

      events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], {
        modelId: data.task.id,
        percent: percent,
        size: {
          transferred: transferred,
          total: total,
        },
        downloadType: data.task.type,
      })

      if (data.task.type === 'Engine') {
        events.emit(EngineEvent.OnEngineUpdate, {
          type: DownloadTypes[data.type as keyof typeof DownloadTypes],
          percent: percent,
          id: data.task.id,
        })
      } else {
        if (data.type === DownloadTypes.DownloadSuccess) {
          // Delay for the state update from cortex.cpp
          // Just to be sure
          setTimeout(() => {
            events.emit(ModelEvent.OnModelsUpdate, {
              fetch: true,
            })
          }, 500)
        }
      }
    })

    /**
     * This is to handle the server segfault issue
     */
    this.socket.onclose = (event) => {
      // Notify app to update model running state
      events.emit(ModelEvent.OnModelStopped, {})

      // Reconnect to the /events websocket
      if (this.shouldReconnect) {
        setTimeout(() => this.subscribeToEvents(), 1000)
      }
    }
  }
}
@@ -1,144 +0,0 @@
import { describe, it, expect, vi } from 'vitest'
// Mocks

const CORTEX_API_URL = 'http://localhost:3000'
vi.stubGlobal('CORTEX_API_URL', CORTEX_API_URL)

vi.mock('@janhq/core/node', (actual) => ({
  ...actual(),
  getJanDataFolderPath: () => '',
  appResourcePath: () => '/mock/path',
  log: vi.fn(),
  getSystemResourceInfo: () => {
    return {
      cpu: {
        cores: 1,
        logicalCores: 1,
        threads: 1,
        model: 'model',
        speed: 1,
      },
      memory: {
        total: 1,
        free: 1,
      },
      gpu: {
        model: 'model',
        memory: 1,
        cuda: {
          version: 'version',
          devices: 'devices',
        },
        vulkan: {
          version: 'version',
          devices: 'devices',
        },
      },
    }
  },
}))

vi.mock('fs', () => ({
  default: {
    readdirSync: () => [],
  },
}))

vi.mock('./watchdog', () => {
  return {
    ProcessWatchdog: vi.fn().mockImplementation(() => {
      return {
        start: vi.fn(),
        terminate: vi.fn(),
      }
    }),
  }
})

vi.mock('child_process', () => ({
  exec: () => {
    return {
      stdout: { on: vi.fn() },
      stderr: { on: vi.fn() },
      on: vi.fn(),
    }
  },
  spawn: () => {
    return {
      stdout: { on: vi.fn() },
      stderr: { on: vi.fn() },
      on: vi.fn(),
      pid: '111',
    }
  },
}))

import index from './index'

describe('Cortex extension node interface', () => {
  describe('run', () => {
    it('should start the cortex subprocess on macOS', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'darwin',
      })

      const result = await index.run()
      expect(result).toBeUndefined()
    })

    it('should start the cortex subprocess on Windows', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'win32',
      })

      const result = await index.run()
      expect(result).toBeUndefined()
    })

    it('should set the proper environment variables based on platform', async () => {
      // Test for Windows
      Object.defineProperty(process, 'platform', {
        value: 'win32',
      })
      process.env.PATH = '/original/path'

      await index.run()
      expect(process.env.PATH).toContain('/original/path')

      // Test for non-Windows (macOS/Linux)
      Object.defineProperty(process, 'platform', {
        value: 'darwin',
      })
      process.env.LD_LIBRARY_PATH = '/original/ld/path'

      await index.run()
      expect(process.env.LD_LIBRARY_PATH).toContain('/original/ld/path')
    })
  })

  describe('dispose', () => {
    it('should dispose a model successfully on Mac', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'darwin',
      })

      // Call the dispose function
      const result = index.dispose()

      // Assert that the result is as expected
      expect(result).toBeUndefined()
    })

    it('should kill the subprocess successfully on Windows', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'win32',
      })

      // Call the dispose function
      const result = index.dispose()

      // Assert that the result is as expected
      expect(result).toBeUndefined()
    })
  })
})
@@ -1,103 +0,0 @@
import path from 'path'
import { appResourcePath, getJanDataFolderPath, log } from '@janhq/core/node'
import { ProcessWatchdog } from './watchdog'

let watchdog: ProcessWatchdog | undefined = undefined

/**
 * Spawns a cortex subprocess.
 * @returns A promise that resolves when the cortex subprocess is started.
 */
function run(): Promise<any> {
  log(`[CORTEX]:: Spawning cortex subprocess...`)

  return new Promise<void>(async (resolve, reject) => {
    // let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
    let binaryName = `cortex-server${
      process.platform === 'win32' ? '.exe' : ''
    }`
    const binPath = path.join(__dirname, '..', 'bin')

    const executablePath = path.join(binPath, binaryName)

    addEnvPaths(binPath)

    const sharedPath = path.join(appResourcePath(), 'shared')
    // Execute the binary
    log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)

    const dataFolderPath = getJanDataFolderPath()
    if (watchdog) {
      watchdog.terminate()
    }

    // The HOST address to use for the cortex subprocess
    const LOCAL_PORT = CORTEX_API_URL.split(':').pop() ?? '39291'

    watchdog = new ProcessWatchdog(
      executablePath,
      [
        '--start-server',
        '--port',
        LOCAL_PORT.toString(),
        '--config_file_path',
        `${path.join(dataFolderPath, '.janrc')}`,
        '--data_folder_path',
        dataFolderPath,
        'config',
        '--api_keys',
        process.env.appToken ?? 'cortex.cpp',
      ],
      {
        env: {
          ...process.env,
          // CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
          // // Vulkan - Support 1 device at a time for now
          // ...(gpuVisibleDevices?.length > 0 && {
          //   GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
          // }),
        },
        cwd: sharedPath,
      }
    )
    watchdog.start()
    resolve()
  })
}

/**
 * Every module should have a dispose function
 * This will be called when the extension is unloaded and should clean up any resources
 * Also called when app is closed
 */
function dispose() {
  watchdog?.terminate()
}

/**
 * Set the environment paths for the cortex subprocess
 * @param dest
 */
function addEnvPaths(dest: string) {
  // Add engine path to the PATH and LD_LIBRARY_PATH
  if (process.platform === 'win32') {
    process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
  } else {
    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
      path.delimiter,
      dest
    )
  }
}

/**
 * Cortex process info
 */
export interface CortexProcessInfo {
  isRunning: boolean
}

export default {
  run,
  dispose,
}
@@ -1,84 +0,0 @@
import { log } from '@janhq/core/node'
import { spawn, ChildProcess } from 'child_process'
import { EventEmitter } from 'events'

interface WatchdogOptions {
  cwd?: string
  restartDelay?: number
  maxRestarts?: number
  env?: NodeJS.ProcessEnv
}

export class ProcessWatchdog extends EventEmitter {
  private command: string
  private args: string[]
  private options: WatchdogOptions
  private process: ChildProcess | null
  private restartDelay: number
  private maxRestarts: number
  private restartCount: number
  private isTerminating: boolean

  constructor(command: string, args: string[], options: WatchdogOptions = {}) {
    super()
    this.command = command
    this.args = args
    this.options = options
    this.process = null
    this.restartDelay = options.restartDelay || 5000
    this.maxRestarts = options.maxRestarts || 5
    this.restartCount = 0
    this.isTerminating = false
  }

  start(): void {
    this.spawnProcess()
  }

  private spawnProcess(): void {
    if (this.isTerminating) return

    log(`Starting process: ${this.command} ${this.args.join(' ')}`)
    this.process = spawn(this.command, this.args, this.options)

    this.process.stdout?.on('data', (data: Buffer) => {
      log(`Process output: ${data}`)
      this.emit('output', data.toString())
    })

    this.process.stderr?.on('data', (data: Buffer) => {
      log(`Process error: ${data}`)
      this.emit('error', data.toString())
    })

    this.process.on('close', (code: number | null) => {
      log(`Process exited with code ${code}`)
      this.emit('close', code)
      if (!this.isTerminating) {
        this.restartProcess()
      }
    })
  }

  private restartProcess(): void {
    if (this.restartCount < this.maxRestarts) {
      this.restartCount++
      log(
        `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})`
      )
      setTimeout(() => this.spawnProcess(), this.restartDelay)
    } else {
      log('Max restart attempts reached. Exiting watchdog.')
      this.emit('maxRestartsReached')
    }
  }

  terminate(): void {
    this.isTerminating = true
    if (this.process) {
      log('Terminating watched process...')
      this.process.kill()
    }
    this.emit('terminated')
  }
}
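// A minimal usage sketch of the class above (illustrative only; the binary
// path and arguments are hypothetical placeholders):
//
//   const dog = new ProcessWatchdog('./bin/cortex-server', ['--start-server'], {
//     restartDelay: 1000,
//     maxRestarts: 3,
//   })
//   dog.on('close', (code) => log(`watched process exited with code ${code}`))
//   dog.start()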
@@ -1,15 +0,0 @@
{
  "compilerOptions": {
    "moduleResolution": "node",
    "target": "es2016",
    "module": "esnext",
    "strict": true,
    "sourceMap": true,
    "esModuleInterop": true,
    "outDir": "dist",
    "importHelpers": true,
    "typeRoots": ["node_modules/@types"]
  },
  "include": ["src"],
  "exclude": ["src/**/*.test.ts"]
}
@@ -21,6 +21,7 @@
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "@tauri-apps/api": "^1.4.0",
    "fetch-retry": "^5.0.6",
    "ulidx": "^2.3.0"
  },
@ -6,10 +6,55 @@
 * @module llamacpp-extension/src/index
 */

import { RemoteOAIEngine, getJanDataFolderPath, fs, ModelCapability, Model } from '@janhq/core'
import {
  AIEngine,
  localProvider,
  getJanDataFolderPath,
  fs,
  Model,
} from '@janhq/core'

export enum Settings {
  port = 'port',
import { invoke } from '@tauri-apps/api/tauri'
import {
  LocalProvider,
  ModelInfo,
  ListOptions,
  ListResult,
  PullOptions,
  PullResult,
  LoadOptions,
  SessionInfo,
  UnloadOptions,
  UnloadResult,
  ChatOptions,
  ChatCompletion,
  ChatCompletionChunk,
  DeleteOptions,
  DeleteResult,
  ImportOptions,
  ImportResult,
  AbortPullOptions,
  AbortPullResult,
  ChatCompletionRequest,
} from './types'

/**
 * Helper to convert a GGUF model filename to a more structured ID/name.
 * Example: "mistral-7b-instruct-v0.2.Q4_K_M.gguf" -> { baseModelId: "mistral-7b-instruct-v0.2", quant: "Q4_K_M" }
 **/
function parseGGUFFileName(filename: string): {
  baseModelId: string
  quant?: string
} {
  const nameWithoutExt = filename.replace(/\.gguf$/i, '')
  // Try to split by common quantization patterns (e.g., .Q4_K_M, -IQ2_XS)
  const match = nameWithoutExt.match(
    /^(.*?)[-_.]([QqIiFf]\w{1,3}_\w{1,3}|[Qq]\d+_[KkSsMmXxLl\d]+|[IiQq]\d+_[XxSsMm]+|[Qq]\d+)$/
  )
  if (match && match[1] && match[2]) {
    return { baseModelId: match[1], quant: match[2] }
  }
  return { baseModelId: nameWithoutExt }
}
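Concretely, the helper parses typical GGUF filenames like so (note the separator class must include `.` as well as `-`/`_` for the doc comment's own example to hold):

```typescript
parseGGUFFileName('mistral-7b-instruct-v0.2.Q4_K_M.gguf')
// -> { baseModelId: 'mistral-7b-instruct-v0.2', quant: 'Q4_K_M' }

parseGGUFFileName('llama-3-8b-IQ2_XS.gguf')
// -> { baseModelId: 'llama-3-8b', quant: 'IQ2_XS' }

parseGGUFFileName('model.gguf')
// no recognizable quant suffix -> { baseModelId: 'model' }
```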

/**
@ -17,99 +62,246 @@ export enum Settings {
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
export default class LlamacppProvider extends RemoteOAIEngine {
  inferenceUrl: string = ''
  baseURL: string = ''
  provider: string = ENGINE
export default class inference_llamacpp_extension
  extends AIEngine
  implements localProvider
{
  provider: string = 'llamacpp'
  readonly providerId: string = 'llamacpp'

  private activeSessions: Map<string, SessionInfo> = new Map()

  private modelsBasePath!: string

  override async onLoad(): Promise<void> {
    super.onLoad()
    super.onLoad() // Calls registerEngine() from AIEngine
    this.registerSettings(SETTINGS_DEFINITIONS)

    // Register Settings
    this.registerSettings(SETTINGS)

    // register models
    const models = await this.listModels()
    this.registerModels(models)

    // NOTE: port 0 may mean request free port from OS. we may want
    // to take advantage of this. llama-server --port 0 on macOS works.
    const port = await this.getSetting<number>(Settings.port, 0)
    this.updateBaseUrl(port)
  }
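Since the NOTE above relies on the OS assigning a free port, a caller needs some way to discover when the spawned llama-server is actually ready. A minimal readiness probe, assuming llama-server's standard `/health` endpoint (an assumption; it is not referenced in this diff):

```typescript
// Poll the (assumed) llama-server /health endpoint until the model is ready.
async function waitForServer(port: number, timeoutMs = 30_000): Promise<void> {
  const deadline = Date.now() + timeoutMs
  while (Date.now() < deadline) {
    try {
      const res = await fetch(`http://127.0.0.1:${port}/health`)
      if (res.ok) return // 200 = server up and model loaded
    } catch {
      // server not accepting connections yet; keep polling
    }
    await new Promise((r) => setTimeout(r, 250))
  }
  throw new Error(`llama-server on port ${port} did not become ready`)
}
```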

  // onSettingUpdate<T>(key: string, value: T): void {
  //   if (key === Settings.apiKey) {
  //     this.apiKey = value as string
  //   } else if (key === Settings.baseUrl) {
  //     if (typeof value !== 'string') return
  //     this.updateBaseUrl(value)
  //   }
  // }

  updateBaseUrl(value: number): void {
    if (value == 0) {
      // set to default value
      SETTINGS.forEach((setting) => {
        if (setting.key === Settings.port) {
          value = setting.controllerProps.value as number
        }
      })
    const customPath = await this.getSetting<string>(
      LlamaCppSettings.ModelsPath,
      ''
    )
    if (customPath && (await fs.exists(customPath))) {
      this.modelsBasePath = customPath
    } else {
      this.modelsBasePath = await path.join(
        await getJanDataFolderPath(),
        'models',
        ENGINE_ID
      )
    }
    this.baseURL = `http://127.0.0.1:${value}`
    this.inferenceUrl = `${this.baseURL}/chat/completions`
    await fs.createDirAll(this.modelsBasePath)

    console.log(
      `${this.providerId} provider loaded. Models path: ${this.modelsBasePath}`
    )

    // Optionally, list and register models with the core system if AIEngine expects it
    // const models = await this.listModels({ providerId: this.providerId });
    // this.registerModels(this.mapModelInfoToCoreModel(models)); // mapModelInfoToCoreModel would be a helper
  }
  async listModels(): Promise<Model[]> {
    let modelIds = []
  async getModelsPath(): Promise<string> {
    // Ensure modelsBasePath is initialized
    if (!this.modelsBasePath) {
      const customPath = await this.getSetting<string>(
        LlamaCppSettings.ModelsPath,
        ''
      )
      if (customPath && (await fs.exists(customPath))) {
        this.modelsBasePath = customPath
      } else {
        this.modelsBasePath = await path.join(
          await getJanDataFolderPath(),
          'models',
          ENGINE_ID
        )
      }
      await fs.createDirAll(this.modelsBasePath)
    }
    return this.modelsBasePath
  }

    const modelsFolder = `${await getJanDataFolderPath()}/models`
  async listModels(_opts: ListOptions): Promise<ListResult> {
    const modelsDir = await this.getModelsPath()
    const result: ModelInfo[] = []

    // cortexso models
    const cortexsoFolder = `${modelsFolder}/cortex.so`
    const modelDirs = await fs.readdirSync(cortexsoFolder)
    for (const modelDir of modelDirs) {
      const modelName = modelDir.split('/').pop()
    try {
      if (!(await fs.exists(modelsDir))) {
        await fs.createDirAll(modelsDir)
        return []
      }

      // TODO: try removing this check
      // skip files start with . e.g. .DS_Store
      if (!modelName || modelName.startsWith('.')) continue
      const entries = await fs.readDir(modelsDir)
      for (const entry of entries) {
        if (entry.name?.endsWith('.gguf') && entry.isFile) {
          const modelPath = await path.join(modelsDir, entry.name)
          const stats = await fs.stat(modelPath) // Tauri's fs.stat or Node's fs.statSync
          const parsedName = parseGGUFFileName(entry.name)

          const variantDirs = await fs.readdirSync(modelDir)
          for (const variantDir of variantDirs) {
            // NOTE: we can't detect unfinished download here
            const ggufPath = `${variantDir}/model.gguf`

            if (await fs.existsSync(ggufPath)) {
              const variantName = variantDir.split('/').pop()
              modelIds.push(`${modelName}/${variantName}`)
          result.push({
            id: `${parsedName.baseModelId}${parsedName.quant ? `/${parsedName.quant}` : ''}`, // e.g., "mistral-7b/Q4_0"
            name: entry.name.replace('.gguf', ''), // Or a more human-friendly name
            quant_type: parsedName.quant,
            providerId: this.providerId,
            sizeBytes: stats.size,
            path: modelPath,
            tags: [this.providerId, parsedName.quant || 'unknown_quant'].filter(
              Boolean
            ) as string[],
          })
            }
          }
    } catch (error) {
      console.error(`[${this.providerId}] Error listing models:`, error)
      // Depending on desired behavior, either throw or return empty/partial list
    }
    return result
  }
  // pullModel
  async pullModel(opts: PullOptions): Promise<PullResult> {
    // TODO: Implement pullModel
    return { success: false, error: 'not implemented' }
  }

  // abortPull
  async abortPull(opts: AbortPullOptions): Promise<AbortPullResult> {
    // TODO: implement abortPull
    return { success: false, error: 'not implemented' }
  }

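For reference, one way the pull stub could be filled in is to reuse the `invoke` bridge pattern that `loadModel` below already uses. The `plugin:llamacpp|pull` command name and its response shape are assumptions, not part of this diff:

```typescript
// Hypothetical bridge: delegate the download to a Rust command, mirroring
// the 'plugin:llamacpp|load' pattern used by loadModel.
import { invoke } from '@tauri-apps/api/tauri'
import { PullOptions, PullResult } from './types'

async function pullViaRust(opts: PullOptions): Promise<PullResult> {
  try {
    // 'plugin:llamacpp|pull' is an assumed command name.
    const resp: { path: string } = await invoke('plugin:llamacpp|pull', {
      modelId: opts.modelId,
      downloadUrl: opts.downloadUrl,
    })
    return { success: true, path: resp.path }
  } catch (error: any) {
    return { success: false, error: error.message || String(error) }
  }
}
```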
  async loadModel(opts: LoadOptions): Promise<SessionInfo> {
    if (opts.providerId !== this.providerId) {
      throw new Error('Invalid providerId for LlamaCppProvider.loadModel')
    }

    // TODO: list models under huggingface.co
    const sessionId = uuidv4()
    const loadParams = {
      model_path: opts.modelPath,
      session_id: sessionId, // Pass sessionId to Rust for tracking
      // Default llama.cpp server options, can be overridden by opts.options
      port: opts.options?.port ?? 0, // 0 for dynamic port assignment by OS
      n_gpu_layers:
        opts.options?.n_gpu_layers ??
        (await this.getSetting(LlamaCppSettings.DefaultNGpuLayers, -1)),
      n_ctx:
        opts.options?.n_ctx ??
        (await this.getSetting(LlamaCppSettings.DefaultNContext, 2048)),
      // Spread any other options from opts.options
      ...(opts.options || {}),
    }

    const models = modelIds.map((modelId) => {
    try {
      console.log(
        `[${this.providerId}] Requesting to load model: ${opts.modelPath} with options:`,
        loadParams
      )
      // This matches the Rust handler: core::utils::extensions::inference_llamacpp_extension::server::load
      const rustResponse: {
        session_id: string
        port: number
        model_path: string
        settings: Record<string, unknown>
      } = await invoke('plugin:llamacpp|load', { params: loadParams }) // Adjust namespace if needed

      if (!rustResponse || !rustResponse.port) {
        throw new Error(
          'Rust load function did not return expected port or session info.'
        )
      }

      const sessionInfo: SessionInfo = {
        sessionId: rustResponse.session_id, // Use sessionId from Rust if it regenerates/confirms it
        port: rustResponse.port,
        modelPath: rustResponse.model_path,
        providerId: this.providerId,
        settings: rustResponse.settings, // Settings actually used by the server
      }

      this.activeSessions.set(sessionInfo.sessionId, sessionInfo)
      console.log(
        `[${this.providerId}] Model loaded: ${sessionInfo.modelPath} on port ${sessionInfo.port}, session: ${sessionInfo.sessionId}`
      )
      return sessionInfo
    } catch (error) {
      console.error(
        `[${this.providerId}] Error loading model ${opts.modelPath}:`,
        error
      )
      throw error // Re-throw to be handled by the caller
    }
  }
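End to end, a caller would combine `loadModel` with the per-session port it returns. A sketch; the `/v1/chat/completions` route (llama-server's OpenAI-compatible endpoint) is an assumption, and the paths are placeholders:

```typescript
// Sketch: load a model, then send one chat request to the per-session server.
import { LocalProvider } from './types'

async function demo(provider: LocalProvider) {
  const session = await provider.loadModel({
    providerId: 'llamacpp',
    modelPath: '/path/to/model.gguf',
    options: { n_ctx: 4096 },
  })
  const res = await fetch(
    `http://127.0.0.1:${session.port}/v1/chat/completions`,
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: session.modelPath,
        messages: [{ role: 'user', content: 'Hello!' }],
      }),
    }
  )
  console.log(await res.json())
}
```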
  async unloadModel(opts: UnloadOptions): Promise<UnloadResult> {
    if (opts.providerId !== this.providerId) {
      return { success: false, error: 'Invalid providerId' }
    }
    const session = this.activeSessions.get(opts.sessionId)
    if (!session) {
      return {
        sources: [],
        object: 'model',
        version: '1.0',
        format: 'api',
        id: modelId,
        name: modelId,
        created: 0,
        description: '',
        settings: {},
        parameters: {},
        metadata: {
          author: '',
          tags: [],
          size: 0,
        },
        engine: this.provider,
        capabilities: [ModelCapability.completion],
        success: false,
        error: `No active session found for id: ${opts.sessionId}`,
      }
    })
    return models
  }

    try {
      console.log(
        `[${this.providerId}] Requesting to unload model for session: ${opts.sessionId}`
      )
      // Matches: core::utils::extensions::inference_llamacpp_extension::server::unload
      const rustResponse: { success: boolean; error?: string } = await invoke(
        'plugin:llamacpp|unload',
        { sessionId: opts.sessionId }
      )

      if (rustResponse.success) {
        this.activeSessions.delete(opts.sessionId)
        console.log(
          `[${this.providerId}] Session ${opts.sessionId} unloaded successfully.`
        )
        return { success: true }
      } else {
        console.error(
          `[${this.providerId}] Failed to unload session ${opts.sessionId}: ${rustResponse.error}`
        )
        return {
          success: false,
          error: rustResponse.error || 'Unknown error during unload',
        }
      }
    } catch (error: any) {
      console.error(
        `[${this.providerId}] Error invoking unload for session ${opts.sessionId}:`,
        error
      )
      return { success: false, error: error.message || String(error) }
    }
  }
  async chat(
    opts: ChatOptions
  ): Promise<ChatCompletion | AsyncIterable<ChatCompletionChunk>> {}

  async deleteModel(opts: DeleteOptions): Promise<DeleteResult> {}

  async importModel(opts: ImportOptions): Promise<ImportResult> {}

  override async loadModel(model: Model): Promise<any> {
    if (model.engine?.toString() !== this.provider) return Promise.resolve()
    console.log(
      `[${this.providerId} AIEngine] Received OnModelInit for:`,
      model.id
    )
    return super.loadModel(model)
  }

  override async unloadModel(model?: Model): Promise<any> {
    if (model?.engine && model.engine.toString() !== this.provider)
      return Promise.resolve()
    console.log(
      `[${this.providerId} AIEngine] Received OnModelStop for:`,
      model?.id || 'all models'
    )
    return super.unloadModel(model)
  }
}
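The `chat`, `deleteModel`, and `importModel` bodies above are still empty stubs. A minimal non-streaming sketch of what `chat` could do, given the session map that `loadModel` populates; the `/v1/chat/completions` route is an assumption, not taken from this diff:

```typescript
// Hypothetical chat(): forward the OpenAI-style payload to the llama-server
// instance that loadModel started for this session.
import { ChatOptions, ChatCompletion, SessionInfo } from './types'

async function chat(
  sessions: Map<string, SessionInfo>,
  opts: ChatOptions
): Promise<ChatCompletion> {
  const session = sessions.get(opts.sessionId)
  if (!session) throw new Error(`No active session: ${opts.sessionId}`)
  const res = await fetch(
    `http://127.0.0.1:${session.port}/v1/chat/completions`,
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ ...opts.payload, stream: false }),
    }
  )
  if (!res.ok) throw new Error(`chat failed: HTTP ${res.status}`)
  return (await res.json()) as ChatCompletion
}
```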
extensions/llamacpp-extension/src/types.ts (new file, 199 lines)
@ -0,0 +1,199 @@
// src/providers/local/types.ts

// --- Re-using OpenAI types (minimal definitions for this example) ---
// In a real project, you'd import these from 'openai' or a shared types package.
export interface ChatCompletionRequestMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string | null;
  name?: string;
  tool_calls?: any[]; // Simplified
  tool_call_id?: string;
}

export interface ChatCompletionRequest {
  model: string; // Model ID, though for local it might be implicit via sessionId
  messages: ChatCompletionRequestMessage[];
  temperature?: number | null;
  top_p?: number | null;
  n?: number | null;
  stream?: boolean | null;
  stop?: string | string[] | null;
  max_tokens?: number;
  presence_penalty?: number | null;
  frequency_penalty?: number | null;
  logit_bias?: Record<string, number> | null;
  user?: string;
  // ... TODO: other OpenAI params
}

export interface ChatCompletionChunkChoiceDelta {
  content?: string | null;
  role?: 'system' | 'user' | 'assistant' | 'tool';
  tool_calls?: any[]; // Simplified
}

export interface ChatCompletionChunkChoice {
  index: number;
  delta: ChatCompletionChunkChoiceDelta;
  finish_reason?: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null;
}

export interface ChatCompletionChunk {
  id: string;
  object: 'chat.completion.chunk';
  created: number;
  model: string;
  choices: ChatCompletionChunkChoice[];
  system_fingerprint?: string;
}

export interface ChatCompletionChoice {
  index: number;
  message: ChatCompletionRequestMessage; // Response message
  finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call';
  logprobs?: any; // Simplified
}

export interface ChatCompletion {
  id: string;
  object: 'chat.completion';
  created: number;
  model: string; // Model ID used
  choices: ChatCompletionChoice[];
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
  system_fingerprint?: string;
}
// --- End OpenAI types ---

// Shared model metadata
export interface ModelInfo {
  id: string; // e.g. "qwen3-4B" or "org/model/quant"
  name: string; // human-readable, e.g., "Qwen3 4B Q4_0"
  quant_type?: string; // q4_0 (optional as it might be part of ID or name)
  providerId: string; // e.g. "llama.cpp"
  sizeBytes: number;
  tags?: string[];
  path?: string; // Absolute path to the model file, if applicable
  // Additional provider-specific metadata can be added here
  [key: string]: any;
}

// 1. /list
export interface ListOptions {
  providerId: string; // To specify which provider if a central manager calls this
}
export type ListResult = ModelInfo[];

// 2. /pull
export interface PullOptions {
  providerId: string;
  modelId: string; // Identifier for the model to pull (e.g., from a known registry)
  downloadUrl: string; // URL to download the model from
  /** optional callback to receive download progress */
  onProgress?: (progress: { percent: number; downloadedBytes: number; totalBytes?: number; }) => void;
}
export interface PullResult {
  success: boolean;
  path?: string; // local file path to the pulled model
  error?: string;
  modelInfo?: ModelInfo; // Info of the pulled model
}

// 3. /load
export interface LoadOptions {
  providerId: string;
  modelPath: string;
  /** any provider-specific tuning options for llama.cpp server */
  options?: {
    port?: number; // 0 means dynamic port
    n_gpu_layers?: number;
    n_ctx?: number; // context size
    // ... other llama-cpp-python or llama.cpp server flags
    [key: string]: any;
  };
}

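As a small illustration of how the `onProgress` callback on `PullOptions` above might be fed, here is a plain fetch-streaming download loop (a sketch, not code from this diff; the disk write is elided):

```typescript
// Sketch: report download progress through PullOptions.onProgress.
async function downloadWithProgress(opts: PullOptions): Promise<void> {
  const res = await fetch(opts.downloadUrl)
  if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`)
  const totalBytes = Number(res.headers.get('content-length')) || undefined
  let downloadedBytes = 0
  const reader = res.body.getReader()
  for (;;) {
    const { done, value } = await reader.read()
    if (done) break
    downloadedBytes += value.byteLength
    opts.onProgress?.({
      percent: totalBytes ? (downloadedBytes / totalBytes) * 100 : 0,
      downloadedBytes,
      totalBytes,
    })
    // each chunk (Uint8Array) would be written to disk here
  }
}
```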
export interface SessionInfo {
  sessionId: string; // opaque handle for unload/chat
  port: number; // llama-server output port (corrected from portid)
  modelPath: string; // path of the loaded model
  providerId: string;
  settings: Record<string, unknown>; // The actual settings used to load
}

// 4. /unload
export interface UnloadOptions {
  providerId: string;
  sessionId: string;
}
export interface UnloadResult {
  success: boolean;
  error?: string;
}

// 5. /chat
export interface ChatOptions {
  providerId: string;
  sessionId: string;
  /** Full OpenAI ChatCompletionRequest payload */
  payload: ChatCompletionRequest;
}
// Output for /chat will be Promise<ChatCompletion> for non-streaming
// or Promise<AsyncIterable<ChatCompletionChunk>> for streaming

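For the streaming case described in the comment above, the caller simply iterates the returned `AsyncIterable`. A consumption sketch grounded only in the chunk types defined in this file:

```typescript
// Sketch: drain a streaming chat response chunk by chunk into a string.
async function collectStream(
  stream: AsyncIterable<ChatCompletionChunk>
): Promise<string> {
  let text = ''
  for await (const chunk of stream) {
    const choice = chunk.choices[0]
    if (choice?.delta?.content) text += choice.delta.content
    if (choice?.finish_reason === 'stop') break
  }
  return text
}
```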
// 6. /delete
export interface DeleteOptions {
  providerId: string;
  modelId: string; // The ID of the model to delete (implies finding its path)
  modelPath?: string; // Optionally, direct path can be provided
}
export interface DeleteResult {
  success: boolean;
  error?: string;
}

// 7. /import
export interface ImportOptions {
  providerId: string;
  sourcePath: string; // Path to the local model file to import
  desiredModelId?: string; // Optional: if user wants to name it specifically
}
export interface ImportResult {
  success: boolean;
  modelInfo?: ModelInfo;
  error?: string;
}

// 8. /abortPull
export interface AbortPullOptions {
  providerId: string;
  modelId: string; // The modelId whose download is to be aborted
}
export interface AbortPullResult {
  success: boolean;
  error?: string;
}

// The interface for any local provider
export interface LocalProvider {
  readonly providerId: string;

  listModels(opts: ListOptions): Promise<ListResult>;
  pullModel(opts: PullOptions): Promise<PullResult>;
  loadModel(opts: LoadOptions): Promise<SessionInfo>;
  unloadModel(opts: UnloadOptions): Promise<UnloadResult>;
  chat(opts: ChatOptions): Promise<ChatCompletion | AsyncIterable<ChatCompletionChunk>>;
  deleteModel(opts: DeleteOptions): Promise<DeleteResult>;
  importModel(opts: ImportOptions): Promise<ImportResult>;
  abortPull(opts: AbortPullOptions): Promise<AbortPullResult>;

  // Optional: for direct access to underlying client if needed for specific streaming cases
  getChatClient?(sessionId: string): any; // e.g., an OpenAI client instance configured for the session
}
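To see the whole contract in one place, here is a throwaway in-memory stub that satisfies `LocalProvider` (every value is illustrative; useful only as a compile-time check or test double):

```typescript
// Minimal stub demonstrating that a class satisfies LocalProvider.
class StubProvider implements LocalProvider {
  readonly providerId = 'stub'
  async listModels(): Promise<ListResult> {
    return []
  }
  async pullModel(): Promise<PullResult> {
    return { success: false, error: 'stub' }
  }
  async loadModel(opts: LoadOptions): Promise<SessionInfo> {
    return {
      sessionId: 'stub-session',
      port: 0,
      modelPath: opts.modelPath,
      providerId: this.providerId,
      settings: {},
    }
  }
  async unloadModel(): Promise<UnloadResult> {
    return { success: true }
  }
  async chat(): Promise<ChatCompletion> {
    throw new Error('not implemented')
  }
  async deleteModel(): Promise<DeleteResult> {
    return { success: true }
  }
  async importModel(): Promise<ImportResult> {
    return { success: false, error: 'stub' }
  }
  async abortPull(): Promise<AbortPullResult> {
    return { success: true }
  }
}
```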
extensions/yarn.lock (3700 lines changed; diff suppressed because it is too large)
@ -3,12 +3,9 @@ use std::{
    fs::{self, File},
    io::Read,
    path::PathBuf,
    sync::Arc,
};
use tar::Archive;
use tauri::{App, Emitter, Listener, Manager};
use tauri_plugin_shell::process::{CommandChild, CommandEvent};
use tauri_plugin_shell::ShellExt;
use tauri::{App, Emitter, Manager};
use tauri_plugin_store::StoreExt;
use tokio::sync::Mutex;
use tokio::time::{sleep, Duration}; // Using tokio::sync::Mutex
@ -200,22 +197,18 @@ pub fn setup_mcp(app: &App) {
    let state = app.state::<AppState>();
    let servers = state.mcp_servers.clone();
    let app_handle: tauri::AppHandle = app.handle().clone();

    // Setup kill-mcp-servers event listener (similar to cortex kill-sidecar)
    let app_handle_for_kill = app_handle.clone();
    app_handle.listen("kill-mcp-servers", move |_event| {
        let app_handle = app_handle_for_kill.clone();
        tauri::async_runtime::spawn(async move {
            log::info!("Received kill-mcp-servers event - cleaning up MCP servers");

            let app_state = app_handle.state::<AppState>();

            // Stop all running MCP servers
            if let Err(e) = super::mcp::stop_mcp_servers(app_state.mcp_servers.clone()).await {
                log::error!("Failed to stop MCP servers: {}", e);
                return;
            }

            // Clear active servers and restart counts
            {
                let mut active_servers = app_state.mcp_active_servers.lock().await;
@ -225,11 +218,9 @@ pub fn setup_mcp(app: &App) {
                let mut restart_counts = app_state.mcp_restart_counts.lock().await;
                restart_counts.clear();
            }

            log::info!("MCP servers cleaned up successfully");
        });
    });

    tauri::async_runtime::spawn(async move {
        if let Err(e) = run_mcp_commands(&app_handle, servers).await {
            log::error!("Failed to run mcp commands: {}", e);
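On the TypeScript side, anything with access to the Tauri event API can trigger this cleanup path. A one-function illustration (the event name comes from the listener above; `emit` is the @tauri-apps/api v1 event API):

```typescript
import { emit } from '@tauri-apps/api/event'

// Ask the Rust side to stop and clear all MCP servers.
export async function killMcpServers(): Promise<void> {
  await emit('kill-mcp-servers')
}
```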
@ -471,65 +462,22 @@ pub fn setup_sidecar(app: &App) -> Result<(), String> {

//pub fn setup_engine_binaries(app: &App) -> Result<(), String> {
//    // Copy engine binaries to app_data
//    let app_data_dir = app.handle().path().app_data_dir().unwrap();
//    let binaries_dir = app.handle().path().resource_dir().unwrap().join("binaries");
//    let themes_dir = app
//        .handle()
//        .path()
//        .resource_dir()
//        .unwrap()
//        .join("resources");
//
// Clean up function to kill the sidecar process
//
pub fn clean_up() {
    #[cfg(windows)]
    {
        use std::os::windows::process::CommandExt;
        let _ = std::process::Command::new("taskkill")
            .args(["-f", "-im", "llama-server.exe"])
            .creation_flags(0x08000000)
            .spawn();
        let _ = std::process::Command::new("taskkill")
            .args(["-f", "-im", "cortex-server.exe"])
            .creation_flags(0x08000000)
            .spawn();
    }
    #[cfg(unix)]
    {
        let _ = std::process::Command::new("pkill")
            .args(["-f", "llama-server"])
            .spawn();
        let _ = std::process::Command::new("pkill")
            .args(["-f", "cortex-server"])
            .spawn();
    }
    log::info!("Clean up function executed, sidecar processes killed.");
}
fn copy_dir_all(src: PathBuf, dst: PathBuf) -> Result<(), String> {
    fs::create_dir_all(&dst).map_err(|e| e.to_string())?;
    log::info!("Copying from {:?} to {:?}", src, dst);
    for entry in fs::read_dir(src).map_err(|e| e.to_string())? {
        let entry = entry.map_err(|e| e.to_string())?;
        let ty = entry.file_type().map_err(|e| e.to_string())?;
        if ty.is_dir() {
            copy_dir_all(entry.path(), dst.join(entry.file_name())).map_err(|e| e.to_string())?;
        } else {
            fs::copy(entry.path(), dst.join(entry.file_name())).map_err(|e| e.to_string())?;
        }
    }
    Ok(())
}
pub fn setup_engine_binaries(app: &App) -> Result<(), String> {
    // Copy engine binaries to app_data
    let app_data_dir = get_jan_data_folder_path(app.handle().clone());
    let binaries_dir = app.handle().path().resource_dir().unwrap().join("binaries");
    let resources_dir = app
        .handle()
        .path()
        .resource_dir()
        .unwrap()
        .join("resources");

    if let Err(e) = copy_dir_all(binaries_dir, app_data_dir.clone()) {
        log::error!("Failed to copy binaries: {}", e);
    }
    if let Err(e) = copy_dir_all(resources_dir, app_data_dir.clone()) {
        log::error!("Failed to copy resources: {}", e);
    }
    Ok(())
}
// if let Err(e) = copy_dir_all(binaries_dir, app_data_dir.clone()) {
//     log::error!("Failed to copy binaries: {}", e);
// }
// if let Err(e) = copy_dir_all(themes_dir, app_data_dir.clone()) {
//     log::error!("Failed to copy themes: {}", e);
// }
// Ok(())
//}
@ -10,8 +10,8 @@ use crate::core::state::AppState;
pub enum ServerError {
    #[error("Server is already running")]
    AlreadyRunning,
    #[error("Server is not running")]
    NotRunning,
    // #[error("Server is not running")]
    // NotRunning,
    #[error("Failed to locate server binary: {0}")]
    BinaryNotFound(String),
    #[error("Failed to determine resource path: {0}")]
@ -1,16 +1,14 @@
mod core;
use core::{
    cmd::get_jan_data_folder_path,
    setup::{self, setup_engine_binaries, setup_mcp, setup_sidecar},
    setup::{self, setup_mcp},
    state::{generate_app_token, AppState},
    utils::download::DownloadManagerState,
};
use std::{collections::HashMap, sync::Arc};

use tauri::Emitter;
use tokio::sync::Mutex;

use crate::core::setup::clean_up;

#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
@ -122,17 +120,17 @@ pub fn run() {
            log::error!("Failed to install extensions: {}", e);
        }
        setup_mcp(app);
        setup_sidecar(app).expect("Failed to setup sidecar");
        setup_engine_binaries(app).expect("Failed to setup engine binaries");
        Ok(())
    })
    .on_window_event(|window, event| match event {
        tauri::WindowEvent::CloseRequested { .. } => {
            if window.label() == "main" {
                window.emit("kill-sidecar", ()).unwrap();
                window.emit("kill-mcp-servers", ()).unwrap();
                clean_up();
            }
            let client = Client::new();
            let url = "http://127.0.0.1:39291/processManager/destroy";
            let _ = client.delete(url).send();
        }
        _ => {}
    })