Resolved conflicts by keeping HEAD changes

Authored by Akarshan Biswas on 2025-05-17 12:55:38 +05:30; committed by Louis
parent 19274f7e69
commit a8abc9f9aa
22 changed files with 541 additions and 5458 deletions

View File

@@ -1,2 +0,0 @@
bin
!version.txt

View File

@@ -1,75 +0,0 @@
# Create a Jan Extension using TypeScript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the instructions below:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will be a `.tgz` file in your extension directory now (see the quick check below).
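A minimal way to verify the artifact from your shell (the exact filename depends on your extension's name and version):
```bash
# List the tarball produced by `npm pack` during the bundle step
ls *.tgz
```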
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name and description for your extension.
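As an illustrative sketch, recent versions of npm can also update these fields from the command line; the values below are placeholders, not this extension's real metadata:
```bash
# Replace the placeholder values with your own extension's metadata
npm pkg set name="my-jan-extension" \
  description="A short description of what your extension does" \
  version="0.1.0"
```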
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/menloresearch/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@@ -1 +0,0 @@
1.0.13-rc9

View File

@@ -1,40 +0,0 @@
@echo off
set BIN_PATH=./bin
set SHARED_PATH=./../../electron/shared
set /p CORTEX_VERSION=<./bin/version.txt
set ENGINE_VERSION=b5509
@REM Download llama.cpp binaries
set DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
set DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
set CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%
set SUBFOLDERS=win-noavx-cuda-cu12.0-x64 win-noavx-cuda-cu11.7-x64 win-avx2-cuda-cu12.0-x64 win-avx2-cuda-cu11.7-x64 win-noavx-x64 win-avx-x64 win-avx2-x64 win-avx512-x64 win-vulkan-x64
call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/menloresearch/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-cu12.0-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-cuda-cu12.0-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-cu11.7-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-cuda-cu11.7-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-cu12.0-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-cuda-cu12.0-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-cu11.7-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-cuda-cu11.7-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx512-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_GGML_URL%-vulkan-x64.zip -e --strip 1 -o %SHARED_PATH%/engines/llama.cpp/win-vulkan-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu12.0-x64.tar.gz -e --strip 1 -o %BIN_PATH%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu11.7-x64.tar.gz -e --strip 1 -o %BIN_PATH%
move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe
del %BIN_PATH%\cortex-beta.exe
del %BIN_PATH%\cortex.exe
@REM Loop through each folder and move DLLs
for %%F in (%SUBFOLDERS%) do (
echo Processing folder: %SHARED_PATH%\engines\llama.cpp\%%F\%ENGINE_VERSION%
@REM Move cu*.dll files
for %%D in (%SHARED_PATH%\engines\llama.cpp\%%F\%ENGINE_VERSION%\cu*.dll) do (
move "%%D" "%BIN_PATH%"
)
)
echo DLL files moved successfully.

View File

@@ -1,50 +0,0 @@
#!/bin/bash
# Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt)
ENGINE_VERSION=b5509
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
BIN_PATH=./bin
SHARED_PATH="../../electron/shared"
# Detect platform
OS_TYPE=$(uname)
if [ "$OS_TYPE" == "Linux" ]; then
# Linux downloads
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
mv ./bin/cortex-server-beta ./bin/cortex-server
rm -rf ./bin/cortex
rm -rf ./bin/cortex-beta
chmod +x "./bin/cortex-server"
# Download engines for Linux
download "${ENGINE_DOWNLOAD_URL}-linux-noavx-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-x64/${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-avx-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx-x64/${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-avx2-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-x64/${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-avx512-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx512-x64/${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu12.0-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu12.0-x64/${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu11.7-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu11.7-x64/${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu12.0-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu12.0-x64/${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu11.7-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu11.7-x64/${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-vulkan-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-vulkan-x64/${ENGINE_VERSION}" 1
download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu12.0-x64.tar.gz" -e --strip 1 -o "${BIN_PATH}" 1
download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu11.7-x64.tar.gz" -e --strip 1 -o "${BIN_PATH}" 1
elif [ "$OS_TYPE" == "Darwin" ]; then
# macOS downloads
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
mv ./bin/cortex-server-beta ./bin/cortex-server
rm -rf ./bin/cortex
rm -rf ./bin/cortex-beta
chmod +x "./bin/cortex-server"
# Download engines for macOS
download "${ENGINE_DOWNLOAD_URL}-macos-arm64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/macos-arm64/${ENGINE_VERSION}"
download "${ENGINE_DOWNLOAD_URL}-macos-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/macos-x64/${ENGINE_VERSION}"
else
echo "Unsupported operating system: $OS_TYPE"
exit 1
fi

View File

@@ -1,67 +0,0 @@
{
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
"version": "1.0.25",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"test": "vitest run",
"build": "rolldown -c rolldown.config.mjs",
"downloadcortex:linux:darwin": "./download.sh",
"downloadcortex:win32": "download.bat",
"downloadcortex": "run-script-os",
"build:publish:darwin": "rimraf *.tgz --glob || true && yarn build && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:win32:linux": "rimraf *.tgz --glob || true && yarn build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish": "run-script-os"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@jest/globals": "^29.7.0",
"@types/decompress": "^4.2.7",
"@types/jest": "^29.5.12",
"@types/node": "^20.11.4",
"@types/os-utils": "^0.0.4",
"@types/tcp-port-used": "^1.0.4",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"jest": "^29.7.0",
"rimraf": "^3.0.2",
"rolldown": "1.0.0-beta.1",
"run-script-os": "^1.1.6",
"ts-jest": "^29.1.2",
"typescript": "^5.3.3",
"vitest": "^3.0.8"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"fetch-retry": "^5.0.6",
"ky": "^1.7.2",
"p-queue": "^8.0.1",
"rxjs": "^7.8.1",
"ulidx": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"tcp-port-used",
"fetch-retry",
"@janhq/core",
"decompress"
],
"installConfig": {
"hoistingLimits": "workspaces"
},
"packageManager": "yarn@4.5.3"
}

View File

@@ -1,126 +0,0 @@
[
{
"key": "auto_unload_models",
"title": "Auto-Unload Old Models",
"description": "Automatically unloads models that are not in use to free up memory. Ensure only one model is loaded at a time.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "context_shift",
"title": "Context Shift",
"description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.",
"controllerType": "checkbox",
"controllerProps": {
"value": false
}
},
{
"key": "cont_batching",
"title": "Continuous Batching",
"description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
"controllerType": "checkbox",
"controllerProps": {
"value": ""
}
},
{
"key": "n_parallel",
"title": "Parallel Operations",
"description": "Number of prompts that can be processed simultaneously by the model.",
"controllerType": "input",
"controllerProps": {
"value": "",
"placeholder": "1",
"type": "number",
"textAlign": "right"
}
},
{
"key": "cpu_threads",
"title": "CPU Threads",
"description": "Number of CPU cores used for model processing when running without GPU.",
"controllerType": "input",
"controllerProps": {
"value": "",
"placeholder": "-1 (auto-detect)",
"type": "number",
"textAlign": "right"
}
},
{
"key": "threads_batch",
"title": "Threads (Batch)",
"description": "Number of threads for batch and prompt processing (default: same as Threads).",
"controllerType": "input",
"controllerProps": {
"value": "",
"placeholder": "-1 (same as Threads)",
"type": "number"
}
},
{
"key": "flash_attn",
"title": "Flash Attention",
"description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "caching_enabled",
"title": "Caching",
"description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "cache_type",
"title": "KV Cache Type",
"description": "Controls memory usage and precision trade-off.",
"controllerType": "dropdown",
"controllerProps": {
"value": "q8_0",
"options": [
{
"value": "q4_0",
"name": "q4_0"
},
{
"value": "q8_0",
"name": "q8_0"
},
{
"value": "f16",
"name": "f16"
}
]
}
},
{
"key": "use_mmap",
"title": "mmap",
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "hugging-face-access-token",
"title": "Hugging Face Access Token",
"description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub, allowing applications to perform specific actions specified by the scope of permissions granted.",
"controllerType": "input",
"controllerProps": {
"value": "",
"placeholder": "hf_**********************************",
"type": "password",
"inputActions": ["unobscure", "copy"]
}
}
]

View File

@@ -1,44 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import defaultSettingJson from './resources/default_settings.json' with { type: 'json' }
export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
platform: 'browser',
define: {
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(defaultSettingJson),
CORTEX_API_URL: JSON.stringify(
`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
),
CORTEX_SOCKET_URL: JSON.stringify(
`ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
),
CORTEX_ENGINE_VERSION: JSON.stringify('b5509'),
},
},
{
input: 'src/node/index.ts',
external: ['@janhq/core/node'],
output: {
format: 'cjs',
file: 'dist/node/index.cjs.js',
sourcemap: false,
inlineDynamicImports: true,
},
resolve: {
extensions: ['.js', '.ts', '.json'],
},
define: {
CORTEX_API_URL: JSON.stringify(
`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
),
},
platform: 'node',
},
])

View File

@@ -1,5 +0,0 @@
declare const NODE: string
declare const CORTEX_API_URL: string
declare const CORTEX_SOCKET_URL: string
declare const CORTEX_ENGINE_VERSION: string
declare const SETTINGS: any

View File

@@ -1,452 +0,0 @@
import { describe, beforeEach, it, expect, vi, afterEach } from 'vitest'
// Must mock before imports
vi.mock('@janhq/core', () => {
return {
executeOnMain: vi.fn().mockResolvedValue({}),
events: {
emit: vi.fn()
},
extractModelLoadParams: vi.fn().mockReturnValue({}),
ModelEvent: {
OnModelsUpdate: 'OnModelsUpdate',
OnModelStopped: 'OnModelStopped'
},
EngineEvent: {
OnEngineUpdate: 'OnEngineUpdate'
},
InferenceEngine: {
cortex: 'cortex',
nitro: 'nitro',
cortex_llamacpp: 'cortex_llamacpp'
},
LocalOAIEngine: class LocalOAIEngine {
onLoad() {}
onUnload() {}
}
}
})
import JanInferenceCortexExtension, { Settings } from './index'
import { InferenceEngine, ModelEvent, EngineEvent, executeOnMain, events } from '@janhq/core'
import ky from 'ky'
// Mock global variables
const CORTEX_API_URL = 'http://localhost:3000'
const CORTEX_SOCKET_URL = 'ws://localhost:3000'
const SETTINGS = [
{ id: 'n_parallel', name: 'Parallel Execution', description: 'Number of parallel executions', type: 'number', value: '4' },
{ id: 'cont_batching', name: 'Continuous Batching', description: 'Enable continuous batching', type: 'boolean', value: true },
{ id: 'caching_enabled', name: 'Caching', description: 'Enable caching', type: 'boolean', value: true },
{ id: 'flash_attn', name: 'Flash Attention', description: 'Enable flash attention', type: 'boolean', value: true },
{ id: 'cache_type', name: 'Cache Type', description: 'Type of cache to use', type: 'string', value: 'f16' },
{ id: 'use_mmap', name: 'Use Memory Map', description: 'Use memory mapping', type: 'boolean', value: true },
{ id: 'cpu_threads', name: 'CPU Threads', description: 'Number of CPU threads', type: 'number', value: '' }
]
const NODE = 'node'
// Mock globals
vi.stubGlobal('CORTEX_API_URL', CORTEX_API_URL)
vi.stubGlobal('CORTEX_SOCKET_URL', CORTEX_SOCKET_URL)
vi.stubGlobal('SETTINGS', SETTINGS)
vi.stubGlobal('NODE', NODE)
vi.stubGlobal('window', {
addEventListener: vi.fn()
})
// Mock WebSocket
class MockWebSocket {
url :string
listeners: {}
onclose: Function
constructor(url) {
this.url = url
this.listeners = {}
}
addEventListener(event, listener) {
this.listeners[event] = listener
}
emit(event, data) {
if (this.listeners[event]) {
this.listeners[event](data)
}
}
close() {
if (this.onclose) {
this.onclose({ code: 1000 })
}
}
}
// Mock global WebSocket
// @ts-ignore
global.WebSocket = vi.fn().mockImplementation((url) => new MockWebSocket(url))
describe('JanInferenceCortexExtension', () => {
let extension
beforeEach(() => {
// Reset mocks
vi.clearAllMocks()
// Create a new instance for each test
extension = new JanInferenceCortexExtension()
// Mock the getSetting method
extension.getSetting = vi.fn().mockImplementation((key, defaultValue) => {
switch(key) {
case Settings.n_parallel:
return '4'
case Settings.cont_batching:
return true
case Settings.caching_enabled:
return true
case Settings.flash_attn:
return true
case Settings.cache_type:
return 'f16'
case Settings.use_mmap:
return true
case Settings.cpu_threads:
return ''
default:
return defaultValue
}
})
// Mock methods
extension.registerSettings = vi.fn()
extension.onLoad = vi.fn()
extension.clean = vi.fn().mockResolvedValue({})
extension.healthz = vi.fn().mockResolvedValue({})
extension.subscribeToEvents = vi.fn()
})
describe('onSettingUpdate', () => {
it('should update n_parallel setting correctly', () => {
extension.onSettingUpdate(Settings.n_parallel, '8')
expect(extension.n_parallel).toBe(8)
})
it('should update cont_batching setting correctly', () => {
extension.onSettingUpdate(Settings.cont_batching, false)
expect(extension.cont_batching).toBe(false)
})
it('should update caching_enabled setting correctly', () => {
extension.onSettingUpdate(Settings.caching_enabled, false)
expect(extension.caching_enabled).toBe(false)
})
it('should update flash_attn setting correctly', () => {
extension.onSettingUpdate(Settings.flash_attn, false)
expect(extension.flash_attn).toBe(false)
})
it('should update cache_type setting correctly', () => {
extension.onSettingUpdate(Settings.cache_type, 'f32')
expect(extension.cache_type).toBe('f32')
})
it('should update use_mmap setting correctly', () => {
extension.onSettingUpdate(Settings.use_mmap, false)
expect(extension.use_mmap).toBe(false)
})
it('should update cpu_threads setting correctly', () => {
extension.onSettingUpdate(Settings.cpu_threads, '4')
expect(extension.cpu_threads).toBe(4)
})
it('should not update cpu_threads when value is not a number', () => {
extension.cpu_threads = undefined
extension.onSettingUpdate(Settings.cpu_threads, 'not-a-number')
expect(extension.cpu_threads).toBeUndefined()
})
})
describe('onUnload', () => {
it('should clean up resources correctly', async () => {
extension.shouldReconnect = true
await extension.onUnload()
expect(extension.shouldReconnect).toBe(false)
expect(extension.clean).toHaveBeenCalled()
expect(executeOnMain).toHaveBeenCalledWith(NODE, 'dispose')
})
})
describe('loadModel', () => {
it('should remove llama_model_path and mmproj from settings', async () => {
// Setup
const model = {
id: 'test-model',
settings: {
llama_model_path: '/path/to/model',
mmproj: '/path/to/mmproj',
some_setting: 'value'
},
engine: InferenceEngine.cortex_llamacpp
}
// Mock ky.post
vi.spyOn(ky, 'post').mockImplementation(() => ({
// @ts-ignore
json: () => Promise.resolve({}),
catch: () => ({
finally: () => ({
// @ts-ignore
then: () => Promise.resolve({})
})
})
}))
// Setup queue for testing
extension.queue = { add: vi.fn(fn => fn()) }
// Execute
await extension.loadModel(model)
// Verify settings were filtered
expect(model.settings).not.toHaveProperty('llama_model_path')
expect(model.settings).not.toHaveProperty('mmproj')
expect(model.settings).toHaveProperty('some_setting')
})
it('should convert nitro to cortex_llamacpp engine', async () => {
// Setup
const model = {
id: 'test-model',
settings: {},
engine: InferenceEngine.nitro
}
// Mock ky.post
const mockKyPost = vi.spyOn(ky, 'post').mockImplementation(() => ({
// @ts-ignore
json: () => Promise.resolve({}),
catch: () => ({
finally: () => ({
// @ts-ignore
then: () => Promise.resolve({})
})
})
}))
// Setup queue for testing
extension.queue = { add: vi.fn(fn => fn()) }
// Execute
await extension.loadModel(model)
// Verify API call
expect(mockKyPost).toHaveBeenCalledWith(
`${CORTEX_API_URL}/v1/models/start`,
expect.objectContaining({
json: expect.objectContaining({
engine: InferenceEngine.cortex_llamacpp
})
})
)
})
})
describe('unloadModel', () => {
it('should call the correct API endpoint and abort loading if in progress', async () => {
// Setup
const model = { id: 'test-model' }
const mockAbort = vi.fn()
extension.abortControllers.set(model.id, { abort: mockAbort })
// Mock ky.post
const mockKyPost = vi.spyOn(ky, 'post').mockImplementation(() => ({
// @ts-ignore
json: () => Promise.resolve({}),
finally: () => ({
// @ts-ignore
then: () => Promise.resolve({})
})
}))
// Execute
await extension.unloadModel(model)
// Verify API call
expect(mockKyPost).toHaveBeenCalledWith(
`${CORTEX_API_URL}/v1/models/stop`,
expect.objectContaining({
json: { model: model.id }
})
)
// Verify abort controller was called
expect(mockAbort).toHaveBeenCalled()
})
})
describe('clean', () => {
it('should make a DELETE request to destroy process manager', async () => {
// Mock the ky.delete function directly
const mockDelete = vi.fn().mockReturnValue({
catch: vi.fn().mockReturnValue(Promise.resolve({}))
})
// Replace the original implementation
vi.spyOn(ky, 'delete').mockImplementation(mockDelete)
// Override the clean method to use the real implementation
// @ts-ignore
extension.clean = JanInferenceCortexExtension.prototype.clean
// Call the method
await extension.clean()
// Verify the correct API call was made
expect(mockDelete).toHaveBeenCalledWith(
`${CORTEX_API_URL}/processmanager/destroy`,
expect.objectContaining({
timeout: 2000,
retry: expect.objectContaining({
limit: 0
})
})
)
})
})
describe('WebSocket events', () => {
it('should handle WebSocket events correctly', () => {
// Create a mock implementation for subscribeToEvents that stores the socket
let messageHandler;
let closeHandler;
// Override the private method
extension.subscribeToEvents = function() {
this.socket = new MockWebSocket('ws://localhost:3000/events');
this.socket.addEventListener('message', (event) => {
const data = JSON.parse(event.data);
// Store for testing
messageHandler = data;
const transferred = data.task.items.reduce(
(acc, cur) => acc + cur.downloadedBytes,
0
);
const total = data.task.items.reduce(
(acc, cur) => acc + cur.bytes,
0
);
const percent = total > 0 ? transferred / total : 0;
events.emit(
data.type === 'DownloadUpdated' ? 'onFileDownloadUpdate' :
data.type === 'DownloadSuccess' ? 'onFileDownloadSuccess' :
data.type,
{
modelId: data.task.id,
percent: percent,
size: {
transferred: transferred,
total: total,
},
downloadType: data.task.type,
}
);
if (data.task.type === 'Engine') {
events.emit(EngineEvent.OnEngineUpdate, {
type: data.type,
percent: percent,
id: data.task.id,
});
}
else if (data.type === 'DownloadSuccess') {
setTimeout(() => {
events.emit(ModelEvent.OnModelsUpdate, {
fetch: true,
});
}, 500);
}
});
this.socket.onclose = (event) => {
closeHandler = event;
// Notify app to update model running state
events.emit(ModelEvent.OnModelStopped, {});
};
};
// Setup queue
extension.queue = {
add: vi.fn(fn => fn())
};
// Execute the method
extension.subscribeToEvents();
// Simulate a message event
extension.socket.listeners.message({
data: JSON.stringify({
type: 'DownloadUpdated',
task: {
id: 'test-model',
type: 'Model',
items: [
{ downloadedBytes: 50, bytes: 100 }
]
}
})
});
// Verify event emission
expect(events.emit).toHaveBeenCalledWith(
'onFileDownloadUpdate',
expect.objectContaining({
modelId: 'test-model',
percent: 0.5
})
);
// Simulate a download success event
vi.useFakeTimers();
extension.socket.listeners.message({
data: JSON.stringify({
type: 'DownloadSuccess',
task: {
id: 'test-model',
type: 'Model',
items: [
{ downloadedBytes: 100, bytes: 100 }
]
}
})
});
// Fast-forward time to trigger the timeout
vi.advanceTimersByTime(500);
// Verify the ModelEvent.OnModelsUpdate event was emitted
expect(events.emit).toHaveBeenCalledWith(
ModelEvent.OnModelsUpdate,
{ fetch: true }
);
vi.useRealTimers();
// Trigger websocket close
extension.socket.onclose({ code: 1000 });
// Verify OnModelStopped event was emitted
expect(events.emit).toHaveBeenCalledWith(
ModelEvent.OnModelStopped,
{}
);
});
})
})

View File

@@ -1,435 +0,0 @@
/**
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
* @version 1.0.0
* @module inference-extension/src/index
*/
import {
Model,
EngineEvent,
LocalOAIEngine,
extractModelLoadParams,
events,
ModelEvent,
} from '@janhq/core'
import ky, { KyInstance } from 'ky'
/**
* Event subscription types of Downloader
*/
enum DownloadTypes {
DownloadUpdated = 'onFileDownloadUpdate',
DownloadError = 'onFileDownloadError',
DownloadSuccess = 'onFileDownloadSuccess',
DownloadStopped = 'onFileDownloadStopped',
DownloadStarted = 'onFileDownloadStarted',
}
enum Settings {
n_parallel = 'n_parallel',
cont_batching = 'cont_batching',
caching_enabled = 'caching_enabled',
flash_attn = 'flash_attn',
cache_type = 'cache_type',
use_mmap = 'use_mmap',
cpu_threads = 'cpu_threads',
huggingfaceToken = 'hugging-face-access-token',
auto_unload_models = 'auto_unload_models',
context_shift = 'context_shift',
}
type LoadedModelResponse = { data: { engine: string; id: string }[] }
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferenceCortexExtension extends LocalOAIEngine {
nodeModule: string = 'node'
provider: string = 'cortex'
shouldReconnect = true
/** Default Engine model load settings */
n_parallel?: number
cont_batching: boolean = false
caching_enabled: boolean = true
flash_attn: boolean = true
use_mmap: boolean = true
cache_type: string = 'q8'
cpu_threads?: number
auto_unload_models: boolean = true
reasoning_budget = -1 // Default reasoning budget in seconds
context_shift = false
/**
* The URL for making inference requests.
*/
inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`
/**
* Socket instance of events subscription
*/
socket?: WebSocket = undefined
abortControllers = new Map<string, AbortController>()
api?: KyInstance
/**
* Get the API instance
* @returns
*/
async apiInstance(): Promise<KyInstance> {
if (this.api) return this.api
const apiKey = await window.core?.api.appToken()
this.api = ky.extend({
prefixUrl: CORTEX_API_URL,
headers: apiKey
? {
Authorization: `Bearer ${apiKey}`,
}
: {},
retry: 10,
})
return this.api
}
/**
* Authorization headers for the API requests.
* @returns
*/
headers(): Promise<HeadersInit> {
return window.core?.api.appToken().then((token: string) => ({
Authorization: `Bearer ${token}`,
}))
}
/**
* Called when the extension is loaded.
*/
async onLoad() {
super.onLoad()
// Register Settings
this.registerSettings(SETTINGS)
const numParallel = await this.getSetting<string>(Settings.n_parallel, '')
if (numParallel.length > 0 && parseInt(numParallel) > 0) {
this.n_parallel = parseInt(numParallel)
}
if (this.n_parallel && this.n_parallel > 1)
this.cont_batching = await this.getSetting<boolean>(
Settings.cont_batching,
false
)
this.caching_enabled = await this.getSetting<boolean>(
Settings.caching_enabled,
true
)
this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
this.context_shift = await this.getSetting<boolean>(
Settings.context_shift,
false
)
this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
if (this.caching_enabled)
this.cache_type = await this.getSetting<string>(Settings.cache_type, 'q8')
this.auto_unload_models = await this.getSetting<boolean>(
Settings.auto_unload_models,
true
)
const threads_number = Number(
await this.getSetting<string>(Settings.cpu_threads, '')
)
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
const huggingfaceToken = await this.getSetting<string>(
Settings.huggingfaceToken,
''
)
if (huggingfaceToken) {
this.updateCortexConfig({ huggingface_token: huggingfaceToken })
}
this.subscribeToEvents()
window.addEventListener('beforeunload', () => {
this.clean()
})
// Migrate configs
if (!localStorage.getItem('cortex_migration_completed')) {
const config = await this.getCortexConfig()
console.log('Start cortex.cpp migration', config)
if (config && config.huggingface_token) {
this.updateSettings([
{
key: Settings.huggingfaceToken,
controllerProps: {
value: config.huggingface_token,
},
},
])
this.updateCortexConfig({
huggingface_token: config.huggingface_token,
})
localStorage.setItem('cortex_migration_completed', 'true')
}
}
}
async onUnload() {
console.log('Clean up cortex.cpp services')
this.shouldReconnect = false
this.clean()
super.onUnload()
}
/**
* Subscribe to settings update and make change accordingly
* @param key
* @param value
*/
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.n_parallel && typeof value === 'string') {
if (value.length > 0 && parseInt(value) > 0) {
this.n_parallel = parseInt(value)
}
} else if (key === Settings.cont_batching && typeof value === 'boolean') {
this.cont_batching = value as boolean
} else if (key === Settings.caching_enabled && typeof value === 'boolean') {
this.caching_enabled = value as boolean
} else if (key === Settings.flash_attn && typeof value === 'boolean') {
this.flash_attn = value as boolean
} else if (key === Settings.cache_type && typeof value === 'string') {
this.cache_type = value as string
} else if (key === Settings.use_mmap && typeof value === 'boolean') {
this.use_mmap = value as boolean
} else if (key === Settings.cpu_threads && typeof value === 'string') {
const threads_number = Number(value)
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
} else if (key === Settings.huggingfaceToken) {
this.updateCortexConfig({ huggingface_token: value })
} else if (key === Settings.auto_unload_models) {
this.auto_unload_models = value as boolean
} else if (key === Settings.context_shift && typeof value === 'boolean') {
this.context_shift = value
}
}
override async loadModel(
model: Partial<Model> & {
id: string
settings?: object
file_path?: string
},
abortController: AbortController
): Promise<void> {
// Cortex will handle these settings
const { llama_model_path, mmproj, ...settings } = model.settings ?? {}
model.settings = settings
const controller = abortController ?? new AbortController()
const { signal } = controller
this.abortControllers.set(model.id, controller)
const loadedModels = await this.activeModels()
// This is to avoid loading the same model multiple times
if (loadedModels.some((e: { id: string }) => e.id === model.id)) {
console.log(`Model ${model.id} already loaded`)
return
}
if (this.auto_unload_models) {
// Unload the last used model if it is not the same as the current one
for (const lastUsedModel of loadedModels) {
if (lastUsedModel.id !== model.id) {
console.log(`Unloading last used model: ${lastUsedModel.id}`)
await this.unloadModel(lastUsedModel as Model)
}
}
}
const modelSettings = extractModelLoadParams(model.settings)
return await this.apiInstance().then((api) =>
api
.post('v1/models/start', {
json: {
...modelSettings,
model: model.id,
engine:
model.engine === 'nitro' // Legacy model cache
? 'llama-cpp'
: model.engine,
...(this.n_parallel ? { n_parallel: this.n_parallel } : {}),
...(this.use_mmap ? { use_mmap: true } : {}),
...(this.caching_enabled ? { caching_enabled: true } : {}),
...(this.flash_attn ? { flash_attn: true } : {}),
...(this.caching_enabled && this.cache_type
? { cache_type: this.cache_type }
: {}),
...(this.cpu_threads && this.cpu_threads > 0
? { cpu_threads: this.cpu_threads }
: {}),
...(this.cont_batching && this.n_parallel && this.n_parallel > 1
? { cont_batching: this.cont_batching }
: {}),
...(model.id.toLowerCase().includes('jan-nano')
? { reasoning_budget: 0 }
: { reasoning_budget: this.reasoning_budget }),
...(this.context_shift !== true // explicit true required to enable context shift
? { 'no-context-shift': true }
: {}),
...(modelSettings.ngl === -1 || modelSettings.ngl === undefined
? { ngl: 100 }
: {}),
},
timeout: false,
signal,
})
.json()
.catch(async (e) => {
throw (await e.response?.json()) ?? e
})
.finally(() => this.abortControllers.delete(model.id))
.then()
)
}
override async unloadModel(model: Model): Promise<void> {
return this.apiInstance().then((api) =>
api
.post('v1/models/stop', {
json: { model: model.id },
retry: {
limit: 0,
},
})
.json()
.finally(() => {
this.abortControllers.get(model.id)?.abort()
})
.then()
)
}
async activeModels(): Promise<(object & { id: string })[]> {
return await this.apiInstance()
.then((e) =>
e.get('inferences/server/models', {
retry: {
limit: 0, // Do not retry
},
})
)
.then((e) => e.json())
.then((e) => (e as LoadedModelResponse).data ?? [])
.catch(() => [])
}
/**
* Clean cortex processes
* @returns
*/
private async clean(): Promise<any> {
return this.apiInstance()
.then((api) =>
api.delete('processmanager/destroy', {
timeout: 2000, // maximum 2 seconds
retry: {
limit: 0,
},
})
)
.catch(() => {
// Do nothing
})
}
/**
* Update cortex config
* @param body
*/
private async updateCortexConfig(body: {
[key: string]: any
}): Promise<void> {
return this.apiInstance()
.then((api) => api.patch('v1/configs', { json: body }).then(() => {}))
.catch((e) => console.debug(e))
}
/**
* Get cortex config
* @param body
*/
private async getCortexConfig(): Promise<any> {
return this.apiInstance()
.then((api) => api.get('v1/configs').json())
.catch((e) => console.debug(e))
}
/**
* Subscribe to cortex.cpp websocket events
*/
private subscribeToEvents() {
this.socket = new WebSocket(`${CORTEX_SOCKET_URL}/events`)
this.socket.addEventListener('message', (event) => {
const data = JSON.parse(event.data)
const transferred = data.task.items.reduce(
(acc: number, cur: any) => acc + cur.downloadedBytes,
0
)
const total = data.task.items.reduce(
(acc: number, cur: any) => acc + cur.bytes,
0
)
const percent = total > 0 ? transferred / total : 0
events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], {
modelId: data.task.id,
percent: percent,
size: {
transferred: transferred,
total: total,
},
downloadType: data.task.type,
})
if (data.task.type === 'Engine') {
events.emit(EngineEvent.OnEngineUpdate, {
type: DownloadTypes[data.type as keyof typeof DownloadTypes],
percent: percent,
id: data.task.id,
})
} else {
if (data.type === DownloadTypes.DownloadSuccess) {
// Delay for the state update from cortex.cpp
// Just to be sure
setTimeout(() => {
events.emit(ModelEvent.OnModelsUpdate, {
fetch: true,
})
}, 500)
}
}
})
/**
* This is to handle the server segfault issue
*/
this.socket.onclose = (event) => {
// Notify app to update model running state
events.emit(ModelEvent.OnModelStopped, {})
// Reconnect to the /events websocket
if (this.shouldReconnect) {
setTimeout(() => this.subscribeToEvents(), 1000)
}
}
}
}

View File

@@ -1,144 +0,0 @@
import { describe, it, expect, vi } from 'vitest'
// Mocks
const CORTEX_API_URL = 'http://localhost:3000'
vi.stubGlobal('CORTEX_API_URL', CORTEX_API_URL)
vi.mock('@janhq/core/node', (actual) => ({
...actual(),
getJanDataFolderPath: () => '',
appResourcePath: () => '/mock/path',
log: vi.fn(),
getSystemResourceInfo: () => {
return {
cpu: {
cores: 1,
logicalCores: 1,
threads: 1,
model: 'model',
speed: 1,
},
memory: {
total: 1,
free: 1,
},
gpu: {
model: 'model',
memory: 1,
cuda: {
version: 'version',
devices: 'devices',
},
vulkan: {
version: 'version',
devices: 'devices',
},
},
}
},
}))
vi.mock('fs', () => ({
default: {
readdirSync: () => [],
},
}))
vi.mock('./watchdog', () => {
return {
ProcessWatchdog: vi.fn().mockImplementation(() => {
return {
start: vi.fn(),
terminate: vi.fn(),
}
}),
}
})
vi.mock('child_process', () => ({
exec: () => {
return {
stdout: { on: vi.fn() },
stderr: { on: vi.fn() },
on: vi.fn(),
}
},
spawn: () => {
return {
stdout: { on: vi.fn() },
stderr: { on: vi.fn() },
on: vi.fn(),
pid: '111',
}
},
}))
import index from './index'
describe('Cortex extension node interface', () => {
describe('run', () => {
it('should start the cortex subprocess on macOS', async () => {
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
const result = await index.run()
expect(result).toBeUndefined()
})
it('should start the cortex subprocess on Windows', async () => {
Object.defineProperty(process, 'platform', {
value: 'win32',
})
const result = await index.run()
expect(result).toBeUndefined()
})
it('should set the proper environment variables based on platform', async () => {
// Test for Windows
Object.defineProperty(process, 'platform', {
value: 'win32',
})
process.env.PATH = '/original/path'
await index.run()
expect(process.env.PATH).toContain('/original/path')
// Test for non-Windows (macOS/Linux)
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
process.env.LD_LIBRARY_PATH = '/original/ld/path'
await index.run()
expect(process.env.LD_LIBRARY_PATH).toContain('/original/ld/path')
})
})
describe('dispose', () => {
it('should dispose a model successfully on Mac', async () => {
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
// Call the dispose function
const result = index.dispose()
// Assert that the result is as expected
expect(result).toBeUndefined()
})
it('should kill the subprocess successfully on Windows', async () => {
Object.defineProperty(process, 'platform', {
value: 'win32',
})
// Call the dispose function
const result = index.dispose()
// Assert that the result is as expected
expect(result).toBeUndefined()
})
})
})

View File

@@ -1,103 +0,0 @@
import path from 'path'
import { appResourcePath, getJanDataFolderPath, log } from '@janhq/core/node'
import { ProcessWatchdog } from './watchdog'
let watchdog: ProcessWatchdog | undefined = undefined
/**
* Spawns a cortex-server subprocess.
* @returns A promise that resolves when the cortex subprocess is started.
*/
function run(): Promise<any> {
log(`[CORTEX]:: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => {
// let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${
process.platform === 'win32' ? '.exe' : ''
}`
const binPath = path.join(__dirname, '..', 'bin')
const executablePath = path.join(binPath, binaryName)
addEnvPaths(binPath)
const sharedPath = path.join(appResourcePath(), 'shared')
// Execute the binary
log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
const dataFolderPath = getJanDataFolderPath()
if (watchdog) {
watchdog.terminate()
}
// The HOST address to use for the cortex subprocess
const LOCAL_PORT = CORTEX_API_URL.split(':').pop() ?? '39291'
watchdog = new ProcessWatchdog(
executablePath,
[
'--start-server',
'--port',
LOCAL_PORT.toString(),
'--config_file_path',
`${path.join(dataFolderPath, '.janrc')}`,
'--data_folder_path',
dataFolderPath,
'config',
'--api_keys',
process.env.appToken ?? 'cortex.cpp',
],
{
env: {
...process.env,
// CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
// // Vulkan - Support 1 device at a time for now
// ...(gpuVisibleDevices?.length > 0 && {
// GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
// }),
},
cwd: sharedPath,
}
)
watchdog.start()
resolve()
})
}
/**
* Every module should have a dispose function
* This will be called when the extension is unloaded and should clean up any resources
* Also called when app is closed
*/
function dispose() {
watchdog?.terminate()
}
/**
* Set the environment paths for the cortex subprocess
* @param dest
*/
function addEnvPaths(dest: string) {
// Add engine path to the PATH and LD_LIBRARY_PATH
if (process.platform === 'win32') {
process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
} else {
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
dest
)
}
}
/**
* Cortex process info
*/
export interface CortexProcessInfo {
isRunning: boolean
}
export default {
run,
dispose,
}

View File

@ -1,84 +0,0 @@
import { log } from '@janhq/core/node'
import { spawn, ChildProcess } from 'child_process'
import { EventEmitter } from 'events'
interface WatchdogOptions {
cwd?: string
restartDelay?: number
maxRestarts?: number
env?: NodeJS.ProcessEnv
}
export class ProcessWatchdog extends EventEmitter {
private command: string
private args: string[]
private options: WatchdogOptions
private process: ChildProcess | null
private restartDelay: number
private maxRestarts: number
private restartCount: number
private isTerminating: boolean
constructor(command: string, args: string[], options: WatchdogOptions = {}) {
super()
this.command = command
this.args = args
this.options = options
this.process = null
this.restartDelay = options.restartDelay || 5000
this.maxRestarts = options.maxRestarts || 5
this.restartCount = 0
this.isTerminating = false
}
start(): void {
this.spawnProcess()
}
private spawnProcess(): void {
if (this.isTerminating) return
log(`Starting process: ${this.command} ${this.args.join(' ')}`)
this.process = spawn(this.command, this.args, this.options)
this.process.stdout?.on('data', (data: Buffer) => {
log(`Process output: ${data}`)
this.emit('output', data.toString())
})
this.process.stderr?.on('data', (data: Buffer) => {
log(`Process error: ${data}`)
this.emit('error', data.toString())
})
this.process.on('close', (code: number | null) => {
log(`Process exited with code ${code}`)
this.emit('close', code)
if (!this.isTerminating) {
this.restartProcess()
}
})
}
private restartProcess(): void {
if (this.restartCount < this.maxRestarts) {
this.restartCount++
log(
`Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})`
)
setTimeout(() => this.spawnProcess(), this.restartDelay)
} else {
log('Max restart attempts reached. Exiting watchdog.')
this.emit('maxRestartsReached')
}
}
terminate(): void {
this.isTerminating = true
if (this.process) {
log('Terminating watched process...')
this.process.kill()
}
this.emit('terminated')
}
}

View File

@@ -1,15 +0,0 @@
{
"compilerOptions": {
"moduleResolution": "node",
"target": "es2016",
"module": "esnext",
"strict": true,
"sourceMap": true,
"esModuleInterop": true,
"outDir": "dist",
"importHelpers": true,
"typeRoots": ["node_modules/@types"]
},
"include": ["src"],
"exclude": ["src/**/*.test.ts"]
}

View File

@@ -21,6 +21,7 @@
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"@tauri-apps/api": "^1.4.0",
"fetch-retry": "^5.0.6",
"ulidx": "^2.3.0"
},

View File

@@ -6,10 +6,55 @@
* @module llamacpp-extension/src/index
*/
import { RemoteOAIEngine, getJanDataFolderPath, fs, ModelCapability, Model } from '@janhq/core'
import {
AIEngine,
localProvider,
getJanDataFolderPath,
fs,
Model,
} from '@janhq/core'
export enum Settings {
port = 'port',
import { invoke } from '@tauri-apps/api/tauri'
import {
LocalProvider,
ModelInfo,
ListOptions,
ListResult,
PullOptions,
PullResult,
LoadOptions,
SessionInfo,
UnloadOptions,
UnloadResult,
ChatOptions,
ChatCompletion,
ChatCompletionChunk,
DeleteOptions,
DeleteResult,
ImportOptions,
ImportResult,
AbortPullOptions,
AbortPullResult,
ChatCompletionRequest,
} from './types'
/**
* Helper to convert GGUF model filename to a more structured ID/name
* Example: "mistral-7b-instruct-v0.2.Q4_K_M.gguf" -> { baseModelId: "mistral-7b-instruct-v0.2", quant: "Q4_K_M" }
**/
function parseGGUFFileName(filename: string): {
baseModelId: string
quant?: string
} {
const nameWithoutExt = filename.replace(/\.gguf$/i, '')
// Try to split by common quantization patterns (e.g., .Q4_K_M, -IQ2_XS)
const match = nameWithoutExt.match(
/^(.*?)[-_]([QqIiFf]\w{1,3}_\w{1,3}|[Qq]\d+_[KkSsMmXxLl\d]+|[IiQq]\d+_[XxSsMm]+|[Qq]\d+)$/
)
if (match && match[1] && match[2]) {
return { baseModelId: match[1], quant: match[2] }
}
return { baseModelId: nameWithoutExt }
}
/**
@@ -17,99 +62,246 @@ export enum Settings {
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class LlamacppProvider extends RemoteOAIEngine {
inferenceUrl: string = ''
baseURL: string = ''
provider: string = ENGINE
export default class inference_llamacpp_extension
extends AIEngine
implements localProvider
{
provider: string = 'llamacpp'
readonly providerId: string = 'llamacpp'
private activeSessions: Map<string, SessionInfo> = new Map()
private modelsBasePath!: string
override async onLoad(): Promise<void> {
super.onLoad()
super.onLoad() // Calls registerEngine() from AIEngine
this.registerSettings(SETTINGS_DEFINITIONS)
// Register Settings
this.registerSettings(SETTINGS)
const customPath = await this.getSetting<string>(
LlamaCppSettings.ModelsPath,
''
)
if (customPath && (await fs.exists(customPath))) {
this.modelsBasePath = customPath
} else {
this.modelsBasePath = await path.join(
await getJanDataFolderPath(),
'models',
ENGINE_ID
)
}
await fs.createDirAll(this.modelsBasePath)
// register models
const models = await this.listModels()
this.registerModels(models)
console.log(
`${this.providerId} provider loaded. Models path: ${this.modelsBasePath}`
)
// NOTE: port 0 may mean request free port from OS. we may want
// to take advantage of this. llama-server --port 0 on macOS works.
const port = await this.getSetting<number>(Settings.port, 0)
this.updateBaseUrl(port)
// Optionally, list and register models with the core system if AIEngine expects it
// const models = await this.listModels({ providerId: this.providerId });
// this.registerModels(this.mapModelInfoToCoreModel(models)); // mapModelInfoToCoreModel would be a helper
}
// onSettingUpdate<T>(key: string, value: T): void {
// if (key === Settings.apiKey) {
// this.apiKey = value as string
// } else if (key === Settings.baseUrl) {
// if (typeof value !== 'string') return
// this.updateBaseUrl(value)
// }
// }
updateBaseUrl(value: number): void {
if (value == 0) {
// set to default value
SETTINGS.forEach((setting) => {
if (setting.key === Settings.port) {
value = setting.controllerProps.value as number
async getModelsPath(): Promise<string> {
// Ensure modelsBasePath is initialized
if (!this.modelsBasePath) {
const customPath = await this.getSetting<string>(
LlamaCppSettings.ModelsPath,
''
)
if (customPath && (await fs.exists(customPath))) {
this.modelsBasePath = customPath
} else {
this.modelsBasePath = await path.join(
await getJanDataFolderPath(),
'models',
ENGINE_ID
)
}
await fs.createDirAll(this.modelsBasePath)
}
return this.modelsBasePath
}
async listModels(_opts: ListOptions): Promise<ListResult> {
const modelsDir = await this.getModelsPath()
const result: ModelInfo[] = []
try {
if (!(await fs.exists(modelsDir))) {
await fs.createDirAll(modelsDir)
return []
}
const entries = await fs.readDir(modelsDir)
for (const entry of entries) {
if (entry.name?.endsWith('.gguf') && entry.isFile) {
const modelPath = await path.join(modelsDir, entry.name)
const stats = await fs.stat(modelPath) // Tauri's fs.stat or Node's fs.statSync
const parsedName = parseGGUFFileName(entry.name)
result.push({
id: `${parsedName.baseModelId}${parsedName.quant ? `/${parsedName.quant}` : ''}`, // e.g., "mistral-7b/Q4_0"
name: entry.name.replace('.gguf', ''), // Or a more human-friendly name
quant_type: parsedName.quant,
providerId: this.providerId,
sizeBytes: stats.size,
path: modelPath,
tags: [this.providerId, parsedName.quant || 'unknown_quant'].filter(
Boolean
) as string[],
})
}
this.baseURL = `http://127.0.0.1:${value}`
this.inferenceUrl = `${this.baseURL}/chat/completions`
}
} catch (error) {
console.error(`[${this.providerId}] Error listing models:`, error)
// Depending on desired behavior, either throw or return empty/partial list
}
return result
}
async listModels(): Promise<Model[]> {
let modelIds = []
const modelsFolder = `${await getJanDataFolderPath()}/models`
// cortexso models
const cortexsoFolder = `${modelsFolder}/cortex.so`
const modelDirs = await fs.readdirSync(cortexsoFolder)
for (const modelDir of modelDirs) {
const modelName = modelDir.split('/').pop()
// TODO: try removing this check
// skip files start with . e.g. .DS_store
if (!modelName || modelName.startsWith('.')) continue
const variantDirs = await fs.readdirSync(modelDir)
for (const variantDir of variantDirs) {
// NOTE: we can't detect unfinished download here
const ggufPath = `${variantDir}/model.gguf`
if (await fs.existsSync(ggufPath)) {
const variantName = variantDir.split('/').pop()
modelIds.push(`${modelName}/${variantName}`)
}
}
// pullModel
async pullModel(opts: PullOptions): Promise<PullResult> {
// TODO: Implement pullModel
return 0;
}
// TODO: list models under huggingface.co
// abortPull
async abortPull(opts: AbortPullOptions): Promise<AbortPullResult> {
// TODO: implement abortPull
}
const models = modelIds.map((modelId) => {
async loadModel(opts: LoadOptions): Promise<SessionInfo> {
if (opts.providerId !== this.providerId) {
throw new Error('Invalid providerId for LlamaCppProvider.loadModel')
}
const sessionId = uuidv4()
const loadParams = {
model_path: opts.modelPath,
session_id: sessionId, // Pass sessionId to Rust for tracking
// Default llama.cpp server options, can be overridden by opts.options
port: opts.options?.port ?? 0, // 0 for dynamic port assignment by OS
n_gpu_layers:
opts.options?.n_gpu_layers ??
(await this.getSetting(LlamaCppSettings.DefaultNGpuLayers, -1)),
n_ctx:
opts.options?.n_ctx ??
(await this.getSetting(LlamaCppSettings.DefaultNContext, 2048)),
// Spread any other options from opts.options
...(opts.options || {}),
}
try {
console.log(
`[${this.providerId}] Requesting to load model: ${opts.modelPath} with options:`,
loadParams
)
// This matches the Rust handler: core::utils::extensions::inference_llamacpp_extension::server::load
const rustResponse: {
session_id: string
port: number
model_path: string
settings: Record<string, unknown>
} = await invoke('plugin:llamacpp|load', { params: loadParams }) // Adjust namespace if needed
if (!rustResponse || !rustResponse.port) {
throw new Error(
'Rust load function did not return expected port or session info.'
)
}
const sessionInfo: SessionInfo = {
sessionId: rustResponse.session_id, // Use sessionId from Rust if it regenerates/confirms it
port: rustResponse.port,
modelPath: rustResponse.model_path,
providerId: this.providerId,
settings: rustResponse.settings, // Settings actually used by the server
}
this.activeSessions.set(sessionInfo.sessionId, sessionInfo)
console.log(
`[${this.providerId}] Model loaded: ${sessionInfo.modelPath} on port ${sessionInfo.port}, session: ${sessionInfo.sessionId}`
)
return sessionInfo
} catch (error) {
console.error(
`[${this.providerId}] Error loading model ${opts.modelPath}:`,
error
)
throw error // Re-throw to be handled by the caller
}
}
async unloadModel(opts: UnloadOptions): Promise<UnloadResult> {
if (opts.providerId !== this.providerId) {
return { success: false, error: 'Invalid providerId' }
}
const session = this.activeSessions.get(opts.sessionId)
if (!session) {
return {
sources: [],
object: 'model',
version: '1.0',
format: 'api',
id: modelId,
name: modelId,
created: 0,
description: '',
settings: {},
parameters: {},
metadata: {
author: '',
tags: [],
size: 0,
},
engine: this.provider,
capabilities: [ModelCapability.completion],
}
})
return models
success: false,
error: `No active session found for id: ${opts.sessionId}`,
}
}
try {
console.log(
`[${this.providerId}] Requesting to unload model for session: ${opts.sessionId}`
)
// Matches: core::utils::extensions::inference_llamacpp_extension::server::unload
const rustResponse: { success: boolean; error?: string } = await invoke(
'plugin:llamacpp|unload',
{ sessionId: opts.sessionId }
)
if (rustResponse.success) {
this.activeSessions.delete(opts.sessionId)
console.log(
`[${this.providerId}] Session ${opts.sessionId} unloaded successfully.`
)
return { success: true }
} else {
console.error(
`[${this.providerId}] Failed to unload session ${opts.sessionId}: ${rustResponse.error}`
)
return {
success: false,
error: rustResponse.error || 'Unknown error during unload',
}
}
} catch (error: any) {
console.error(
`[${this.providerId}] Error invoking unload for session ${opts.sessionId}:`,
error
)
return { success: false, error: error.message || String(error) }
}
}
async chat(
opts: ChatOptions
): Promise<ChatCompletion | AsyncIterable<ChatCompletionChunk>> {}
async deleteModel(opts: DeleteOptions): Promise<DeleteResult> {}
async importModel(opts: ImportOptions): Promise<ImportResult> {}
override async loadModel(model: Model): Promise<any> {
if (model.engine?.toString() !== this.provider) return Promise.resolve()
console.log(
`[${this.providerId} AIEngine] Received OnModelInit for:`,
model.id
)
return super.loadModel(model)
}
override async unloadModel(model?: Model): Promise<any> {
if (model?.engine && model.engine.toString() !== this.provider)
return Promise.resolve()
console.log(
`[${this.providerId} AIEngine] Received OnModelStop for:`,
model?.id || 'all models'
)
return super.unloadModel(model)
}
}

View File

@@ -0,0 +1,199 @@
// src/providers/local/types.ts
// --- Re-using OpenAI types (minimal definitions for this example) ---
// In a real project, you'd import these from 'openai' or a shared types package.
export interface ChatCompletionRequestMessage {
role: 'system' | 'user' | 'assistant' | 'tool';
content: string | null;
name?: string;
tool_calls?: any[]; // Simplified
tool_call_id?: string;
}
export interface ChatCompletionRequest {
model: string; // Model ID, though for local it might be implicit via sessionId
messages: ChatCompletionRequestMessage[];
temperature?: number | null;
top_p?: number | null;
n?: number | null;
stream?: boolean | null;
stop?: string | string[] | null;
max_tokens?: number;
presence_penalty?: number | null;
frequency_penalty?: number | null;
logit_bias?: Record<string, number> | null;
user?: string;
// ... TODO: other OpenAI params
}
export interface ChatCompletionChunkChoiceDelta {
content?: string | null;
role?: 'system' | 'user' | 'assistant' | 'tool';
tool_calls?: any[]; // Simplified
}
export interface ChatCompletionChunkChoice {
index: number;
delta: ChatCompletionChunkChoiceDelta;
finish_reason?: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null;
}
export interface ChatCompletionChunk {
id: string;
object: 'chat.completion.chunk';
created: number;
model: string;
choices: ChatCompletionChunkChoice[];
system_fingerprint?: string;
}
export interface ChatCompletionChoice {
index: number;
message: ChatCompletionRequestMessage; // Response message
finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call';
logprobs?: any; // Simplified
}
export interface ChatCompletion {
id: string;
object: 'chat.completion';
created: number;
model: string; // Model ID used
choices: ChatCompletionChoice[];
usage?: {
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
};
system_fingerprint?: string;
}
// --- End OpenAI types ---
// Shared model metadata
export interface ModelInfo {
id: string; // e.g. "qwen3-4B" or "org/model/quant"
name: string; // human-readable, e.g., "Qwen3 4B Q4_0"
quant_type?: string; // q4_0 (optional as it might be part of ID or name)
providerId: string; // e.g. "llama.cpp"
sizeBytes: number;
tags?: string[];
path?: string; // Absolute path to the model file, if applicable
// Additional provider-specific metadata can be added here
[key: string]: any;
}
// 1. /list
export interface ListOptions {
providerId: string; // To specify which provider if a central manager calls this
}
export type ListResult = ModelInfo[];
// 2. /pull
export interface PullOptions {
providerId: string;
modelId: string; // Identifier for the model to pull (e.g., from a known registry)
downloadUrl: string; // URL to download the model from
/** optional callback to receive download progress */
onProgress?: (progress: { percent: number; downloadedBytes: number; totalBytes?: number; }) => void;
}
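// Illustrative call (the IDs, URL and handler are placeholders, not real values):
//   await provider.pullModel({
//     providerId: 'llama.cpp',
//     modelId: 'qwen3-4B',
//     downloadUrl: 'https://example.com/qwen3-4b-q4_0.gguf',
//     onProgress: (p) => console.log(`downloaded ${p.percent}%`),
//   })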
export interface PullResult {
success: boolean;
path?: string; // local file path to the pulled model
error?: string;
modelInfo?: ModelInfo; // Info of the pulled model
}
// 3. /load
export interface LoadOptions {
providerId: string;
modelPath: string;
/** any provider-specific tuning options for the llama.cpp server */
options?: {
port?: number; // 0 means dynamic port
n_gpu_layers?: number;
n_ctx?: number; // context size
// ... other llama-cpp-python or llama.cpp server flags
[key: string]: any;
};
}
export interface SessionInfo {
sessionId: string; // opaque handle for unload/chat
port: number; // port the spawned llama-server is listening on
modelPath: string; // path of the loaded model
providerId: string;
settings: Record<string, unknown>; // The actual settings used to load
}
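// Note: llama.cpp's llama-server exposes an OpenAI-compatible HTTP API, so a
// SessionInfo can be turned into a chat endpoint for the session, e.g. (sketch):
//   `http://127.0.0.1:${session.port}/v1/chat/completions`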
// 4. /unload
export interface UnloadOptions {
providerId: string;
sessionId: string;
}
export interface UnloadResult {
success: boolean;
error?: string;
}
// 5. /chat
export interface ChatOptions {
providerId: string;
sessionId: string;
/** Full OpenAI ChatCompletionRequest payload */
payload: ChatCompletionRequest;
}
// Output for /chat will be Promise<ChatCompletion> for non-streaming requests,
// or Promise<AsyncIterable<ChatCompletionChunk>> for streaming ones
// (see the usage sketch after the LocalProvider interface below).
// 6. /delete
export interface DeleteOptions {
providerId: string;
modelId: string; // The ID of the model to delete (implies finding its path)
modelPath?: string; // Optionally, direct path can be provided
}
export interface DeleteResult {
success: boolean;
error?: string;
}
// 7. /import
export interface ImportOptions {
providerId: string;
sourcePath: string; // Path to the local model file to import
desiredModelId?: string; // Optional: if user wants to name it specifically
}
export interface ImportResult {
success: boolean;
modelInfo?: ModelInfo;
error?: string;
}
// 8. /abortPull
export interface AbortPullOptions {
providerId: string;
modelId: string; // The modelId whose download is to be aborted
}
export interface AbortPullResult {
success: boolean;
error?: string;
}
// The interface for any local provider
export interface LocalProvider {
readonly providerId: string;
listModels(opts: ListOptions): Promise<ListResult>;
pullModel(opts: PullOptions): Promise<PullResult>;
loadModel(opts: LoadOptions): Promise<SessionInfo>;
unloadModel(opts: UnloadOptions): Promise<UnloadResult>;
chat(opts: ChatOptions): Promise<ChatCompletion | AsyncIterable<ChatCompletionChunk>>;
deleteModel(opts: DeleteOptions): Promise<DeleteResult>;
importModel(opts: ImportOptions): Promise<ImportResult>;
abortPull(opts: AbortPullOptions): Promise<AbortPullResult>;
// Optional: for direct access to underlying client if needed for specific streaming cases
getChatClient?(sessionId: string): any; // e.g., an OpenAI client instance configured for the session
}
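
To make the contract concrete, here is a minimal usage sketch against the `LocalProvider` interface above. It is illustrative only: it assumes an already-constructed `provider` instance and an existing local model file, and the relative import path is a guess based on the file location noted at the top of `types.ts`.

```typescript
import type {
  LocalProvider,
  ChatCompletion,
  ChatCompletionChunk,
} from './types'

// Hypothetical walkthrough of the /load -> /chat -> /unload lifecycle.
async function runOnce(provider: LocalProvider, modelPath: string) {
  // 1. Load the model; the provider starts a llama-server and returns a session.
  const session = await provider.loadModel({
    providerId: provider.providerId,
    modelPath,
    options: { port: 0, n_ctx: 4096 }, // illustrative tuning values
  })

  // 2. Send a streaming chat request against that session.
  const result = await provider.chat({
    providerId: provider.providerId,
    sessionId: session.sessionId,
    payload: {
      model: modelPath,
      messages: [{ role: 'user', content: 'Hello!' }],
      stream: true,
    },
  })

  // 3. Handle either form of the union: an async iterable (streaming) or a
  //    single ChatCompletion (non-streaming).
  if (Symbol.asyncIterator in Object(result)) {
    let text = ''
    for await (const chunk of result as AsyncIterable<ChatCompletionChunk>) {
      text += chunk.choices[0]?.delta?.content ?? ''
    }
    console.log(text)
  } else {
    console.log((result as ChatCompletion).choices[0].message.content)
  }

  // 4. Unload the session when done.
  await provider.unloadModel({
    providerId: provider.providerId,
    sessionId: session.sessionId,
  })
}
```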

File diff suppressed because it is too large

View File

@@ -3,12 +3,9 @@ use std::{
fs::{self, File},
io::Read,
path::PathBuf,
sync::Arc,
};
use tar::Archive;
use tauri::{App, Emitter, Listener, Manager};
use tauri_plugin_shell::process::{CommandChild, CommandEvent};
use tauri_plugin_shell::ShellExt;
use tauri::{App, Emitter, Manager};
use tauri_plugin_store::StoreExt;
use tokio::sync::Mutex;
use tokio::time::{sleep, Duration}; // Using tokio::sync::Mutex
@@ -200,22 +197,18 @@ pub fn setup_mcp(app: &App) {
let state = app.state::<AppState>();
let servers = state.mcp_servers.clone();
let app_handle: tauri::AppHandle = app.handle().clone();
// Setup kill-mcp-servers event listener (similar to cortex kill-sidecar)
let app_handle_for_kill = app_handle.clone();
app_handle.listen("kill-mcp-servers", move |_event| {
let app_handle = app_handle_for_kill.clone();
tauri::async_runtime::spawn(async move {
log::info!("Received kill-mcp-servers event - cleaning up MCP servers");
let app_state = app_handle.state::<AppState>();
// Stop all running MCP servers
if let Err(e) = super::mcp::stop_mcp_servers(app_state.mcp_servers.clone()).await {
log::error!("Failed to stop MCP servers: {}", e);
return;
}
// Clear active servers and restart counts
{
let mut active_servers = app_state.mcp_active_servers.lock().await;
@@ -225,11 +218,9 @@ pub fn setup_mcp(app: &App) {
let mut restart_counts = app_state.mcp_restart_counts.lock().await;
restart_counts.clear();
}
log::info!("MCP servers cleaned up successfully");
});
});
tauri::async_runtime::spawn(async move {
if let Err(e) = run_mcp_commands(&app_handle, servers).await {
log::error!("Failed to run mcp commands: {}", e);
@@ -471,65 +462,22 @@ pub fn setup_sidecar(app: &App) -> Result<(), String> {
Ok(())
}
//pub fn setup_engine_binaries(app: &App) -> Result<(), String> {
// // Copy engine binaries to app_data
// let app_data_dir = app.handle().path().app_data_dir().unwrap();
// let binaries_dir = app.handle().path().resource_dir().unwrap().join("binaries");
// let themes_dir = app
// .handle()
// .path()
// .resource_dir()
// .unwrap()
// .join("resources");
//
// Clean up function to kill the sidecar process
//
pub fn clean_up() {
#[cfg(windows)]
{
use std::os::windows::process::CommandExt;
let _ = std::process::Command::new("taskkill")
.args(["-f", "-im", "llama-server.exe"])
.creation_flags(0x08000000)
.spawn();
let _ = std::process::Command::new("taskkill")
.args(["-f", "-im", "cortex-server.exe"])
.creation_flags(0x08000000)
.spawn();
}
#[cfg(unix)]
{
let _ = std::process::Command::new("pkill")
.args(["-f", "llama-server"])
.spawn();
let _ = std::process::Command::new("pkill")
.args(["-f", "cortex-server"])
.spawn();
}
log::info!("Clean up function executed, sidecar processes killed.");
}
fn copy_dir_all(src: PathBuf, dst: PathBuf) -> Result<(), String> {
fs::create_dir_all(&dst).map_err(|e| e.to_string())?;
log::info!("Copying from {:?} to {:?}", src, dst);
for entry in fs::read_dir(src).map_err(|e| e.to_string())? {
let entry = entry.map_err(|e| e.to_string())?;
let ty = entry.file_type().map_err(|e| e.to_string())?;
if ty.is_dir() {
copy_dir_all(entry.path(), dst.join(entry.file_name())).map_err(|e| e.to_string())?;
} else {
fs::copy(entry.path(), dst.join(entry.file_name())).map_err(|e| e.to_string())?;
}
}
Ok(())
}
pub fn setup_engine_binaries(app: &App) -> Result<(), String> {
// Copy engine binaries to app_data
let app_data_dir = get_jan_data_folder_path(app.handle().clone());
let binaries_dir = app.handle().path().resource_dir().unwrap().join("binaries");
let resources_dir = app
.handle()
.path()
.resource_dir()
.unwrap()
.join("resources");
if let Err(e) = copy_dir_all(binaries_dir, app_data_dir.clone()) {
log::error!("Failed to copy binaries: {}", e);
}
if let Err(e) = copy_dir_all(resources_dir, app_data_dir.clone()) {
log::error!("Failed to copy resources: {}", e);
}
Ok(())
}
// if let Err(e) = copy_dir_all(binaries_dir, app_data_dir.clone()) {
// log::error!("Failed to copy binaries: {}", e);
// }
// if let Err(e) = copy_dir_all(themes_dir, app_data_dir.clone()) {
// log::error!("Failed to copy themes: {}", e);
// }
// Ok(())
//}

View File

@@ -10,8 +10,8 @@ use crate::core::state::AppState;
pub enum ServerError {
#[error("Server is already running")]
AlreadyRunning,
#[error("Server is not running")]
NotRunning,
// #[error("Server is not running")]
// NotRunning,
#[error("Failed to locate server binary: {0}")]
BinaryNotFound(String),
#[error("Failed to determine resource path: {0}")]

View File

@@ -1,16 +1,14 @@
mod core;
use core::{
cmd::get_jan_data_folder_path,
setup::{self, setup_engine_binaries, setup_mcp, setup_sidecar},
setup::{self, setup_mcp},
state::{generate_app_token, AppState},
utils::download::DownloadManagerState,
};
use std::{collections::HashMap, sync::Arc};
use tauri::Emitter;
use tokio::sync::Mutex;
use crate::core::setup::clean_up;
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
@@ -122,17 +120,17 @@ pub fn run() {
log::error!("Failed to install extensions: {}", e);
}
setup_mcp(app);
setup_sidecar(app).expect("Failed to setup sidecar");
setup_engine_binaries(app).expect("Failed to setup engine binaries");
Ok(())
})
.on_window_event(|window, event| match event {
tauri::WindowEvent::CloseRequested { .. } => {
if window.label() == "main" {
window.emit("kill-sidecar", ()).unwrap();
window.emit("kill-mcp-servers", ()).unwrap();
clean_up();
}
let client = Client::new();
let url = "http://127.0.0.1:39291/processManager/destroy";
let _ = client.delete(url).send();
}
_ => {}
})