From 3f082372fd0896bdef25b14cfdc3f6f15a7e3c1d Mon Sep 17 00:00:00 2001
From: Thien Tran
Date: Wed, 7 May 2025 15:23:40 +0700
Subject: [PATCH] add llamacpp-extension. can list some models

---
 extensions/llamacpp-extension/package.json    |  42 +++++++
 .../llamacpp-extension/rolldown.config.mjs    |  17 +++
 extensions/llamacpp-extension/settings.json   |  98 +++++++++++++++
 extensions/llamacpp-extension/src/env.d.ts    |   2 +
 extensions/llamacpp-extension/src/index.ts    | 115 ++++++++++++++++++
 extensions/llamacpp-extension/tsconfig.json   |  15 +++
 6 files changed, 289 insertions(+)
 create mode 100644 extensions/llamacpp-extension/package.json
 create mode 100644 extensions/llamacpp-extension/rolldown.config.mjs
 create mode 100644 extensions/llamacpp-extension/settings.json
 create mode 100644 extensions/llamacpp-extension/src/env.d.ts
 create mode 100644 extensions/llamacpp-extension/src/index.ts
 create mode 100644 extensions/llamacpp-extension/tsconfig.json

diff --git a/extensions/llamacpp-extension/package.json b/extensions/llamacpp-extension/package.json
new file mode 100644
index 000000000..4b193f4dc
--- /dev/null
+++ b/extensions/llamacpp-extension/package.json
@@ -0,0 +1,42 @@
+{
+  "name": "@janhq/llamacpp-extension",
+  "productName": "llama.cpp Inference Engine",
+  "version": "1.0.0",
+  "description": "This extension enables llama.cpp chat completion API calls",
+  "main": "dist/index.js",
+  "module": "dist/module.js",
+  "engine": "llama.cpp",
+  "author": "Jan ",
+  "license": "AGPL-3.0",
+  "scripts": {
+    "build": "rolldown -c rolldown.config.mjs",
+    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
+  },
+  "devDependencies": {
+    "cpx": "^1.5.0",
+    "rimraf": "^3.0.2",
+    "rolldown": "1.0.0-beta.1",
+    "ts-loader": "^9.5.0",
+    "typescript": "^5.7.2"
+  },
+  "dependencies": {
+    "@janhq/core": "../../core/package.tgz",
+    "fetch-retry": "^5.0.6",
+    "ulidx": "^2.3.0"
+  },
+  "engines": {
+    "node": ">=18.0.0"
+  },
+  "files": [
+    "dist/*",
+    "package.json",
+    "README.md"
+  ],
+  "bundleDependencies": [
+    "fetch-retry"
+  ],
+  "installConfig": {
+    "hoistingLimits": "workspaces"
+  },
+  "packageManager": "yarn@4.5.3"
+}
diff --git a/extensions/llamacpp-extension/rolldown.config.mjs b/extensions/llamacpp-extension/rolldown.config.mjs
new file mode 100644
index 000000000..3b0adeed9
--- /dev/null
+++ b/extensions/llamacpp-extension/rolldown.config.mjs
@@ -0,0 +1,17 @@
+
+import { defineConfig } from 'rolldown'
+import pkgJson from './package.json' with { type: 'json' }
+import settingJson from './settings.json' with { type: 'json' }
+
+export default defineConfig({
+  input: 'src/index.ts',
+  output: {
+    format: 'esm',
+    file: 'dist/index.js',
+  },
+  platform: 'browser',
+  define: {
+    SETTINGS: JSON.stringify(settingJson),
+    ENGINE: JSON.stringify(pkgJson.engine),
+  },
+})
diff --git a/extensions/llamacpp-extension/settings.json b/extensions/llamacpp-extension/settings.json
new file mode 100644
index 000000000..b8b6ddd14
--- /dev/null
+++ b/extensions/llamacpp-extension/settings.json
@@ -0,0 +1,98 @@
+[
+  {
+    "key": "port",
+    "title": "Port",
+    "description": "Port",
+    "controllerType": "input",
+    "controllerProps": {
+      "value": "8080",
+      "placeholder": "8080",
+      "type": "number",
+      "textAlign": "right"
+    }
+  },
+  {
+    "key": "cont_batching",
+    "title": "Continuous Batching",
+    "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
+    "controllerType": "checkbox",
+    "controllerProps": {
+      "value": true
+    }
+  },
+  {
"key": "n_parallel", + "title": "Parallel Operations", + "description": "Number of prompts that can be processed simultaneously by the model.", + "controllerType": "input", + "controllerProps": { + "value": "4", + "placeholder": "4", + "type": "number", + "textAlign": "right" + } + }, + { + "key": "cpu_threads", + "title": "CPU Threads", + "description": "Number of CPU cores used for model processing when running without GPU.", + "controllerType": "input", + "controllerProps": { + "value": "", + "placeholder": "Number of CPU threads", + "type": "number", + "textAlign": "right" + } + }, + { + "key": "flash_attn", + "title": "Flash Attention", + "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.", + "controllerType": "checkbox", + "controllerProps": { + "value": true + } + }, + + { + "key": "caching_enabled", + "title": "Caching", + "description": "Stores recent prompts and responses to improve speed when similar questions are asked.", + "controllerType": "checkbox", + "controllerProps": { + "value": true + } + }, + { + "key": "cache_type", + "title": "KV Cache Type", + "description": "Controls memory usage and precision trade-off.", + "controllerType": "dropdown", + "controllerProps": { + "value": "f16", + "options": [ + { + "value": "q4_0", + "name": "q4_0" + }, + { + "value": "q8_0", + "name": "q8_0" + }, + { + "value": "f16", + "name": "f16" + } + ] + } + }, + { + "key": "use_mmap", + "title": "mmap", + "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.", + "controllerType": "checkbox", + "controllerProps": { + "value": true + } + } +] diff --git a/extensions/llamacpp-extension/src/env.d.ts b/extensions/llamacpp-extension/src/env.d.ts new file mode 100644 index 000000000..2f5f7c894 --- /dev/null +++ b/extensions/llamacpp-extension/src/env.d.ts @@ -0,0 +1,2 @@ +declare const SETTINGS: SettingComponentProps[] +declare const ENGINE: string diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts new file mode 100644 index 000000000..2fce99924 --- /dev/null +++ b/extensions/llamacpp-extension/src/index.ts @@ -0,0 +1,115 @@ +/** + * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + * @version 1.0.0 + * @module llamacpp-extension/src/index + */ + +import { RemoteOAIEngine, getJanDataFolderPath, fs, ModelCapability, Model } from '@janhq/core' + +export enum Settings { + port = 'port', +} + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class LlamacppProvider extends RemoteOAIEngine { + inferenceUrl: string = '' + baseURL: string = '' + provider: string = ENGINE + + override async onLoad(): Promise { + super.onLoad() + + // Register Settings + this.registerSettings(SETTINGS) + + // register models + const models = await this.listModels() + this.registerModels(models) + + // NOTE: port 0 may mean request free port from OS. we may want + // to take advantage of this. llama-server --port 0 on macOS works. 
+    const port = await this.getSetting(Settings.port, 0)
+    this.updateBaseUrl(port)
+  }
+
+  // onSettingUpdate<T>(key: string, value: T): void {
+  //   if (key === Settings.apiKey) {
+  //     this.apiKey = value as string
+  //   } else if (key === Settings.baseUrl) {
+  //     if (typeof value !== 'string') return
+  //     this.updateBaseUrl(value)
+  //   }
+  // }
+
+  updateBaseUrl(value: number): void {
+    if (value == 0) {
+      // set to default value
+      SETTINGS.forEach((setting) => {
+        if (setting.key === Settings.port) {
+          value = setting.controllerProps.value as number
+        }
+      })
+    }
+    this.baseURL = `http://127.0.0.1:${value}`
+    this.inferenceUrl = `${this.baseURL}/chat/completions`
+  }
+
+  async listModels(): Promise<Model[]> {
+    let modelIds = []
+
+    const modelsFolder = `${await getJanDataFolderPath()}/models`
+
+    // cortexso models
+    const cortexsoFolder = `${modelsFolder}/cortex.so`
+    const modelDirs = await fs.readdirSync(cortexsoFolder)
+    for (const modelDir of modelDirs) {
+      const modelName = modelDir.split('/').pop()
+
+      // TODO: try removing this check
+      // skip files start with . e.g. .DS_store
+      if (!modelName || modelName.startsWith('.')) continue
+
+      const variantDirs = await fs.readdirSync(modelDir)
+      for (const variantDir of variantDirs) {
+        // NOTE: we can't detect unfinished download here
+        const ggufPath = `${variantDir}/model.gguf`
+
+        if (await fs.existsSync(ggufPath)) {
+          const variantName = variantDir.split('/').pop()
+          modelIds.push(`${modelName}/${variantName}`)
+        }
+      }
+    }
+
+    // TODO: list models under huggingface.co
+
+    const models = modelIds.map((modelId) => {
+      return {
+        sources: [],
+        object: 'model',
+        version: '1.0',
+        format: 'api',
+        id: modelId,
+        name: modelId,
+        created: 0,
+        description: '',
+        settings: {},
+        parameters: {},
+        metadata: {
+          author: '',
+          tags: [],
+          size: 0,
+        },
+        engine: this.provider,
+        capabilities: [ModelCapability.completion],
+      }
+    })
+    return models
+  }
+}
diff --git a/extensions/llamacpp-extension/tsconfig.json b/extensions/llamacpp-extension/tsconfig.json
new file mode 100644
index 000000000..6db951c9e
--- /dev/null
+++ b/extensions/llamacpp-extension/tsconfig.json
@@ -0,0 +1,15 @@
+{
+  "compilerOptions": {
+    "target": "es2016",
+    "module": "ES6",
+    "moduleResolution": "node",
+    "outDir": "./dist",
+    "esModuleInterop": true,
+    "forceConsistentCasingInFileNames": true,
+    "strict": false,
+    "skipLibCheck": true,
+    "rootDir": "./src"
+  },
+  "include": ["./src"],
+  "exclude": ["**/*.test.ts"]
+}
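One follow-up to the NOTE in onLoad() about letting the OS hand out a free port: below is a minimal sketch, assuming the process that spawns llama-server runs under Node and can use the built-in node:net module. The getFreePort helper is hypothetical and not part of this patch; it probes port 0, reads back the assigned port, and releases it, which leaves a small race window before llama-server re-binds that port. Passing --port 0 to llama-server itself (as the NOTE suggests) avoids the race, but the chosen port then has to be discovered from the running server rather than being known up front.

import { createServer } from 'node:net'

// Bind to port 0 so the OS assigns an unused port, read it back, then release it.
function getFreePort(): Promise<number> {
  return new Promise((resolve, reject) => {
    const probe = createServer()
    probe.unref()
    probe.on('error', reject)
    probe.listen(0, '127.0.0.1', () => {
      const address = probe.address()
      if (address && typeof address === 'object') {
        const port = address.port
        probe.close(() => resolve(port))
      } else {
        probe.close(() => reject(new Error('could not determine a free port')))
      }
    })
  })
}

// Example: pick a port, then hand it to llama-server instead of the hard-coded 8080 default.
getFreePort().then((port) => {
  console.log(`would run: llama-server --port ${port}`)
})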