add llamacpp-extension. can list some models

Thien Tran 2025-05-07 15:23:40 +07:00 committed by Louis
parent 15f0b11c0d
commit 3f082372fd
No known key found for this signature in database
GPG Key ID: 44FA9F4D33C37DE2
6 changed files with 289 additions and 0 deletions

package.json
@@ -0,0 +1,42 @@
{
  "name": "@janhq/llamacpp-extension",
  "productName": "llama.cpp Inference Engine",
  "version": "1.0.0",
  "description": "This extension enables llama.cpp chat completion API calls",
  "main": "dist/index.js",
  "module": "dist/module.js",
  "engine": "llama.cpp",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "scripts": {
    "build": "rolldown -c rolldown.config.mjs",
    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
  },
  "devDependencies": {
    "cpx": "^1.5.0",
    "rimraf": "^3.0.2",
    "rolldown": "1.0.0-beta.1",
    "ts-loader": "^9.5.0",
    "typescript": "^5.7.2"
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "fetch-retry": "^5.0.6",
    "ulidx": "^2.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "fetch-retry"
  ],
  "installConfig": {
    "hoistingLimits": "workspaces"
  },
  "packageManager": "yarn@4.5.3"
}

rolldown.config.mjs
@@ -0,0 +1,17 @@
import { defineConfig } from 'rolldown'
import pkgJson from './package.json' with { type: 'json' }
import settingJson from './settings.json' with { type: 'json' }

export default defineConfig({
  input: 'src/index.ts',
  output: {
    format: 'esm',
    file: 'dist/index.js',
  },
  platform: 'browser',
  define: {
    SETTINGS: JSON.stringify(settingJson),
    ENGINE: JSON.stringify(pkgJson.engine),
  },
})
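
The define entries above are compile-time substitutions: rolldown replaces every occurrence of the bare identifiers SETTINGS and ENGINE in the source with the stringified JSON, so the bundle carries the settings and the engine name as literals rather than importing settings.json at runtime. A minimal sketch of the effect, using the "engine" value this package.json actually declares:

// src/index.ts (before bundling): ENGINE is a bare identifier, declared only in a .d.ts
console.log(ENGINE)

// dist/index.js (after bundling): rolldown has inlined the literal from package.json
console.log("llama.cpp")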

settings.json
@@ -0,0 +1,98 @@
[
  {
    "key": "port",
    "title": "Port",
    "description": "The port the local llama.cpp server listens on.",
    "controllerType": "input",
    "controllerProps": {
      "value": "8080",
      "placeholder": "8080",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cont_batching",
    "title": "Continuous Batching",
    "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "n_parallel",
    "title": "Parallel Operations",
    "description": "Number of prompts that can be processed simultaneously by the model.",
    "controllerType": "input",
    "controllerProps": {
      "value": "4",
      "placeholder": "4",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cpu_threads",
    "title": "CPU Threads",
    "description": "Number of CPU cores used for model processing when running without a GPU.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "Number of CPU threads",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "flash_attn",
    "title": "Flash Attention",
    "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "caching_enabled",
    "title": "Caching",
    "description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "cache_type",
    "title": "KV Cache Type",
    "description": "Controls the memory usage and precision trade-off of the KV cache.",
    "controllerType": "dropdown",
    "controllerProps": {
      "value": "f16",
      "options": [
        { "value": "q4_0", "name": "q4_0" },
        { "value": "q8_0", "name": "q8_0" },
        { "value": "f16", "name": "f16" }
      ]
    }
  },
  {
    "key": "use_mmap",
    "title": "mmap",
    "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  }
]
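
Most of these keys mirror llama-server command-line flags (--port, --cont-batching, --parallel, --threads, --flash-attn, --cache-type-k/--cache-type-v, --no-mmap). This commit does not yet spawn the server, so the translation below is only a hedged sketch: the buildServerArgs helper and the LlamacppSettings shape are hypothetical, while the flag names are llama.cpp's own.

// Hypothetical helper, not part of this commit: map the settings above
// onto llama-server launch arguments.
interface LlamacppSettings {
  port: number
  cont_batching: boolean
  n_parallel: number
  cpu_threads?: number
  flash_attn: boolean
  cache_type: 'q4_0' | 'q8_0' | 'f16'
  use_mmap: boolean
}

function buildServerArgs(s: LlamacppSettings): string[] {
  const args = ['--port', String(s.port)]
  if (s.cont_batching) args.push('--cont-batching')
  args.push('--parallel', String(s.n_parallel))
  if (s.cpu_threads) args.push('--threads', String(s.cpu_threads))
  if (s.flash_attn) args.push('--flash-attn')
  // KV cache quantization applies to both the K and V caches
  args.push('--cache-type-k', s.cache_type, '--cache-type-v', s.cache_type)
  if (!s.use_mmap) args.push('--no-mmap')
  // caching_enabled has no single llama-server flag and is omitted here
  return args
}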

(ambient declaration file for the injected constants; filename not shown in this view)
@@ -0,0 +1,2 @@
declare const SETTINGS: import('@janhq/core').SettingComponentProps[]
declare const ENGINE: string

src/index.ts
@@ -0,0 +1,115 @@
/**
 * @file This file exports a class that extends RemoteOAIEngine from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module llamacpp-extension/src/index
 */
import {
  RemoteOAIEngine,
  getJanDataFolderPath,
  fs,
  ModelCapability,
  Model,
} from '@janhq/core'

export enum Settings {
  port = 'port',
}

/**
 * A provider that registers locally downloaded llama.cpp models and routes
 * chat completion requests to a llama-server instance on a configurable port.
 */
export default class LlamacppProvider extends RemoteOAIEngine {
  inferenceUrl: string = ''
  baseURL: string = ''
  provider: string = ENGINE

  override async onLoad(): Promise<void> {
    super.onLoad()

    // Register settings
    this.registerSettings(SETTINGS)

    // Register models
    const models = await this.listModels()
    this.registerModels(models)

    // NOTE: port 0 may mean request a free port from the OS. We may want
    // to take advantage of this. llama-server --port 0 on macOS works.
    const port = await this.getSetting<number>(Settings.port, 0)
    this.updateBaseUrl(port)
  }

  // onSettingUpdate<T>(key: string, value: T): void {
  //   if (key === Settings.apiKey) {
  //     this.apiKey = value as string
  //   } else if (key === Settings.baseUrl) {
  //     if (typeof value !== 'string') return
  //     this.updateBaseUrl(value)
  //   }
  // }

  updateBaseUrl(value: number): void {
    if (value == 0) {
      // Fall back to the default port declared in settings.json
      SETTINGS.forEach((setting) => {
        if (setting.key === Settings.port) {
          value = setting.controllerProps.value as number
        }
      })
    }
    this.baseURL = `http://127.0.0.1:${value}`
    this.inferenceUrl = `${this.baseURL}/chat/completions`
  }

  async listModels(): Promise<Model[]> {
    let modelIds = []
    const modelsFolder = `${await getJanDataFolderPath()}/models`

    // cortexso models
    const cortexsoFolder = `${modelsFolder}/cortex.so`
    const modelDirs = await fs.readdirSync(cortexsoFolder)
    for (const modelDir of modelDirs) {
      const modelName = modelDir.split('/').pop()

      // TODO: try removing this check
      // Skip files that start with '.', e.g. .DS_Store
      if (!modelName || modelName.startsWith('.')) continue

      const variantDirs = await fs.readdirSync(modelDir)
      for (const variantDir of variantDirs) {
        // NOTE: we can't detect an unfinished download here
        const ggufPath = `${variantDir}/model.gguf`
        if (await fs.existsSync(ggufPath)) {
          const variantName = variantDir.split('/').pop()
          modelIds.push(`${modelName}/${variantName}`)
        }
      }
    }

    // TODO: list models under huggingface.co

    const models = modelIds.map((modelId) => {
      return {
        sources: [],
        object: 'model',
        version: '1.0',
        format: 'api',
        id: modelId,
        name: modelId,
        created: 0,
        description: '',
        settings: {},
        parameters: {},
        metadata: {
          author: '',
          tags: [],
          size: 0,
        },
        engine: this.provider,
        capabilities: [ModelCapability.completion],
      }
    })
    return models
  }
}
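
listModels assumes the cortexso download layout under the Jan data folder, i.e. models/cortex.so/<model>/<variant>/model.gguf, and reports each hit as <model>/<variant>. Once a llama-server is actually listening on the configured port, the inferenceUrl assembled in updateBaseUrl can be exercised directly. A hedged sketch follows; the port and model id are illustrative placeholders, not values shipped by this commit:

// Illustrative request against the endpoint this extension targets.
const res = await fetch('http://127.0.0.1:8080/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'llama3.2/3b-instruct-q4_K_M', // hypothetical <model>/<variant> id
    messages: [{ role: 'user', content: 'Hello!' }],
  }),
})
const data = await res.json()
console.log(data.choices[0].message.content)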

tsconfig.json
@@ -0,0 +1,15 @@
{
  "compilerOptions": {
    "target": "es2016",
    "module": "ES6",
    "moduleResolution": "node",
    "outDir": "./dist",
    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "strict": false,
    "skipLibCheck": true,
    "rootDir": "./src"
  },
  "include": ["./src"],
  "exclude": ["**/*.test.ts"]
}