add llamacpp-extension. can list some models
This commit is contained in:
parent 15f0b11c0d
commit 3f082372fd
42  extensions/llamacpp-extension/package.json  Normal file
@@ -0,0 +1,42 @@
{
  "name": "@janhq/llamacpp-extension",
  "productName": "llama.cpp Inference Engine",
  "version": "1.0.0",
  "description": "This extension enables llama.cpp chat completion API calls",
  "main": "dist/index.js",
  "module": "dist/module.js",
  "engine": "llama.cpp",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "scripts": {
    "build": "rolldown -c rolldown.config.mjs",
    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
  },
  "devDependencies": {
    "cpx": "^1.5.0",
    "rimraf": "^3.0.2",
    "rolldown": "1.0.0-beta.1",
    "ts-loader": "^9.5.0",
    "typescript": "^5.7.2"
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "fetch-retry": "^5.0.6",
    "ulidx": "^2.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "fetch-retry"
  ],
  "installConfig": {
    "hoistingLimits": "workspaces"
  },
  "packageManager": "yarn@4.5.3"
}
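The build:publish script above is a four-step chain; broken out with one command per line for readability (the tarball name is an assumption based on npm's naming convention for scoped packages, not output shown in this commit):

    rimraf *.tgz --glob || true      # remove any stale tarball; don't fail if there is none
    yarn build                       # bundle src/index.ts into dist/index.js with rolldown
    npm pack                         # produce janhq-llamacpp-extension-1.0.0.tgz
    cpx *.tgz ../../pre-install      # copy the tarball into the app's pre-install folder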
17  extensions/llamacpp-extension/rolldown.config.mjs  Normal file
@@ -0,0 +1,17 @@

import { defineConfig } from 'rolldown'
import pkgJson from './package.json' with { type: 'json' }
import settingJson from './settings.json' with { type: 'json' }

export default defineConfig({
  input: 'src/index.ts',
  output: {
    format: 'esm',
    file: 'dist/index.js',
  },
  platform: 'browser',
  define: {
    SETTINGS: JSON.stringify(settingJson),
    ENGINE: JSON.stringify(pkgJson.engine),
  },
})
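The define block is compile-time substitution: rolldown replaces the bare identifiers SETTINGS and ENGINE with the stringified JSON wherever they occur in the source, so dist/index.js carries the settings inline. A minimal before/after sketch (the inlined literal is abridged):

    // src/index.ts, as written (the identifiers are ambient, see env.d.ts below)
    this.registerSettings(SETTINGS)
    const provider = ENGINE

    // dist/index.js, after bundling (values inlined from settings.json and package.json)
    this.registerSettings([{ key: 'port', title: 'Port' /* ... abridged ... */ }])
    const provider = 'llama.cpp'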
98  extensions/llamacpp-extension/settings.json  Normal file
@@ -0,0 +1,98 @@
[
  {
    "key": "port",
    "title": "Port",
    "description": "Port",
    "controllerType": "input",
    "controllerProps": {
      "value": "8080",
      "placeholder": "8080",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cont_batching",
    "title": "Continuous Batching",
    "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "n_parallel",
    "title": "Parallel Operations",
    "description": "Number of prompts that can be processed simultaneously by the model.",
    "controllerType": "input",
    "controllerProps": {
      "value": "4",
      "placeholder": "4",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cpu_threads",
    "title": "CPU Threads",
    "description": "Number of CPU cores used for model processing when running without GPU.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "Number of CPU threads",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "flash_attn",
    "title": "Flash Attention",
    "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },

  {
    "key": "caching_enabled",
    "title": "Caching",
    "description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "cache_type",
    "title": "KV Cache Type",
    "description": "Controls memory usage and precision trade-off.",
    "controllerType": "dropdown",
    "controllerProps": {
      "value": "f16",
      "options": [
        {
          "value": "q4_0",
          "name": "q4_0"
        },
        {
          "value": "q8_0",
          "name": "q8_0"
        },
        {
          "value": "f16",
          "name": "f16"
        }
      ]
    }
  },
  {
    "key": "use_mmap",
    "title": "mmap",
    "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  }
]
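These keys line up with llama-server's CLI flags (--port, -np, --threads, --flash-attn, --cache-type-k/--cache-type-v, --no-mmap), but nothing in this commit launches a server yet, so the translation below is only a sketch of how the settings would plausibly be consumed later:

    // Sketch, not part of the commit: this interface and function are hypothetical;
    // the flag names are llama.cpp's own.
    interface LlamacppSettings {
      port: number
      cont_batching: boolean
      n_parallel: number
      cpu_threads?: number
      flash_attn: boolean
      cache_type: 'q4_0' | 'q8_0' | 'f16'
      use_mmap: boolean
    }

    function buildServerArgs(s: LlamacppSettings): string[] {
      const args = ['--port', String(s.port), '-np', String(s.n_parallel)]
      if (s.cont_batching) args.push('--cont-batching')
      // empty default in settings.json means: let llama.cpp pick the thread count
      if (s.cpu_threads) args.push('--threads', String(s.cpu_threads))
      if (s.flash_attn) args.push('--flash-attn')
      args.push('--cache-type-k', s.cache_type, '--cache-type-v', s.cache_type)
      if (!s.use_mmap) args.push('--no-mmap')
      return args
    }

    // caching_enabled has no launch flag here: llama-server takes prompt caching
    // per request, via "cache_prompt" in the completion request body.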
2  extensions/llamacpp-extension/src/env.d.ts  vendored  Normal file
@@ -0,0 +1,2 @@
declare const SETTINGS: SettingComponentProps[]
declare const ENGINE: string
115  extensions/llamacpp-extension/src/index.ts  Normal file
@@ -0,0 +1,115 @@
/**
 * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module llamacpp-extension/src/index
 */

import { RemoteOAIEngine, getJanDataFolderPath, fs, ModelCapability, Model } from '@janhq/core'

export enum Settings {
  port = 'port',
}

/**
 * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
export default class LlamacppProvider extends RemoteOAIEngine {
  inferenceUrl: string = ''
  baseURL: string = ''
  provider: string = ENGINE

  override async onLoad(): Promise<void> {
    super.onLoad()

    // Register settings
    this.registerSettings(SETTINGS)

    // Register models
    const models = await this.listModels()
    this.registerModels(models)

    // NOTE: port 0 may mean request a free port from the OS. We may want
    // to take advantage of this. llama-server --port 0 works on macOS.
    const port = await this.getSetting<number>(Settings.port, 0)
    this.updateBaseUrl(port)
  }

  // onSettingUpdate<T>(key: string, value: T): void {
  //   if (key === Settings.apiKey) {
  //     this.apiKey = value as string
  //   } else if (key === Settings.baseUrl) {
  //     if (typeof value !== 'string') return
  //     this.updateBaseUrl(value)
  //   }
  // }

  updateBaseUrl(value: number): void {
    if (value === 0) {
      // fall back to the default port declared in settings.json
      SETTINGS.forEach((setting) => {
        if (setting.key === Settings.port) {
          value = setting.controllerProps.value as number
        }
      })
    }
    this.baseURL = `http://127.0.0.1:${value}`
    this.inferenceUrl = `${this.baseURL}/chat/completions`
  }

  async listModels(): Promise<Model[]> {
    const modelIds: string[] = []

    const modelsFolder = `${await getJanDataFolderPath()}/models`

    // cortexso models
    const cortexsoFolder = `${modelsFolder}/cortex.so`
    const modelDirs = await fs.readdirSync(cortexsoFolder)
    for (const modelDir of modelDirs) {
      const modelName = modelDir.split('/').pop()

      // TODO: try removing this check
      // skip files that start with '.', e.g. .DS_Store
      if (!modelName || modelName.startsWith('.')) continue

      const variantDirs = await fs.readdirSync(modelDir)
      for (const variantDir of variantDirs) {
        // NOTE: we can't detect an unfinished download here
        const ggufPath = `${variantDir}/model.gguf`

        if (await fs.existsSync(ggufPath)) {
          const variantName = variantDir.split('/').pop()
          modelIds.push(`${modelName}/${variantName}`)
        }
      }
    }

    // TODO: list models under huggingface.co

    const models = modelIds.map((modelId) => {
      return {
        sources: [],
        object: 'model',
        version: '1.0',
        format: 'api',
        id: modelId,
        name: modelId,
        created: 0,
        description: '',
        settings: {},
        parameters: {},
        metadata: {
          author: '',
          tags: [],
          size: 0,
        },
        engine: this.provider,
        capabilities: [ModelCapability.completion],
      }
    })
    return models
  }
}
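listModels therefore only surfaces finished cortex.so downloads. The directory shape the scan assumes, per the paths in the code (the concrete model and variant names below are illustrative, not from this commit):

    <Jan data folder>/models/cortex.so/<modelName>/<variantName>/model.gguf
    e.g. models/cortex.so/llama3.2/3b-instruct-q4_km/model.gguf
         registered with id and name "llama3.2/3b-instruct-q4_km"

A variant folder without model.gguf (for instance an interrupted download) is silently skipped, as is any entry starting with '.', such as .DS_Store.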
15  extensions/llamacpp-extension/tsconfig.json  Normal file
@@ -0,0 +1,15 @@
{
  "compilerOptions": {
    "target": "es2016",
    "module": "ES6",
    "moduleResolution": "node",
    "outDir": "./dist",
    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "strict": false,
    "skipLibCheck": true,
    "rootDir": "./src"
  },
  "include": ["./src"],
  "exclude": ["**/*.test.ts"]
}