add llamacpp-extension. can list some models
This commit is contained in:
parent
15f0b11c0d
commit
3f082372fd
42
extensions/llamacpp-extension/package.json
Normal file
42
extensions/llamacpp-extension/package.json
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
{
|
||||||
|
"name": "@janhq/llamacpp-extension",
|
||||||
|
"productName": "llama.cpp Inference Engine",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "This extension enables llama.cpp chat completion API calls",
|
||||||
|
"main": "dist/index.js",
|
||||||
|
"module": "dist/module.js",
|
||||||
|
"engine": "llama.cpp",
|
||||||
|
"author": "Jan <service@jan.ai>",
|
||||||
|
"license": "AGPL-3.0",
|
||||||
|
"scripts": {
|
||||||
|
"build": "rolldown -c rolldown.config.mjs",
|
||||||
|
"build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"cpx": "^1.5.0",
|
||||||
|
"rimraf": "^3.0.2",
|
||||||
|
"rolldown": "1.0.0-beta.1",
|
||||||
|
"ts-loader": "^9.5.0",
|
||||||
|
"typescript": "^5.7.2"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@janhq/core": "../../core/package.tgz",
|
||||||
|
"fetch-retry": "^5.0.6",
|
||||||
|
"ulidx": "^2.3.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18.0.0"
|
||||||
|
},
|
||||||
|
"files": [
|
||||||
|
"dist/*",
|
||||||
|
"package.json",
|
||||||
|
"README.md"
|
||||||
|
],
|
||||||
|
"bundleDependencies": [
|
||||||
|
"fetch-retry"
|
||||||
|
],
|
||||||
|
"installConfig": {
|
||||||
|
"hoistingLimits": "workspaces"
|
||||||
|
},
|
||||||
|
"packageManager": "yarn@4.5.3"
|
||||||
|
}
|
||||||
17
extensions/llamacpp-extension/rolldown.config.mjs
Normal file
17
extensions/llamacpp-extension/rolldown.config.mjs
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
import { defineConfig } from 'rolldown'
|
||||||
|
import pkgJson from './package.json' with { type: 'json' }
|
||||||
|
import settingJson from './settings.json' with { type: 'json' }
|
||||||
|
|
||||||
|
// Single ESM bundle written to dist/index.js.
const bundleOutput = {
  format: 'esm',
  file: 'dist/index.js',
}

// Identifiers substituted into the bundle at build time. Each value is
// JSON-encoded so it is inlined as a literal expression.
const buildTimeConstants = {
  SETTINGS: JSON.stringify(settingJson),
  ENGINE: JSON.stringify(pkgJson.engine),
}

export default defineConfig({
  input: 'src/index.ts',
  output: bundleOutput,
  platform: 'browser',
  define: buildTimeConstants,
})
|
||||||
98
extensions/llamacpp-extension/settings.json
Normal file
98
extensions/llamacpp-extension/settings.json
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"key": "port",
|
||||||
|
"title": "Port",
|
||||||
|
"description": "Port",
|
||||||
|
"controllerType": "input",
|
||||||
|
"controllerProps": {
|
||||||
|
"value": "8080",
|
||||||
|
"placeholder": "8080",
|
||||||
|
"type": "number",
|
||||||
|
"textAlign": "right"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "cont_batching",
|
||||||
|
"title": "Continuous Batching",
|
||||||
|
"description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
|
||||||
|
"controllerType": "checkbox",
|
||||||
|
"controllerProps": {
|
||||||
|
"value": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "n_parallel",
|
||||||
|
"title": "Parallel Operations",
|
||||||
|
"description": "Number of prompts that can be processed simultaneously by the model.",
|
||||||
|
"controllerType": "input",
|
||||||
|
"controllerProps": {
|
||||||
|
"value": "4",
|
||||||
|
"placeholder": "4",
|
||||||
|
"type": "number",
|
||||||
|
"textAlign": "right"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "cpu_threads",
|
||||||
|
"title": "CPU Threads",
|
||||||
|
"description": "Number of CPU cores used for model processing when running without GPU.",
|
||||||
|
"controllerType": "input",
|
||||||
|
"controllerProps": {
|
||||||
|
"value": "",
|
||||||
|
"placeholder": "Number of CPU threads",
|
||||||
|
"type": "number",
|
||||||
|
"textAlign": "right"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "flash_attn",
|
||||||
|
"title": "Flash Attention",
|
||||||
|
"description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
|
||||||
|
"controllerType": "checkbox",
|
||||||
|
"controllerProps": {
|
||||||
|
"value": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"key": "caching_enabled",
|
||||||
|
"title": "Caching",
|
||||||
|
"description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
|
||||||
|
"controllerType": "checkbox",
|
||||||
|
"controllerProps": {
|
||||||
|
"value": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "cache_type",
|
||||||
|
"title": "KV Cache Type",
|
||||||
|
"description": "Controls memory usage and precision trade-off.",
|
||||||
|
"controllerType": "dropdown",
|
||||||
|
"controllerProps": {
|
||||||
|
"value": "f16",
|
||||||
|
"options": [
|
||||||
|
{
|
||||||
|
"value": "q4_0",
|
||||||
|
"name": "q4_0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"value": "q8_0",
|
||||||
|
"name": "q8_0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"value": "f16",
|
||||||
|
"name": "f16"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "use_mmap",
|
||||||
|
"title": "mmap",
|
||||||
|
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
|
||||||
|
"controllerType": "checkbox",
|
||||||
|
"controllerProps": {
|
||||||
|
"value": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
2
extensions/llamacpp-extension/src/env.d.ts
vendored
Normal file
2
extensions/llamacpp-extension/src/env.d.ts
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
/**
 * Setting definitions taken from settings.json; substituted into the bundle
 * at build time through the `define` option in rolldown.config.mjs.
 *
 * The type is referenced with an inline `import(...)` type on purpose: a
 * top-level `import` statement would turn this file into a module and the
 * declarations below would stop being global.
 */
declare const SETTINGS: import('@janhq/core').SettingComponentProps[]

/**
 * Engine identifier (the `engine` field of package.json); substituted into the
 * bundle at build time through the `define` option in rolldown.config.mjs.
 */
declare const ENGINE: string
|
||||||
115
extensions/llamacpp-extension/src/index.ts
Normal file
115
extensions/llamacpp-extension/src/index.ts
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
/**
|
||||||
|
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
|
||||||
|
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
||||||
|
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
||||||
|
* @version 1.0.0
|
||||||
|
* @module llamacpp-extension/src/index
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { RemoteOAIEngine, getJanDataFolderPath, fs, ModelCapability, Model } from '@janhq/core'
|
||||||
|
|
||||||
|
/**
 * Keys of the extension settings that this module reads.
 * Each value matches the `key` of an entry in settings.json.
 */
export enum Settings {
  /** Port used to build the local server URL (`http://127.0.0.1:<port>`). */
  port = 'port',
}
|
||||||
|
|
||||||
|
/**
 * Engine extension for a local llama.cpp server, built on `RemoteOAIEngine`
 * from the @janhq/core package.
 *
 * On load it registers the extension settings, registers every model found
 * under `<jan data folder>/models/cortex.so`, and points `baseURL` /
 * `inferenceUrl` at `http://127.0.0.1:<port>`.
 */
export default class LlamacppProvider extends RemoteOAIEngine {
  inferenceUrl: string = ''
  baseURL: string = ''
  provider: string = ENGINE

  /**
   * Registers settings and locally available models, then derives the server
   * URLs from the configured port.
   */
  override async onLoad(): Promise<void> {
    super.onLoad()

    // Register Settings
    this.registerSettings(SETTINGS)

    // register models
    const models = await this.listModels()
    this.registerModels(models)

    // NOTE: port 0 may mean request free port from OS. we may want
    // to take advantage of this. llama-server --port 0 on macOS works.
    const port = await this.getSetting<number>(Settings.port, 0)
    this.updateBaseUrl(port)
  }

  // NOTE(review): nothing in this class reacts to a later change of the port
  // setting; the URLs are only computed once in onLoad. The disabled handler
  // below is kept from the original for reference.
  //
  // onSettingUpdate<T>(key: string, value: T): void {
  //   if (key === Settings.apiKey) {
  //     this.apiKey = value as string
  //   } else if (key === Settings.baseUrl) {
  //     if (typeof value !== 'string') return
  //     this.updateBaseUrl(value)
  //   }
  // }

  /**
   * Points `baseURL` and `inferenceUrl` at the local server on the given port.
   *
   * @param value Port to use. A value that is numerically zero (`0`, `"0"`,
   *   `""`, `null`) falls back to the default of the `port` entry in SETTINGS.
   */
  updateBaseUrl(value: number): void {
    // The stored setting may arrive as a string (the default in settings.json
    // is "8080"), so compare numerically instead of relying on loose equality.
    let port: unknown = value
    if (Number(value) === 0) {
      const portSetting = SETTINGS.find(
        (setting) => setting.key === Settings.port
      )
      const fallback = portSetting?.controllerProps.value
      if (fallback !== undefined) port = fallback
    }

    this.baseURL = `http://127.0.0.1:${port}`
    this.inferenceUrl = `${this.baseURL}/chat/completions`
  }

  /**
   * Scans `<jan data folder>/models/cortex.so/<model>/<variant>/model.gguf`
   * and returns one model entry per variant that has a `model.gguf` file.
   *
   * @returns Model entries with id `<model>/<variant>`; an empty list when the
   *   cortex.so folder does not exist.
   */
  async listModels(): Promise<Model[]> {
    const modelIds: string[] = []

    // Last path segment; accepts both `/` and `\` so paths reported with
    // Windows separators still yield the directory name.
    const lastSegment = (path: string): string | undefined =>
      path.split(/[\\/]/).pop()

    const modelsFolder = `${await getJanDataFolderPath()}/models`

    // cortexso models
    const cortexsoFolder = `${modelsFolder}/cortex.so`

    // The folder may not exist yet (nothing downloaded); treat that as
    // "no models" instead of letting readdirSync reject onLoad.
    if (await fs.existsSync(cortexsoFolder)) {
      // NOTE(review): the fs helpers are awaited throughout this file and the
      // entries are used as paths, so readdirSync presumably resolves to
      // full paths - confirm against @janhq/core.
      const modelDirs: string[] = await fs.readdirSync(cortexsoFolder)
      for (const modelDir of modelDirs) {
        const modelName = lastSegment(modelDir)

        // TODO: try removing this check
        // skip files start with . e.g. .DS_store
        if (!modelName || modelName.startsWith('.')) continue

        const variantDirs: string[] = await fs.readdirSync(modelDir)
        for (const variantDir of variantDirs) {
          // NOTE: we can't detect unfinished download here
          const ggufPath = `${variantDir}/model.gguf`

          if (await fs.existsSync(ggufPath)) {
            const variantName = lastSegment(variantDir)
            modelIds.push(`${modelName}/${variantName}`)
          }
        }
      }
    }

    // TODO: list models under huggingface.co

    return modelIds.map((modelId) => ({
      sources: [],
      object: 'model',
      version: '1.0',
      format: 'api',
      id: modelId,
      name: modelId,
      created: 0,
      description: '',
      settings: {},
      parameters: {},
      metadata: {
        author: '',
        tags: [],
        size: 0,
      },
      engine: this.provider,
      capabilities: [ModelCapability.completion],
    }))
  }
}
|
||||||
15
extensions/llamacpp-extension/tsconfig.json
Normal file
15
extensions/llamacpp-extension/tsconfig.json
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "es2016",
|
||||||
|
"module": "ES6",
|
||||||
|
"moduleResolution": "node",
|
||||||
|
"outDir": "./dist",
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"strict": false,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"rootDir": "./src"
|
||||||
|
},
|
||||||
|
"include": ["./src"],
|
||||||
|
"exclude": ["**/*.test.ts"]
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user