add llamacpp-extension. can list some models

Thien Tran 2025-05-07 15:23:40 +07:00 committed by Louis
parent 15f0b11c0d
commit 3f082372fd
No known key found for this signature in database
GPG Key ID: 44FA9F4D33C37DE2
6 changed files with 289 additions and 0 deletions

package.json
@@ -0,0 +1,42 @@
{
  "name": "@janhq/llamacpp-extension",
  "productName": "llama.cpp Inference Engine",
  "version": "1.0.0",
  "description": "This extension enables llama.cpp chat completion API calls",
  "main": "dist/index.js",
  "module": "dist/module.js",
  "engine": "llama.cpp",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "scripts": {
    "build": "rolldown -c rolldown.config.mjs",
    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
  },
  "devDependencies": {
    "cpx": "^1.5.0",
    "rimraf": "^3.0.2",
    "rolldown": "1.0.0-beta.1",
    "ts-loader": "^9.5.0",
    "typescript": "^5.7.2"
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "fetch-retry": "^5.0.6",
    "ulidx": "^2.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "fetch-retry"
  ],
  "installConfig": {
    "hoistingLimits": "workspaces"
  },
  "packageManager": "yarn@4.5.3"
}

rolldown.config.mjs
@@ -0,0 +1,17 @@
import { defineConfig } from 'rolldown'
import pkgJson from './package.json' with { type: 'json' }
import settingJson from './settings.json' with { type: 'json' }

export default defineConfig({
  input: 'src/index.ts',
  output: {
    format: 'esm',
    file: 'dist/index.js',
  },
  platform: 'browser',
  define: {
    SETTINGS: JSON.stringify(settingJson),
    ENGINE: JSON.stringify(pkgJson.engine),
  },
})
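
The define entries above are compile-time substitutions: rolldown replaces every occurrence of the bare identifiers SETTINGS and ENGINE in the source with the stringified JSON, so the bundle carries the settings and the engine name as literals rather than importing settings.json at runtime. A minimal sketch of the effect, using the "engine" value this package.json actually declares:

// src/index.ts (before bundling): ENGINE is a bare identifier, declared only in a .d.ts
console.log(ENGINE)

// dist/index.js (after bundling): rolldown has inlined the literal from package.json
console.log("llama.cpp")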

settings.json
@@ -0,0 +1,98 @@
[
  {
    "key": "port",
    "title": "Port",
    "description": "The port the local llama.cpp server listens on.",
    "controllerType": "input",
    "controllerProps": {
      "value": "8080",
      "placeholder": "8080",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cont_batching",
    "title": "Continuous Batching",
    "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "n_parallel",
    "title": "Parallel Operations",
    "description": "Number of prompts that can be processed simultaneously by the model.",
    "controllerType": "input",
    "controllerProps": {
      "value": "4",
      "placeholder": "4",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cpu_threads",
    "title": "CPU Threads",
    "description": "Number of CPU cores used for model processing when running without a GPU.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "Number of CPU threads",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "flash_attn",
    "title": "Flash Attention",
    "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "caching_enabled",
    "title": "Caching",
    "description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "cache_type",
    "title": "KV Cache Type",
    "description": "Controls the memory usage and precision trade-off of the KV cache.",
    "controllerType": "dropdown",
    "controllerProps": {
      "value": "f16",
      "options": [
        { "value": "q4_0", "name": "q4_0" },
        { "value": "q8_0", "name": "q8_0" },
        { "value": "f16", "name": "f16" }
      ]
    }
  },
  {
    "key": "use_mmap",
    "title": "mmap",
    "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  }
]
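
Most of these keys mirror llama-server command-line flags (--port, --cont-batching, --parallel, --threads, --flash-attn, --cache-type-k/--cache-type-v, --no-mmap). This commit does not yet spawn the server, so the translation below is only a hedged sketch: the buildServerArgs helper and the LlamacppSettings shape are hypothetical, while the flag names are llama.cpp's own.

// Hypothetical helper, not part of this commit: map the settings above
// onto llama-server launch arguments.
interface LlamacppSettings {
  port: number
  cont_batching: boolean
  n_parallel: number
  cpu_threads?: number
  flash_attn: boolean
  cache_type: 'q4_0' | 'q8_0' | 'f16'
  use_mmap: boolean
}

function buildServerArgs(s: LlamacppSettings): string[] {
  const args = ['--port', String(s.port)]
  if (s.cont_batching) args.push('--cont-batching')
  args.push('--parallel', String(s.n_parallel))
  if (s.cpu_threads) args.push('--threads', String(s.cpu_threads))
  if (s.flash_attn) args.push('--flash-attn')
  // KV cache quantization applies to both the K and V caches
  args.push('--cache-type-k', s.cache_type, '--cache-type-v', s.cache_type)
  if (!s.use_mmap) args.push('--no-mmap')
  // caching_enabled has no single llama-server flag and is omitted here
  return args
}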

(ambient declaration file for the injected constants; filename not shown in this view)
@@ -0,0 +1,2 @@
declare const SETTINGS: import('@janhq/core').SettingComponentProps[]
declare const ENGINE: string

src/index.ts
@@ -0,0 +1,115 @@
/**
 * @file This file exports a class that extends RemoteOAIEngine from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module llamacpp-extension/src/index
 */
import {
  RemoteOAIEngine,
  getJanDataFolderPath,
  fs,
  ModelCapability,
  Model,
} from '@janhq/core'

export enum Settings {
  port = 'port',
}

/**
 * A provider that registers locally downloaded llama.cpp models and routes
 * chat completion requests to a llama-server instance on a configurable port.
 */
export default class LlamacppProvider extends RemoteOAIEngine {
  inferenceUrl: string = ''
  baseURL: string = ''
  provider: string = ENGINE

  override async onLoad(): Promise<void> {
    super.onLoad()

    // Register settings
    this.registerSettings(SETTINGS)

    // Register models
    const models = await this.listModels()
    this.registerModels(models)

    // NOTE: port 0 may mean request a free port from the OS. We may want
    // to take advantage of this. llama-server --port 0 on macOS works.
    const port = await this.getSetting<number>(Settings.port, 0)
    this.updateBaseUrl(port)
  }

  // onSettingUpdate<T>(key: string, value: T): void {
  //   if (key === Settings.apiKey) {
  //     this.apiKey = value as string
  //   } else if (key === Settings.baseUrl) {
  //     if (typeof value !== 'string') return
  //     this.updateBaseUrl(value)
  //   }
  // }

  updateBaseUrl(value: number): void {
    if (value == 0) {
      // Fall back to the default port declared in settings.json
      SETTINGS.forEach((setting) => {
        if (setting.key === Settings.port) {
          value = setting.controllerProps.value as number
        }
      })
    }
    this.baseURL = `http://127.0.0.1:${value}`
    this.inferenceUrl = `${this.baseURL}/chat/completions`
  }

  async listModels(): Promise<Model[]> {
    let modelIds = []
    const modelsFolder = `${await getJanDataFolderPath()}/models`

    // cortexso models
    const cortexsoFolder = `${modelsFolder}/cortex.so`
    const modelDirs = await fs.readdirSync(cortexsoFolder)
    for (const modelDir of modelDirs) {
      const modelName = modelDir.split('/').pop()

      // TODO: try removing this check
      // Skip files that start with '.', e.g. .DS_Store
      if (!modelName || modelName.startsWith('.')) continue

      const variantDirs = await fs.readdirSync(modelDir)
      for (const variantDir of variantDirs) {
        // NOTE: we can't detect an unfinished download here
        const ggufPath = `${variantDir}/model.gguf`
        if (await fs.existsSync(ggufPath)) {
          const variantName = variantDir.split('/').pop()
          modelIds.push(`${modelName}/${variantName}`)
        }
      }
    }

    // TODO: list models under huggingface.co

    const models = modelIds.map((modelId) => {
      return {
        sources: [],
        object: 'model',
        version: '1.0',
        format: 'api',
        id: modelId,
        name: modelId,
        created: 0,
        description: '',
        settings: {},
        parameters: {},
        metadata: {
          author: '',
          tags: [],
          size: 0,
        },
        engine: this.provider,
        capabilities: [ModelCapability.completion],
      }
    })
    return models
  }
}
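
listModels assumes the cortexso download layout under the Jan data folder, i.e. models/cortex.so/<model>/<variant>/model.gguf, and reports each hit as <model>/<variant>. Once a llama-server is actually listening on the configured port, the inferenceUrl assembled in updateBaseUrl can be exercised directly. A hedged sketch follows; the port and model id are illustrative placeholders, not values shipped by this commit:

// Illustrative request against the endpoint this extension targets.
const res = await fetch('http://127.0.0.1:8080/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'llama3.2/3b-instruct-q4_K_M', // hypothetical <model>/<variant> id
    messages: [{ role: 'user', content: 'Hello!' }],
  }),
})
const data = await res.json()
console.log(data.choices[0].message.content)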

tsconfig.json
@@ -0,0 +1,15 @@
{
  "compilerOptions": {
    "target": "es2016",
    "module": "ES6",
    "moduleResolution": "node",
    "outDir": "./dist",
    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "strict": false,
    "skipLibCheck": true,
    "rootDir": "./src"
  },
  "include": ["./src"],
  "exclude": ["**/*.test.ts"]
}