/**
 * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module inference-extension/src/index
 */

import {
  Model,
  executeOnMain,
  systemInformation,
  joinPath,
  dirName,
  LocalOAIEngine,
  InferenceEngine,
  getJanDataFolderPath,
  extractModelLoadParams,
} from '@janhq/core'
import PQueue from 'p-queue'
import ky from 'ky'

// Note: CORTEX_API_URL, MODELS, and NODE are global constants expected to be
// injected at build time by the extension's bundler configuration.

/**
 * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
export default class JanInferenceCortexExtension extends LocalOAIEngine {
  // DEPRECATED
  nodeModule: string = 'node'

  queue = new PQueue({ concurrency: 1 })

  provider: string = InferenceEngine.cortex

  /**
   * The URL for making inference requests.
   */
  inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`

  /**
   * Subscribes to events emitted by the @janhq/core package.
   */
  async onLoad() {
    const models = MODELS as Model[]

    this.registerModels(models)

    super.onLoad()

    // Run the process watchdog
    const systemInfo = await systemInformation()
    await this.clean()
    await executeOnMain(NODE, 'run', systemInfo)
    this.queue.add(() => this.healthz())

    window.addEventListener('beforeunload', () => {
      this.clean()
    })
  }

  onUnload(): void {
    this.clean()
    executeOnMain(NODE, 'dispose')
    super.onUnload()
  }

  override async loadModel(
    model: Model & { file_path?: string }
  ): Promise<void> {
    if (
      model.engine === InferenceEngine.nitro &&
      model.settings.llama_model_path
    ) {
      // Legacy chat model support
      model.settings = {
        ...model.settings,
        llama_model_path: await getModelFilePath(
          model,
          model.settings.llama_model_path
        ),
      }
    } else {
      const { llama_model_path, ...settings } = model.settings
      model.settings = settings
    }

    if (model.engine === InferenceEngine.nitro && model.settings.mmproj) {
      // Legacy clip vision model support
      model.settings = {
        ...model.settings,
        mmproj: await getModelFilePath(model, model.settings.mmproj),
      }
    } else {
      const { mmproj, ...settings } = model.settings
      model.settings = settings
    }

    return await ky
      .post(`${CORTEX_API_URL}/v1/models/start`, {
        json: {
          ...extractModelLoadParams(model.settings),
          model: model.id,
          engine:
            model.engine === InferenceEngine.nitro // Legacy model cache
              ? InferenceEngine.cortex_llamacpp
              : model.engine,
        },
      })
      .json()
      .catch(async (e) => {
        throw (await e.response?.json()) ?? e
      })
      .then()
  }

  override async unloadModel(model: Model): Promise<void> {
    return ky
      .post(`${CORTEX_API_URL}/v1/models/stop`, {
        json: { model: model.id },
      })
      .json()
      .then()
  }

  /**
   * Performs a health check against cortex.cpp.
   * @returns A promise that resolves once the /healthz endpoint responds.
   */
  healthz(): Promise<void> {
    return ky
      .get(`${CORTEX_API_URL}/healthz`, {
        retry: {
          limit: 10,
          methods: ['get'],
        },
      })
      .then(() => {})
  }

  /**
   * Cleans up running cortex processes.
   * @returns A promise that resolves when the request completes or times out.
   */
  clean(): Promise<any> {
    return ky
      .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
        timeout: 2000, // maximum 2 seconds
      })
      .catch(() => {
        // Do nothing
      })
  }
}

/// Legacy
export const getModelFilePath = async (
  model: Model,
  file: string
): Promise<string> => {
  // Symlink to the model file
  if (!model.sources[0]?.url.startsWith('http')) {
    return model.sources[0]?.url
  }
  return joinPath([await getJanDataFolderPath(), 'models', model.id, file])
}
///
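
/*
 * Usage sketch (illustrative only, not part of the extension source): how the
 * class above is expected to be driven through the LocalOAIEngine lifecycle.
 * The constructor arguments and the runtime wiring are assumptions; only the
 * method names and endpoints shown are taken from the code above.
 *
 *   const engine = new JanInferenceCortexExtension(... // provided by the Jan runtime)
 *   await engine.onLoad()            // register models, spawn cortex.cpp, wait for /healthz
 *   await engine.loadModel(model)    // POST ${CORTEX_API_URL}/v1/models/start
 *   // chat completions are then served through `inferenceUrl` by the base class
 *   await engine.unloadModel(model)  // POST ${CORTEX_API_URL}/v1/models/stop
 *   engine.onUnload()                // clean up and dispose the cortex.cpp process
 */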