diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts
index 603445745..d934e1c06 100644
--- a/core/src/browser/extension.ts
+++ b/core/src/browser/extension.ts
@@ -1,6 +1,8 @@
-import { SettingComponentProps } from '../types'
+import { Model, ModelEvent, SettingComponentProps } from '../types'
 import { getJanDataFolderPath, joinPath } from './core'
+import { events } from './events'
 import { fs } from './fs'
+import { ModelManager } from './models'
 
 export enum ExtensionTypeEnum {
   Assistant = 'assistant',
@@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType {
     return undefined
   }
 
+  /**
+   * Registers models. The models are persisted in the data map of the in-memory shared ModelManager instance.
+   * @param models
+   */
+  async registerModels(models: Model[]): Promise<void> {
+    for (const model of models) {
+      ModelManager.instance().register(model)
+    }
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Register settings for the extension.
+   * @param settings
+   * @returns
+   */
   async registerSettings(settings: SettingComponentProps[]): Promise<void> {
     if (!this.name) {
       console.error('Extension name is not defined')
@@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType {
     }
   }
 
+  /**
+   * Get the setting value for the key.
+   * @param key
+   * @param defaultValue
+   * @returns
+   */
   async getSetting<T>(key: string, defaultValue: T) {
     const keySetting = (await this.getSettings()).find((setting) => setting.key === key)
 
@@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType {
     return
   }
 
+  /**
+   * Get the settings for the extension.
+   * @returns
+   */
   async getSettings(): Promise<SettingComponentProps[]> {
     if (!this.name) return []
 
@@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType {
     }
   }
 
+  /**
+   * Update the settings for the extension.
+   * @param componentProps
+   * @returns
+   */
   async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> {
     if (!this.name) return
diff --git a/core/src/browser/extensions/engines/AIEngine.test.ts b/core/src/browser/extensions/engines/AIEngine.test.ts
index 59dad280f..ab3280e1c 100644
--- a/core/src/browser/extensions/engines/AIEngine.test.ts
+++ b/core/src/browser/extensions/engines/AIEngine.test.ts
@@ -1,8 +1,6 @@
 import { AIEngine } from './AIEngine'
 import { events } from '../../events'
-import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types'
-import { EngineManager } from './EngineManager'
-import { fs } from '../../fs'
+import { ModelEvent, Model } from '../../../types'
 
 jest.mock('../../events')
 jest.mock('./EngineManager')
@@ -26,7 +24,7 @@ describe('AIEngine', () => {
   })
 
   it('should load model if provider matches', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any
+    const model: any = { id: 'model1', engine: 'test-provider' } as any
 
     await engine.loadModel(model)
 
@@ -34,7 +32,7 @@ describe('AIEngine', () => {
   })
 
   it('should not load model if provider does not match', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any
+    const model: any = { id: 'model1', engine: 'other-provider' } as any
 
     await engine.loadModel(model)
diff --git a/core/src/browser/extensions/engines/AIEngine.ts b/core/src/browser/extensions/engines/AIEngine.ts
index 75354de88..d0528b0ab 100644
--- a/core/src/browser/extensions/engines/AIEngine.ts
+++ b/core/src/browser/extensions/engines/AIEngine.ts
@@ -1,17 +1,14 @@
-import { getJanDataFolderPath, joinPath } from '../../core'
 import { events } from '../../events'
 import { BaseExtension } from '../../extension'
-import { fs } from '../../fs'
-import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types'
+import { MessageRequest, Model, ModelEvent } from '../../../types'
 import { EngineManager } from './EngineManager'
+import { ModelManager } from '../../models/manager'
 
 /**
  * Base AIEngine
  * Applicable to all AI Engines
  */
 export abstract class AIEngine extends BaseExtension {
-  private static modelsFolder = 'models'
-
   // The inference engine
   abstract provider: string
 
@@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension {
   override onLoad() {
     this.registerEngine()
 
-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
     events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
   }
 
@@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension {
     EngineManager.instance().register(this)
   }
 
-  async registerModels(models: Model[]): Promise<void> {
-    const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder])
-
-    let shouldNotifyModelUpdate = false
-    for (const model of models) {
-      const modelPath = await joinPath([modelFolderPath, model.id])
-      const isExist = await fs.existsSync(modelPath)
-
-      if (isExist) {
-        await this.migrateModelIfNeeded(model, modelPath)
-        continue
-      }
-
-      await fs.mkdir(modelPath)
-      await fs.writeFileSync(
-        await joinPath([modelPath, 'model.json']),
-        JSON.stringify(model, null, 2)
-      )
-      shouldNotifyModelUpdate = true
-    }
-
-    if (shouldNotifyModelUpdate) {
-      events.emit(ModelEvent.OnModelsUpdate, {})
-    }
-  }
-
-  async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> {
-    try {
-      const modelJson = await fs.readFileSync(await joinPath([modelPath, 'model.json']), 'utf-8')
-      const currentModel: Model = JSON.parse(modelJson)
-      if (currentModel.version !== model.version) {
-        await fs.writeFileSync(
-          await joinPath([modelPath, 'model.json']),
-          JSON.stringify(model, null, 2)
-        )
-
-        events.emit(ModelEvent.OnModelsUpdate, {})
-      }
-    } catch (error) {
-      console.warn('Error while try to migrating model', error)
-    }
-  }
-
   /**
    * Loads the model.
    */
-  async loadModel(model: ModelFile): Promise<void> {
+  async loadModel(model: Model): Promise<void> {
     if (model.engine.toString() !== this.provider) return Promise.resolve()
     events.emit(ModelEvent.OnModelReady, model)
     return Promise.resolve()
diff --git a/core/src/browser/extensions/engines/EngineManager.ts b/core/src/browser/extensions/engines/EngineManager.ts
index 2980c5c65..90ce75ac5 100644
--- a/core/src/browser/extensions/engines/EngineManager.ts
+++ b/core/src/browser/extensions/engines/EngineManager.ts
@@ -1,3 +1,4 @@
+import { InferenceEngine } from '../../../types'
 import { AIEngine } from './AIEngine'
 
 /**
@@ -20,6 +21,21 @@ export class EngineManager {
   * @returns The engine, if found.
   */
  get<T extends AIEngine>(provider: string): T | undefined {
+    // Backward compatible provider
+    // nitro is migrated to cortex
+    if (
+      [
+        InferenceEngine.nitro,
+        InferenceEngine.cortex,
+        InferenceEngine.cortex_llamacpp,
+        InferenceEngine.cortex_onnx,
+        InferenceEngine.cortex_tensorrtllm,
+      ]
+        .map((e) => e.toString())
+        .includes(provider)
+    )
+      provider = InferenceEngine.cortex
+
     return this.engines.get(provider) as T | undefined
   }
 
@@ -27,6 +44,6 @@ export class EngineManager {
    * The instance of the engine manager.
    */
   static instance(): EngineManager {
-    return window.core?.engineManager as EngineManager ?? new EngineManager()
+    return (window.core?.engineManager as EngineManager) ?? new EngineManager()
  }
}
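With the mapping above, every legacy or variant engine id collapses to the single cortex engine before lookup. A minimal sketch of the resulting behaviour, assuming a cortex engine has already been registered via registerEngine() and that AIEngine, EngineManager, and InferenceEngine are re-exported from @janhq/core as in the browser index:

import { AIEngine, EngineManager, InferenceEngine } from '@janhq/core'

const manager = EngineManager.instance()

// All legacy and variant ids now resolve to the same registered engine:
const viaNitro = manager.get<AIEngine>(InferenceEngine.nitro)             // 'nitro' -> cortex
const viaLlamaCpp = manager.get<AIEngine>(InferenceEngine.cortex_llamacpp) // 'llama-cpp' -> cortex
const viaCortex = manager.get<AIEngine>(InferenceEngine.cortex)            // canonical id
// viaNitro === viaLlamaCpp && viaLlamaCpp === viaCortex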
diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
index 4ae81496f..4a36f6b12 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
@@ -3,7 +3,7 @@
  */
 import { LocalOAIEngine } from './LocalOAIEngine'
 import { events } from '../../events'
-import { ModelEvent, ModelFile, Model } from '../../../types'
+import { ModelEvent, Model } from '../../../types'
 import { executeOnMain, systemInformation, dirName } from '../../core'
 
 jest.mock('../../core', () => ({
@@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
   })
 
   it('should load model correctly', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: Model = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: null }
@@ -54,7 +54,6 @@ describe('LocalOAIEngine', () => {
 
     await engine.loadModel(model)
 
-    expect(dirName).toHaveBeenCalledWith(model.file_path)
     expect(systemInformation).toHaveBeenCalled()
     expect(executeOnMain).toHaveBeenCalledWith(
       engine.nodeModule,
@@ -66,7 +65,7 @@ describe('LocalOAIEngine', () => {
   })
 
   it('should handle load model error', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: 'load error' }
diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.ts b/core/src/browser/extensions/engines/LocalOAIEngine.ts
index 123b9a593..6c70fa186 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.ts
@@ -1,6 +1,6 @@
 import { executeOnMain, systemInformation, dirName } from '../../core'
 import { events } from '../../events'
-import { Model, ModelEvent, ModelFile } from '../../../types'
+import { Model, ModelEvent } from '../../../types'
 import { OAIEngine } from './OAIEngine'
 
 /**
@@ -22,35 +22,36 @@ export abstract class LocalOAIEngine extends OAIEngine {
   override onLoad() {
     super.onLoad()
     // These events are applicable to local inference providers
-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
     events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
   }
 
   /**
    * Load the model.
    */
-  override async loadModel(model: ModelFile): Promise<void> {
+  override async loadModel(model: Model): Promise<void> {
     if (model.engine.toString() !== this.provider) return
 
-    const modelFolder = await dirName(model.file_path)
-    const systemInfo = await systemInformation()
-    const res = await executeOnMain(
-      this.nodeModule,
-      this.loadModelFunctionName,
-      {
-        modelFolder,
-        model,
-      },
-      systemInfo
-    )
+    // const modelFolder = await dirName(model.file_path)
+    // const systemInfo = await systemInformation()
+    // const res = await executeOnMain(
+    //   this.nodeModule,
+    //   this.loadModelFunctionName,
+    //   {
+    //     modelFolder,
+    //     model,
+    //   },
+    //   systemInfo
+    // )
 
-    if (res?.error) {
-      events.emit(ModelEvent.OnModelFail, { error: res.error })
-      return Promise.reject(res.error)
-    } else {
-      this.loadedModel = model
-      events.emit(ModelEvent.OnModelReady, model)
-      return Promise.resolve()
-    }
+    // if (res?.error) {
+    //   events.emit(ModelEvent.OnModelFail, { error: res.error })
+    //   return Promise.reject(res.error)
+    // } else {
+    //   this.loadedModel = model
+    //   events.emit(ModelEvent.OnModelReady, model)
+    //   return Promise.resolve()
+    // }
+    return Promise.resolve()
   }
 
   /**
    * Stops the model.
diff --git a/core/src/browser/extensions/engines/OAIEngine.ts b/core/src/browser/extensions/engines/OAIEngine.ts
index a8dde4677..694a87264 100644
--- a/core/src/browser/extensions/engines/OAIEngine.ts
+++ b/core/src/browser/extensions/engines/OAIEngine.ts
@@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine {
    * Inference request
    */
   override async inference(data: MessageRequest) {
-    if (data.model?.engine?.toString() !== this.provider) return
+    if (!data.model?.id) {
+      events.emit(MessageEvent.OnMessageResponse, {
+        status: MessageStatus.Error,
+        content: [
+          {
+            type: ContentType.Text,
+            text: {
+              value: 'No model ID provided',
+              annotations: [],
+            },
+          },
+        ],
+      })
+      return
+    }
 
     const timestamp = Date.now()
     const message: ThreadMessage = {
@@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine {
       model: model.id,
       stream: true,
       ...model.parameters,
-      ...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}),
     }
     if (this.transformPayload) {
       requestBody = this.transformPayload(requestBody)
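Since routing to an engine now happens through EngineManager rather than a per-provider guard, the only precondition inference() checks is a model id, and a request without one surfaces an error message instead of being dropped silently. A rough sketch of how a caller observes this (payload abbreviated; field names follow the emit above, and events, MessageEvent, and MessageStatus are the standard exports from @janhq/core):

import { events, MessageEvent, MessageStatus } from '@janhq/core'

events.on(MessageEvent.OnMessageResponse, (message: any) => {
  if (message.status === MessageStatus.Error) {
    // Logs 'No model ID provided' for a request sent without model.id
    console.error(message.content?.[0]?.text?.value)
  }
})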
diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts
index 024ced470..9713256b3 100644
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@@ -10,7 +10,7 @@ export function requestInference(
   requestBody: any,
   model: {
     id: string
-    parameters: ModelRuntimeParams
+    parameters?: ModelRuntimeParams
   },
   controller?: AbortController,
   headers?: HeadersInit,
@@ -22,7 +22,7 @@ export function requestInference(
       headers: {
         'Content-Type': 'application/json',
         'Access-Control-Allow-Origin': '*',
-        'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
+        'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json',
         ...headers,
       },
       body: JSON.stringify(requestBody),
@@ -45,7 +45,7 @@ export function requestInference(
         subscriber.complete()
         return
       }
-      if (model.parameters.stream === false) {
+      if (model.parameters?.stream === false) {
         const data = await response.json()
         if (transformResponse) {
           subscriber.next(transformResponse(data))
diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts
index 040542927..d111c1d3a 100644
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@@ -1,13 +1,5 @@
 import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import {
-  GpuSetting,
-  HuggingFaceRepoData,
-  ImportingModel,
-  Model,
-  ModelFile,
-  ModelInterface,
-  OptionType,
-} from '../../types'
+import { Model, ModelInterface, OptionType } from '../../types'
 
 /**
  * Model extension for managing models.
@@ -20,17 +12,10 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
     return ExtensionTypeEnum.Model
   }
 
-  abstract downloadModel(
-    model: Model,
-    gpuSettings?: GpuSetting,
-    network?: { proxy: string; ignoreSSL?: boolean }
-  ): Promise<void>
-  abstract cancelModelDownload(modelId: string): Promise<void>
-  abstract deleteModel(model: ModelFile): Promise<void>
-  abstract getDownloadedModels(): Promise<ModelFile[]>
-  abstract getConfiguredModels(): Promise<ModelFile[]>
-  abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void>
-  abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile>
-  abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData>
-  abstract getDefaultModel(): Promise<Model>
+  abstract getModels(): Promise<Model[]>
+  abstract pullModel(model: string): Promise<void>
+  abstract cancelModelPull(modelId: string): Promise<void>
+  abstract importModel(model: string, modelPath: string): Promise<void>
+  abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
+  abstract deleteModel(model: string): Promise<void>
 }
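The download-oriented surface (downloadModel, importModels, the HuggingFace helpers) is gone; extensions now implement a pull-based API keyed by model id strings. A hypothetical outline of a conforming subclass, shown only to illustrate the new shape (class name and method bodies are placeholders, not the real cortex extension):

import { Model, ModelExtension } from '@janhq/core'

class ExampleModelExtension extends ModelExtension {
  onLoad() {}
  onUnload() {}

  async getModels(): Promise<Model[]> { return [] }
  async pullModel(model: string): Promise<void> { /* start a pull via the engine API */ }
  async cancelModelPull(modelId: string): Promise<void> { /* abort the pull */ }
  async importModel(model: string, modelPath: string): Promise<void> { /* register a local file */ }
  async updateModel(modelInfo: Partial<Model>): Promise<Model> { return modelInfo as Model }
  async deleteModel(model: string): Promise<void> { /* remove the model */ }
}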
diff --git a/core/src/browser/index.test.ts b/core/src/browser/index.test.ts
index 339cd9046..c8cabbb0b 100644
--- a/core/src/browser/index.test.ts
+++ b/core/src/browser/index.test.ts
@@ -1,32 +1,37 @@
-import * as Core from './core';
-import * as Events from './events';
-import * as FileSystem from './fs';
-import * as Extension from './extension';
-import * as Extensions from './extensions';
-import * as Tools from './tools';
+import * as Core from './core'
+import * as Events from './events'
+import * as FileSystem from './fs'
+import * as Extension from './extension'
+import * as Extensions from './extensions'
+import * as Tools from './tools'
+import * as Models from './models'
 
 describe('Module Tests', () => {
-  it('should export Core module', () => {
-    expect(Core).toBeDefined();
-  });
+  it('should export Core module', () => {
+    expect(Core).toBeDefined()
+  })
 
-  it('should export Event module', () => {
-    expect(Events).toBeDefined();
-  });
+  it('should export Event module', () => {
+    expect(Events).toBeDefined()
+  })
 
-  it('should export Filesystem module', () => {
-    expect(FileSystem).toBeDefined();
-  });
+  it('should export Filesystem module', () => {
+    expect(FileSystem).toBeDefined()
+  })
 
-  it('should export Extension module', () => {
-    expect(Extension).toBeDefined();
-  });
+  it('should export Extension module', () => {
+    expect(Extension).toBeDefined()
+  })
 
-  it('should export all base extensions', () => {
-    expect(Extensions).toBeDefined();
-  });
+  it('should export all base extensions', () => {
+    expect(Extensions).toBeDefined()
+  })
 
-  it('should export all base tools', () => {
-    expect(Tools).toBeDefined();
-  });
-});
\ No newline at end of file
+  it('should export all base tools', () => {
+    expect(Tools).toBeDefined()
+  })
+
+  it('should export all base models', () => {
+    expect(Models).toBeDefined()
+  })
+})
diff --git a/core/src/browser/index.ts b/core/src/browser/index.ts
index a7803c7e0..a6ce187ca 100644
--- a/core/src/browser/index.ts
+++ b/core/src/browser/index.ts
@@ -33,3 +33,9 @@ export * from './extensions'
  * @module
  */
 export * from './tools'
+
+/**
+ * Export all base models.
+ * @module
+ */
+export * from './models'
diff --git a/core/src/browser/models/index.ts b/core/src/browser/models/index.ts
new file mode 100644
index 000000000..c16479b2b
--- /dev/null
+++ b/core/src/browser/models/index.ts
@@ -0,0 +1,5 @@
+/**
+ * Export ModelManager
+ * @module
+ */
+export { ModelManager } from './manager'
diff --git a/core/src/browser/models/manager.ts b/core/src/browser/models/manager.ts
new file mode 100644
index 000000000..4853989fe
--- /dev/null
+++ b/core/src/browser/models/manager.ts
@@ -0,0 +1,40 @@
+import { Model, ModelEvent } from '../../types'
+import { events } from '../events'
+
+/**
+ * Manages the registered models across extensions.
+ */
+export class ModelManager {
+  public models = new Map<string, Model>()
+
+  constructor() {
+    if (window) {
+      window.core.modelManager = this
+    }
+  }
+
+  /**
+   * Registers a model.
+   * @param model - The model to register.
+   */
+  register<T extends Model>(model: T) {
+    this.models.set(model.id, model)
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Retrieves a model by its id.
+   * @param id - The id of the model to retrieve.
+   * @returns The model, if found.
+   */
+  get<T extends Model>(id: string): T | undefined {
+    return this.models.get(id) as T | undefined
+  }
+
+  /**
+   * The instance of the model manager.
+   */
+  static instance(): ModelManager {
+    return (window.core?.modelManager as ModelManager) ?? new ModelManager()
+  }
+}
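Registration and lookup share one map hung off window.core, so every extension and the UI observe the same model set, and each register() broadcasts OnModelsUpdate. A small usage sketch (the model literal is illustrative and cast loosely, since a full Model carries many more fields):

import { ModelManager } from '@janhq/core'

// Typically invoked through BaseExtension.registerModels(...)
ModelManager.instance().register({ id: 'example-model', engine: 'llama-cpp' } as any)

// Resolve anywhere else by id.
const model = ModelManager.instance().get('example-model')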
diff --git a/core/src/types/api/index.ts b/core/src/types/api/index.ts
index 8f1ff70bf..c0de0f5e8 100644
--- a/core/src/types/api/index.ts
+++ b/core/src/types/api/index.ts
@@ -69,9 +69,11 @@ export enum DownloadRoute {
 }
 
 export enum DownloadEvent {
-  onFileDownloadUpdate = 'onFileDownloadUpdate',
-  onFileDownloadError = 'onFileDownloadError',
-  onFileDownloadSuccess = 'onFileDownloadSuccess',
+  onFileDownloadUpdate = 'DownloadUpdated',
+  onFileDownloadError = 'DownloadError',
+  onFileDownloadSuccess = 'DownloadSuccess',
+  onFileDownloadStopped = 'DownloadStopped',
+  onFileDownloadStarted = 'DownloadStarted',
   onFileUnzipSuccess = 'onFileUnzipSuccess',
 }
 
diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index 933c698c3..ed1db94bd 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -6,8 +6,8 @@ import { FileMetadata } from '../file'
  */
 export type ModelInfo = {
   id: string
-  settings: ModelSettingParams
-  parameters: ModelRuntimeParams
+  settings?: ModelSettingParams
+  parameters?: ModelRuntimeParams
   engine?: InferenceEngine
 }
 
@@ -28,9 +28,10 @@ export enum InferenceEngine {
   nitro_tensorrt_llm = 'nitro-tensorrt-llm',
   cohere = 'cohere',
   nvidia = 'nvidia',
-  cortex_llamacpp = 'cortex.llamacpp',
-  cortex_onnx = 'cortex.onnx',
-  cortex_tensorrtllm = 'cortex.tensorrt-llm',
+  cortex = 'cortex',
+  cortex_llamacpp = 'llama-cpp',
+  cortex_onnx = 'onnxruntime',
+  cortex_tensorrtllm = 'tensorrt-llm',
 }
 
 export type ModelArtifact = {
@@ -153,8 +154,3 @@ export type ModelRuntimeParams = {
 export type ModelInitFailed = Model & {
   error: Error
 }
-
-/**
- * ModelFile is the model.json entity and it's file metadata
- */
-export type ModelFile = Model & FileMetadata
diff --git a/core/src/types/model/modelInterface.ts b/core/src/types/model/modelInterface.ts
index 08d456b7e..088118f69 100644
--- a/core/src/types/model/modelInterface.ts
+++ b/core/src/types/model/modelInterface.ts
@@ -1,5 +1,5 @@
-import { GpuSetting } from '../miscellaneous'
-import { Model, ModelFile } from './modelEntity'
+import { Model } from './modelEntity'
+import { OptionType } from './modelImport'
 
 /**
  * Model extension for managing models.
@@ -8,38 +8,41 @@ export interface ModelInterface {
   /**
    * Downloads a model.
    * @param model - The model to download.
-   * @param network - Optional object to specify proxy/whether to ignore SSL certificates.
    * @returns A Promise that resolves when the model has been downloaded.
    */
-  downloadModel(
-    model: ModelFile,
-    gpuSettings?: GpuSetting,
-    network?: { ignoreSSL?: boolean; proxy?: string }
-  ): Promise<void>
+  pullModel(model: string): Promise<void>
 
   /**
    * Cancels the download of a specific model.
    * @param {string} modelId - The ID of the model to cancel the download for.
    * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
    */
-  cancelModelDownload(modelId: string): Promise<void>
+  cancelModelPull(modelId: string): Promise<void>
 
   /**
    * Deletes a model.
    * @param modelId - The ID of the model to delete.
    * @returns A Promise that resolves when the model has been deleted.
    */
-  deleteModel(model: ModelFile): Promise<void>
+  deleteModel(model: string): Promise<void>
 
   /**
-   * Gets a list of downloaded models.
+   * Gets downloaded models.
    * @returns A Promise that resolves with an array of downloaded models.
    */
-  getDownloadedModels(): Promise<ModelFile[]>
+  getModels(): Promise<Model[]>
 
   /**
-   * Gets a list of configured models.
-   * @returns A Promise that resolves with an array of configured models.
+   * Update a pulled model's metadata.
+   * @param model - The model to update.
+   * @returns A Promise that resolves when the model has been updated.
    */
-  getConfiguredModels(): Promise<ModelFile[]>
+  updateModel(model: Partial<Model>): Promise<Model>
+
+  /**
+   * Import an existing model file.
+   * @param model - id of the model to import
+   * @param modelPath - path of the model file
+   */
+  importModel(model: string, modelPath: string): Promise<void>
 }
diff --git a/docs/src/pages/docs/built-in/llama-cpp.mdx b/docs/src/pages/docs/built-in/llama-cpp.mdx
index 5b7b0453a..8e2fa8498 100644
--- a/docs/src/pages/docs/built-in/llama-cpp.mdx
+++ b/docs/src/pages/docs/built-in/llama-cpp.mdx
@@ -102,7 +102,7 @@ Enable the GPU acceleration option within the Jan application by following the [
     ],
     "size": 669000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
 ```
 ### Step 2: Modify the `model.json`
diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts
index 28d629aa8..3386029fa 100644
--- a/extensions/assistant-extension/src/node/retrieval.ts
+++ b/extensions/assistant-extension/src/node/retrieval.ts
@@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib'
 import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
 import { readEmbeddingEngine } from './engine'
 
-import path from 'path'
-
 export class Retrieval {
   public chunkSize: number = 100
   public chunkOverlap?: number = 0
diff --git a/extensions/inference-nitro-extension/.gitignore b/extensions/inference-cortex-extension/.gitignore
similarity index 100%
rename from extensions/inference-nitro-extension/.gitignore
rename to extensions/inference-cortex-extension/.gitignore
diff --git a/extensions/inference-nitro-extension/README.md b/extensions/inference-cortex-extension/README.md
similarity index 100%
rename from extensions/inference-nitro-extension/README.md
rename to extensions/inference-cortex-extension/README.md
diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-cortex-extension/download.bat
similarity index 93%
rename from extensions/inference-nitro-extension/download.bat
rename to extensions/inference-cortex-extension/download.bat
index 7acd385d5..d764b6df8 100644
--- a/extensions/inference-nitro-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -4,10 +4,10 @@
 set /p CORTEX_VERSION=<./bin/version.txt
 
 @REM Download cortex.llamacpp binaries
-set VERSION=v0.1.25
-set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.25-windows-amd64
+set VERSION=v0.1.34
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.34-windows-amd64
 set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
-call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz
+call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh new file mode 100755 index 000000000..fe1f8af9f --- /dev/null +++ b/extensions/inference-cortex-extension/download.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Read CORTEX_VERSION +CORTEX_VERSION=$(cat ./bin/version.txt) +CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" +ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.34/cortex.llamacpp-0.1.34" +# Detect platform +OS_TYPE=$(uname) + +if [ "$OS_TYPE" == "Linux" ]; then + # Linux downloads + download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin" + chmod +x "./bin/cortex" + + # Download engines for Linux + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1 + +elif [ "$OS_TYPE" == "Darwin" ]; then + # macOS downloads + download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1 + chmod +x "./bin/cortex" + + # Download engines for macOS + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp + +else + echo "Unsupported operating system: $OS_TYPE" + exit 1 +fi diff --git a/extensions/inference-nitro-extension/jest.config.js b/extensions/inference-cortex-extension/jest.config.js similarity index 100% rename from extensions/inference-nitro-extension/jest.config.js rename to extensions/inference-cortex-extension/jest.config.js diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-cortex-extension/package.json similarity index 82% rename from extensions/inference-nitro-extension/package.json rename to extensions/inference-cortex-extension/package.json index 15ceaf566..920989f3b 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -10,12 +10,12 @@ "scripts": { "test": "jest", "build": "tsc --module commonjs && rollup -c rollup.config.ts", - "downloadnitro:linux:darwin": "./download.sh", - "downloadnitro:win32": "download.bat", - "downloadnitro": "run-script-os", - "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", - "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", - "build:publish": "yarn test && run-script-os" + 
"downloadcortex:linux:darwin": "./download.sh", + "downloadcortex:win32": "download.bat", + "downloadcortex": "run-script-os", + "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", + "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", + "build:publish": "run-script-os" }, "exports": { ".": "./dist/index.js", @@ -50,6 +50,7 @@ "cpu-instructions": "^0.0.13", "decompress": "^4.2.1", "fetch-retry": "^5.0.6", + "ky": "^1.7.2", "rxjs": "^7.8.1", "tcp-port-used": "^1.0.2", "terminate": "2.6.1", diff --git a/extensions/inference-nitro-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json similarity index 100% rename from extensions/inference-nitro-extension/resources/default_settings.json rename to extensions/inference-cortex-extension/resources/default_settings.json diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json rename to extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json index 8c3029be0..f6e3d08e9 100644 --- a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json +++ b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Finetuned"], "size": 21556982144 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json rename to extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json index 163373014..463f7eec7 100644 --- a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 5056982144 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/bakllava-1/model.json b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/bakllava-1/model.json rename to extensions/inference-cortex-extension/resources/models/bakllava-1/model.json index 93f87c7f4..391c93990 100644 --- a/extensions/inference-nitro-extension/resources/models/bakllava-1/model.json +++ b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json @@ -31,5 +31,5 @@ "tags": ["Vision"], "size": 5750000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json rename to extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json index fb2a5f346..7bd5bf3a4 100644 --- 
a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json @@ -30,5 +30,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/codestral-22b/model.json rename to extensions/inference-cortex-extension/resources/models/codestral-22b/model.json index f90f848dd..2cce063e6 100644 --- a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json +++ b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json @@ -31,6 +31,6 @@ "tags": ["22B", "Finetuned", "Featured"], "size": 13341237440 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/command-r-34b/model.json rename to extensions/inference-cortex-extension/resources/models/command-r-34b/model.json index 6b166eea5..13518604c 100644 --- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json @@ -31,6 +31,6 @@ "tags": ["34B", "Finetuned"], "size": 21500000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json rename to extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json index 4d825cfeb..6722d253d 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json +++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json @@ -31,5 +31,5 @@ "tags": ["Tiny"], "size": 1430000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json rename to extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json index e87d6a643..8a2e271cd 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json @@ -31,5 +31,5 @@ "tags": ["33B"], "size": 19940000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json index 837b10ce3..3278c9a81 100644 --- 
a/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json @@ -31,5 +31,5 @@ "tags": ["2B", "Finetuned", "Tiny"], "size": 1630000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json index b29043483..9a57f9b37 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 5330000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json index 4db74ac6f..66eaff7c2 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json @@ -37,5 +37,5 @@ ], "size": 16600000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json index d85759f9b..60be558b8 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json @@ -38,5 +38,5 @@ ], "size": 1710000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json index 8f6af15d9..67acaad09 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json @@ -37,5 +37,5 @@ ], "size": 5760000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json rename to extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json index 0c770b189..c91a0a73b 100644 --- a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json @@ -31,5 +31,5 @@ 
"tags": ["70B", "Foundational Model"], "size": 43920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json rename to extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json index 9efd634b5..4a28f6004 100644 --- a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Foundational Model"], "size": 4080000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json index 4d84b9967..3456a185e 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["8B"], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json rename to extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json index a3601c8cd..718629fb0 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json @@ -34,5 +34,5 @@ ], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json similarity index 98% rename from extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json index 1f4931e11..aec73719e 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json @@ -37,5 +37,5 @@ ], "size": 42500000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json similarity index 98% rename from extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json index 4b21534bc..ec9a0284b 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json +++ 
b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json @@ -37,5 +37,5 @@ ], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json index 5be08409d..0fe7d3316 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["1B", "Featured"], "size": 1320000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json index aacb3f0f8..299362fbf 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["3B", "Featured"], "size": 3420000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json rename to extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json index 94b62ec82..3230df5b0 100644 --- a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json @@ -34,5 +34,5 @@ ], "size": 1170000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llava-13b/model.json rename to extensions/inference-cortex-extension/resources/models/llava-13b/model.json index 6d94fd272..791c98749 100644 --- a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json @@ -32,5 +32,5 @@ "tags": ["Vision"], "size": 7870000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llava-7b/model.json rename to extensions/inference-cortex-extension/resources/models/llava-7b/model.json index 1fdd75247..b22899c96 100644 --- a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json +++ 
b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json @@ -32,5 +32,5 @@ "tags": ["Vision"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json rename to extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json index 88f701466..9b568e468 100644 --- a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json +++ b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json @@ -32,5 +32,5 @@ "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png" }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json index 4413b415c..c711065ff 100644 --- a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json @@ -30,5 +30,5 @@ "tags": ["70B", "Foundational Model"], "size": 26440000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json rename to extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json index 10c17c310..1999035aa 100644 --- a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json rename to extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json index e743a74c9..05371b69e 100644 --- a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json @@ -31,5 +31,5 @@ "tags": ["Recommended", "7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json rename to extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json index 6459b049d..a2197dab2 100644 --- 
a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json @@ -34,5 +34,5 @@ ], "size": 2320000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phi3-medium/model.json rename to extensions/inference-cortex-extension/resources/models/phi3-medium/model.json index 7331b2fd8..f7131ee98 100644 --- a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json @@ -34,5 +34,5 @@ ], "size": 8366000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phind-34b/model.json rename to extensions/inference-cortex-extension/resources/models/phind-34b/model.json index 14099a635..f6e302173 100644 --- a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Finetuned"], "size": 20220000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen-7b/model.json rename to extensions/inference-cortex-extension/resources/models/qwen-7b/model.json index 85081a605..be37cac0d 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 4770000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json index a7613982c..210848a43 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Finetuned"], "size": 4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json index 04913b874..96e4d214c 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json +++ 
b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["14B", "Featured"], "size": 8990000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json index 43ba30c56..20681dff4 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["32B"], "size": 19900000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json index 1852a0909..b741539eb 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["72B"], "size": 47400000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json index b47511f96..6741aef64 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Featured"], "size": 4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json index 2f1080b2c..9162c8a43 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Featured"], "size": 4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json similarity index 97% rename from 
extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json rename to extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json index 938e03fb7..a6c84bd17 100644 --- a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json +++ b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json @@ -31,5 +31,5 @@ "tags": ["3B", "Finetuned", "Tiny"], "size": 2970000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json index c17d1c35e..ffb32922e 100644 --- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json @@ -30,5 +30,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json rename to extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json index a49e79073..b6aeea3e3 100644 --- a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json +++ b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json @@ -31,5 +31,5 @@ "tags": ["Tiny", "Foundation Model"], "size": 669000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json index 6c9aa2b89..fae5d0ca5 100644 --- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json @@ -31,5 +31,5 @@ "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png" }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/vistral-7b/model.json rename to extensions/inference-cortex-extension/resources/models/vistral-7b/model.json index b84f2c676..46b6999a6 100644 --- a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Finetuned"], "size": 4410000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json 
b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json rename to extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json index 101eedfd1..cf39ad857 100644 --- a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json +++ b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json @@ -31,5 +31,5 @@ "tags": ["Recommended", "13B", "Finetuned"], "size": 7870000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/yi-34b/model.json rename to extensions/inference-cortex-extension/resources/models/yi-34b/model.json index db7df9f2d..4f56650d7 100644 --- a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Foundational Model"], "size": 20660000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts similarity index 93% rename from extensions/inference-nitro-extension/rollup.config.ts rename to extensions/inference-cortex-extension/rollup.config.ts index 1a8badb6f..d0e9f5fbe 100644 --- a/extensions/inference-nitro-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -114,19 +114,7 @@ export default [ ]), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), - INFERENCE_URL: JSON.stringify( - process.env.INFERENCE_URL || - 'http://127.0.0.1:3928/inferences/server/chat_completion' - ), - TROUBLESHOOTING_URL: JSON.stringify( - 'https://jan.ai/guides/troubleshooting' - ), - JAN_SERVER_INFERENCE_URL: JSON.stringify( - 'http://localhost:1337/v1/chat/completions' - ), - CUDA_DOWNLOAD_URL: JSON.stringify( - 'https://catalog.jan.ai/dist/cuda-dependencies///cuda.tar.gz' - ), + CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291/v1'), }), // Allow json resolution json(), diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts similarity index 70% rename from extensions/inference-nitro-extension/src/@types/global.d.ts rename to extensions/inference-cortex-extension/src/@types/global.d.ts index 85c9b939f..64ae5a6e7 100644 --- a/extensions/inference-nitro-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -1,7 +1,5 @@ declare const NODE: string -declare const INFERENCE_URL: string -declare const TROUBLESHOOTING_URL: string -declare const JAN_SERVER_INFERENCE_URL: string +declare const CORTEX_API_URL: string declare const DEFAULT_SETTINGS: Array declare const MODELS: Array diff --git a/extensions/inference-nitro-extension/src/babel.config.js b/extensions/inference-cortex-extension/src/babel.config.js similarity index 100% rename from extensions/inference-nitro-extension/src/babel.config.js rename to extensions/inference-cortex-extension/src/babel.config.js diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts new 
file mode 100644 index 000000000..93036fc4d --- /dev/null +++ b/extensions/inference-cortex-extension/src/index.ts @@ -0,0 +1,111 @@ +/** + * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + * @version 1.0.0 + * @module inference-extension/src/index + */ + +import { + Model, + executeOnMain, + systemInformation, + log, + joinPath, + dirName, + LocalOAIEngine, + InferenceEngine, +} from '@janhq/core' + +import ky from 'ky' + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class JanInferenceCortexExtension extends LocalOAIEngine { + // DEPRECATED + nodeModule: string = 'node' + + provider: string = InferenceEngine.cortex + + /** + * The URL for making inference requests. + */ + inferenceUrl = `${CORTEX_API_URL}/chat/completions` + + /** + * Subscribes to events emitted by the @janhq/core package. + */ + async onLoad() { + const models = MODELS as Model[] + + this.registerModels(models) + + super.onLoad() + + // Run the process watchdog + const systemInfo = await systemInformation() + executeOnMain(NODE, 'run', systemInfo) + } + + onUnload(): void { + executeOnMain(NODE, 'dispose') + super.onUnload() + } + + override async loadModel( + model: Model & { file_path?: string } + ): Promise { + // Legacy model cache - should import + if (model.engine === InferenceEngine.nitro && model.file_path) { + // Try importing the model + await ky + .post(`${CORTEX_API_URL}/models/${model.id}`, { + json: { model: model.id, modelPath: await this.modelPath(model) }, + }) + .json() + .catch((e) => log(e.message ?? e ?? '')) + } + + return ky + .post(`${CORTEX_API_URL}/models/start`, { + json: { + ...model.settings, + model: model.id, + engine: + model.engine === InferenceEngine.nitro // Legacy model cache + ? InferenceEngine.cortex_llamacpp + : model.engine, + }, + }) + .json() + .catch(async (e) => { + throw (await e.response?.json()) ?? e + }) + .then() + } + + override async unloadModel(model: Model): Promise { + return ky + .post(`${CORTEX_API_URL}/models/stop`, { + json: { model: model.id }, + }) + .json() + .then() + } + + private async modelPath( + model: Model & { file_path?: string } + ): Promise { + if (!model.file_path) return model.id + return await joinPath([ + await dirName(model.file_path), + model.sources[0]?.filename ?? + model.settings?.llama_model_path ?? + model.sources[0]?.url.split('/').pop() ?? 
+ model.id, + ]) + } +} diff --git a/extensions/inference-nitro-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts similarity index 83% rename from extensions/inference-nitro-extension/src/node/execute.test.ts rename to extensions/inference-cortex-extension/src/node/execute.test.ts index dfd8b35a9..89110fbd9 100644 --- a/extensions/inference-nitro-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from '@jest/globals' -import { executableNitroFile } from './execute' +import { executableCortexFile } from './execute' import { GpuSetting } from '@janhq/core' import { cpuInfo } from 'cpu-instructions' @@ -30,7 +30,7 @@ jest.mock('cpu-instructions', () => ({ let mock = cpuInfo.cpuInfo as jest.Mock mock.mockReturnValue([]) -describe('test executable nitro file', () => { +describe('test executable cortex file', () => { afterAll(function () { Object.defineProperty(process, 'platform', { value: originalPlatform, @@ -44,10 +44,13 @@ describe('test executable nitro file', () => { Object.defineProperty(process, 'arch', { value: 'arm64', }) - expect(executableNitroFile(testSettings)).toEqual( + expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`mac-arm64`), - executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`/cortex`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -55,10 +58,13 @@ describe('test executable nitro file', () => { Object.defineProperty(process, 'arch', { value: 'x64', }) - expect(executableNitroFile(testSettings)).toEqual( + expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`mac-x64`), - executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(), + executablePath: + originalPlatform === 'darwin' + ? 
expect.stringContaining(`/cortex`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -73,10 +79,10 @@ describe('test executable nitro file', () => { ...testSettings, run_mode: 'cpu', } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`win`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + executablePath: expect.stringContaining(`/cortex.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -107,10 +113,10 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`win-cuda-11-7`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + executablePath: expect.stringContaining(`/cortex.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -141,10 +147,10 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`win-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + executablePath: expect.stringContaining(`/cortex.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -159,10 +165,10 @@ describe('test executable nitro file', () => { ...testSettings, run_mode: 'cpu', } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -193,10 +199,10 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux-cuda-11-7`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -227,10 +233,10 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -251,10 +257,10 @@ describe('test executable nitro file', () => { cpuInstructions.forEach((instruction) => { mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux-${instruction}`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -274,10 +280,10 @@ describe('test executable nitro file', () => { const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: 
expect.stringContaining(`win-${instruction}`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + executablePath: expect.stringContaining(`/cortex.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -313,10 +319,10 @@ describe('test executable nitro file', () => { const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`win-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + executablePath: expect.stringContaining(`/cortex.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -352,10 +358,10 @@ describe('test executable nitro file', () => { } cpuInstructions.forEach((instruction) => { mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -392,10 +398,10 @@ describe('test executable nitro file', () => { } cpuInstructions.forEach((instruction) => { mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux-vulkan`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -418,10 +424,13 @@ describe('test executable nitro file', () => { run_mode: 'cpu', } mock.mockReturnValue([]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`mac-x64`), - executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`/cortex`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts similarity index 93% rename from extensions/inference-nitro-extension/src/node/execute.ts rename to extensions/inference-cortex-extension/src/node/execute.ts index 595063ed4..0febe8adf 100644 --- a/extensions/inference-nitro-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -2,7 +2,7 @@ import { GpuSetting } from '@janhq/core' import * as path from 'path' import { cpuInfo } from 'cpu-instructions' -export interface NitroExecutableOptions { +export interface CortexExecutableOptions { enginePath: string executablePath: string cudaVisibleDevices: string @@ -81,9 +81,9 @@ const cpuInstructions = () => { * Find which executable file to run based on the current platform. * @returns The name of the executable file to run. 
*/ -export const executableNitroFile = ( +export const executableCortexFile = ( gpuSetting?: GpuSetting -): NitroExecutableOptions => { +): CortexExecutableOptions => { let engineFolder = [ os(), ...(gpuSetting?.vulkan ? @@ -99,7 +99,7 @@ .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' - let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}` + let binaryName = `cortex${extension()}` return { enginePath: path.join(__dirname, '..', 'bin', engineFolder), diff --git a/extensions/inference-cortex-extension/src/node/index.test.ts b/extensions/inference-cortex-extension/src/node/index.test.ts new file mode 100644 index 000000000..ff9d7c2fc --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/index.test.ts @@ -0,0 +1,94 @@ +jest.mock('@janhq/core/node', () => ({ + ...jest.requireActual('@janhq/core/node'), + getJanDataFolderPath: () => '', + getSystemResourceInfo: () => { + return { + cpu: { + cores: 1, + logicalCores: 1, + threads: 1, + model: 'model', + speed: 1, + }, + memory: { + total: 1, + free: 1, + }, + gpu: { + model: 'model', + memory: 1, + cuda: { + version: 'version', + devices: 'devices', + }, + vulkan: { + version: 'version', + devices: 'devices', + }, + }, + } + }, +})) + +jest.mock('fs', () => ({ + default: { + readdirSync: () => [], + }, +})) + +jest.mock('child_process', () => ({ + exec: () => { + return { + stdout: { on: jest.fn() }, + stderr: { on: jest.fn() }, + on: jest.fn(), + } + }, + spawn: () => { + return { + stdout: { on: jest.fn() }, + stderr: { on: jest.fn() }, + on: jest.fn(), + pid: '111', + } + }, +})) + +jest.mock('./execute', () => ({ + executableCortexFile: () => { + return { + enginePath: 'enginePath', + executablePath: 'executablePath', + cudaVisibleDevices: 'cudaVisibleDevices', + vkVisibleDevices: 'vkVisibleDevices', + } + }, +})) + +import index from './index' + +describe('dispose', () => { + it('should dispose a model successfully on Mac', async () => { + Object.defineProperty(process, 'platform', { + value: 'darwin', + }) + + // Call the dispose function + const result = await index.dispose() + + // Assert that the result is as expected + expect(result).toBeUndefined() + }) + + it('should kill the subprocess successfully on Windows', async () => { + Object.defineProperty(process, 'platform', { + value: 'win32', + }) + + // Call the killSubprocess function + const result = await index.dispose() + + // Assert that the result is as expected + expect(result).toBeUndefined() + }) +}) diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts new file mode 100644 index 000000000..f1c365ade --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -0,0 +1,83 @@ +import path from 'path' +import { log, SystemInformation } from '@janhq/core/node' +import { executableCortexFile } from './execute' +import { ProcessWatchdog } from './watchdog' + +// The local port the Cortex API server listens on (matches CORTEX_API_URL) +const LOCAL_PORT = '39291' +let watchdog: ProcessWatchdog | undefined = undefined
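For orientation before the main-process implementation: the renderer-side class added earlier in this diff (`JanInferenceCortexExtension`) drives this module over the `executeOnMain` bridge, so `run()` below only has to supervise the server process. A condensed sketch of that round trip, assuming the rollup-injected `NODE` module id and omitting error handling:

```ts
import { executeOnMain, systemInformation } from '@janhq/core'

declare const NODE: string // injected by rollup; points at this node module

// Hypothetical condensed view of the wiring in src/index.ts above.
async function startServer(): Promise<void> {
  const systemInfo = await systemInformation() // CPU/GPU probe picks the binary variant
  await executeOnMain(NODE, 'run', systemInfo) // invokes run() below in the main process
}

async function stopServer(): Promise<void> {
  await executeOnMain(NODE, 'dispose') // tears down the watchdog and the cortex process
}
```

The split matters: the renderer never touches the child process directly, so everything below runs in the main process.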
+ +/** + * Spawns a Cortex subprocess. + * @returns A promise that resolves when the Cortex subprocess is started. + */ +function run(systemInfo?: SystemInformation): Promise { + log(`[CORTEX]:: Spawning cortex subprocess...`) + + return new Promise(async (resolve, reject) => { + let executableOptions = executableCortexFile( + // Forward GPU settings so the matching binary variant (CPU instruction set, CUDA or Vulkan) is selected + systemInfo?.gpuSetting + ? { + ...systemInfo.gpuSetting, + run_mode: systemInfo.gpuSetting.run_mode, + } + : undefined + ) + + // Execute the binary + log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`) + log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`) + + // Add engine path to the PATH and LD_LIBRARY_PATH + process.env.PATH = (process.env.PATH || '').concat( + path.delimiter, + executableOptions.enginePath + ) + log(`[CORTEX] PATH: ${process.env.PATH}`) + process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( + path.delimiter, + executableOptions.enginePath + ) + + watchdog = new ProcessWatchdog( + executableOptions.executablePath, + ['--start-server', '--port', LOCAL_PORT.toString()], + { + cwd: executableOptions.enginePath, + env: { + ...process.env, + ENGINE_PATH: executableOptions.enginePath, + CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, + // Vulkan - Support 1 device at a time for now + ...(executableOptions.vkVisibleDevices?.length > 0 && { + GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], + }), + }, + } + ) + watchdog.start() + resolve() + }) +} + +/** + * Every module should have a dispose function + * This will be called when the extension is unloaded and should clean up any resources + * Also called when app is closed + */ +function dispose() { + watchdog?.terminate() +} + +/** + * Cortex process info + */ +export interface CortexProcessInfo { + isRunning: boolean +} + +export default { + run, + dispose, +} diff --git a/extensions/inference-cortex-extension/src/node/watchdog.ts b/extensions/inference-cortex-extension/src/node/watchdog.ts new file mode 100644 index 000000000..3e2b81d70 --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/watchdog.ts @@ -0,0 +1,84 @@ +import { log } from '@janhq/core/node' +import { spawn, ChildProcess } from 'child_process' +import { EventEmitter } from 'events' + +interface WatchdogOptions { + cwd?: string + restartDelay?: number + maxRestarts?: number + env?: NodeJS.ProcessEnv +} + +export class ProcessWatchdog extends EventEmitter { + private command: string + private args: string[] + private options: WatchdogOptions + private process: ChildProcess | null + private restartDelay: number + private maxRestarts: number + private restartCount: number + private isTerminating: boolean + + constructor(command: string, args: string[], options: WatchdogOptions = {}) { + super() + this.command = command + this.args = args + this.options = options + this.process = null + this.restartDelay = options.restartDelay || 5000 + this.maxRestarts = options.maxRestarts || 5 + this.restartCount = 0 + this.isTerminating = false + } + + start(): void { + this.spawnProcess() + } + + private spawnProcess(): void { + if (this.isTerminating) return + + log(`Starting process: ${this.command} ${this.args.join(' ')}`) + this.process = spawn(this.command, this.args, this.options) + + this.process.stdout?.on('data', (data: Buffer) => { + log(`Process output: ${data}`) + this.emit('output', data.toString()) + }) + + this.process.stderr?.on('data', (data: Buffer) => { + log(`Process error: ${data}`) + this.emit('error', data.toString()) + }) + + this.process.on('close', (code: number |
null) => { + log(`Process exited with code ${code}`) + this.emit('close', code) + if (!this.isTerminating) { + this.restartProcess() + } + }) + } + + private restartProcess(): void { + if (this.restartCount < this.maxRestarts) { + this.restartCount++ + log( + `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})` + ) + setTimeout(() => this.spawnProcess(), this.restartDelay) + } else { + log('Max restart attempts reached. Exiting watchdog.') + this.emit('maxRestartsReached') + } + } + + terminate(): void { + this.isTerminating = true + if (this.process) { + log('Terminating watched process...') + this.process.kill() + } + this.emit('terminated') + } +} diff --git a/extensions/inference-nitro-extension/tsconfig.json b/extensions/inference-cortex-extension/tsconfig.json similarity index 100% rename from extensions/inference-nitro-extension/tsconfig.json rename to extensions/inference-cortex-extension/tsconfig.json diff --git a/extensions/inference-nitro-extension/bin/version.txt b/extensions/inference-nitro-extension/bin/version.txt deleted file mode 100644 index 8f0916f76..000000000 --- a/extensions/inference-nitro-extension/bin/version.txt +++ /dev/null @@ -1 +0,0 @@ -0.5.0 diff --git a/extensions/inference-nitro-extension/download.sh b/extensions/inference-nitro-extension/download.sh deleted file mode 100755 index 98ed8504a..000000000 --- a/extensions/inference-nitro-extension/download.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Read CORTEX_VERSION -CORTEX_VERSION=$(cat ./bin/version.txt) -CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" - -# Detect platform -OS_TYPE=$(uname) - -if [ "$OS_TYPE" == "Linux" ]; then - # Linux downloads - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin" - chmod +x "./bin/cortex-cpp" - - ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64" - - # Download engines for Linux - download "${ENGINE_DOWNLOAD_URL}-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1 - -elif [ "$OS_TYPE" == "Darwin" ]; then - # macOS downloads - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/mac-arm64" 1 - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/mac-x64" 1 - chmod +x "./bin/mac-arm64/cortex-cpp" - chmod +x "./bin/mac-x64/cortex-cpp" - - ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac" - # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp - download 
"${ENGINE_DOWNLOAD_URL}-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp - -else - echo "Unsupported operating system: $OS_TYPE" - exit 1 -fi diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts deleted file mode 100644 index 6e825e8fd..000000000 --- a/extensions/inference-nitro-extension/src/index.ts +++ /dev/null @@ -1,193 +0,0 @@ -/** - * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - * @version 1.0.0 - * @module inference-extension/src/index - */ - -import { - events, - executeOnMain, - Model, - ModelEvent, - LocalOAIEngine, - InstallationState, - systemInformation, - fs, - getJanDataFolderPath, - joinPath, - DownloadRequest, - baseName, - downloadFile, - DownloadState, - DownloadEvent, - ModelFile, -} from '@janhq/core' - -declare const CUDA_DOWNLOAD_URL: string -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class JanInferenceNitroExtension extends LocalOAIEngine { - nodeModule: string = NODE - provider: string = 'nitro' - - /** - * Checking the health for Nitro's process each 5 secs. - */ - private static readonly _intervalHealthCheck = 5 * 1000 - - /** - * The interval id for the health check. Used to stop the health check. - */ - private getNitroProcessHealthIntervalId: NodeJS.Timeout | undefined = undefined - - /** - * Tracking the current state of nitro process. - */ - private nitroProcessInfo: any = undefined - - /** - * The URL for making inference requests. - */ - inferenceUrl = '' - - /** - * Subscribes to events emitted by the @janhq/core package. - */ - async onLoad() { - this.inferenceUrl = INFERENCE_URL - - // If the extension is running in the browser, use the base API URL from the core package. - if (!('electronAPI' in window)) { - this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions` - } - - this.getNitroProcessHealthIntervalId = setInterval( - () => this.periodicallyGetNitroHealth(), - JanInferenceNitroExtension._intervalHealthCheck - ) - const models = MODELS as unknown as Model[] - this.registerModels(models) - super.onLoad() - - // Add additional dependencies PATH to the env - executeOnMain(NODE, 'addAdditionalDependencies', { - name: this.name, - version: this.version, - }) - } - - /** - * Periodically check for nitro process's health. - */ - private async periodicallyGetNitroHealth(): Promise { - const health = await executeOnMain(NODE, 'getCurrentNitroProcessInfo') - - const isRunning = this.nitroProcessInfo?.isRunning ?? 
false - if (isRunning && health.isRunning === false) { - console.debug('Nitro process is stopped') - events.emit(ModelEvent.OnModelStopped, {}) - } - this.nitroProcessInfo = health - } - - override loadModel(model: ModelFile): Promise { - if (model.engine !== this.provider) return Promise.resolve() - this.getNitroProcessHealthIntervalId = setInterval( - () => this.periodicallyGetNitroHealth(), - JanInferenceNitroExtension._intervalHealthCheck - ) - return super.loadModel(model) - } - - override async unloadModel(model?: Model): Promise { - if (model?.engine && model.engine !== this.provider) return - - // stop the periocally health check - if (this.getNitroProcessHealthIntervalId) { - clearInterval(this.getNitroProcessHealthIntervalId) - this.getNitroProcessHealthIntervalId = undefined - } - return super.unloadModel(model) - } - - override async install(): Promise { - const info = await systemInformation() - - const platform = info.osInfo?.platform === 'win32' ? 'windows' : 'linux' - const downloadUrl = CUDA_DOWNLOAD_URL - - const url = downloadUrl - .replace('', info.gpuSetting?.cuda?.version ?? '12.4') - .replace('', platform) - - console.debug('Downloading Cuda Toolkit Dependency: ', url) - - const janDataFolderPath = await getJanDataFolderPath() - - const executableFolderPath = await joinPath([ - janDataFolderPath, - 'engines', - this.name ?? 'cortex-cpp', - this.version ?? '1.0.0', - ]) - - if (!(await fs.existsSync(executableFolderPath))) { - await fs.mkdir(executableFolderPath) - } - - const tarball = await baseName(url) - const tarballFullPath = await joinPath([executableFolderPath, tarball]) - - const downloadRequest: DownloadRequest = { - url, - localPath: tarballFullPath, - extensionId: this.name, - downloadType: 'extension', - } - downloadFile(downloadRequest) - - const onFileDownloadSuccess = async (state: DownloadState) => { - console.log(state) - // if other download, ignore - if (state.fileName !== tarball) return - events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) - await executeOnMain( - NODE, - 'decompressRunner', - tarballFullPath, - executableFolderPath - ) - events.emit(DownloadEvent.onFileUnzipSuccess, state) - } - events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) - } - - override async installationState(): Promise { - const info = await systemInformation() - if ( - info.gpuSetting?.run_mode === 'gpu' && - !info.gpuSetting?.vulkan && - info.osInfo && - info.osInfo.platform !== 'darwin' && - !info.gpuSetting?.cuda?.exist - ) { - const janDataFolderPath = await getJanDataFolderPath() - - const executableFolderPath = await joinPath([ - janDataFolderPath, - 'engines', - this.name ?? 'cortex-cpp', - this.version ?? 
'1.0.0', - ]) - - if (!(await fs.existsSync(executableFolderPath))) return 'NotInstalled' - return 'Installed' - } - return 'NotRequired' - } -} diff --git a/extensions/inference-nitro-extension/src/node/index.test.ts b/extensions/inference-nitro-extension/src/node/index.test.ts deleted file mode 100644 index 6e64b4a06..000000000 --- a/extensions/inference-nitro-extension/src/node/index.test.ts +++ /dev/null @@ -1,465 +0,0 @@ -jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: true, - status: 200, - json: () => - Promise.resolve({ - model_loaded: true, - }), - text: () => Promise.resolve(''), - }) - }, -})) - -jest.mock('path', () => ({ - default: { - isAbsolute: jest.fn(), - join: jest.fn(), - parse: () => { - return { dir: 'dir' } - }, - delimiter: { concat: () => '' }, - }, -})) - -jest.mock('decompress', () => ({ - default: () => { - return Promise.resolve() - }, -})) - -jest.mock('@janhq/core/node', () => ({ - ...jest.requireActual('@janhq/core/node'), - getJanDataFolderPath: () => '', - getSystemResourceInfo: () => { - return { - cpu: { - cores: 1, - logicalCores: 1, - threads: 1, - model: 'model', - speed: 1, - }, - memory: { - total: 1, - free: 1, - }, - gpu: { - model: 'model', - memory: 1, - cuda: { - version: 'version', - devices: 'devices', - }, - vulkan: { - version: 'version', - devices: 'devices', - }, - }, - } - }, -})) - -jest.mock('fs', () => ({ - default: { - readdirSync: () => [], - }, -})) - -jest.mock('child_process', () => ({ - exec: () => { - return { - stdout: { on: jest.fn() }, - stderr: { on: jest.fn() }, - on: jest.fn(), - } - }, - spawn: () => { - return { - stdout: { on: jest.fn() }, - stderr: { on: jest.fn() }, - on: jest.fn(), - pid: '111', - } - }, -})) - -jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(true), - waitUntilUsed: () => Promise.resolve(true), - }, -})) - -jest.mock('./execute', () => ({ - executableNitroFile: () => { - return { - enginePath: 'enginePath', - executablePath: 'executablePath', - cudaVisibleDevices: 'cudaVisibleDevices', - vkVisibleDevices: 'vkVisibleDevices', - } - }, -})) - -jest.mock('terminate', () => ({ - default: (id: String, func: Function) => { - console.log(id) - func() - }, -})) - -import * as execute from './execute' -import index from './index' - -let executeMock = execute - -const modelInitOptions: any = { - modelFolder: '/path/to/model', - model: { - id: 'test', - name: 'test', - engine: 'nitro', - version: '0.0', - format: 'GGUF', - object: 'model', - sources: [], - created: 0, - description: 'test', - parameters: {}, - metadata: { - author: '', - tags: [], - size: 0, - }, - settings: { - prompt_template: '{prompt}', - llama_model_path: 'model.gguf', - }, - }, -} - -describe('loadModel', () => { - it('should load a model successfully', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - - // Call the loadModel function - const result = await index.loadModel(modelInitOptions, systemInfo) - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should reject with an error message if the model is not a nitro model', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - modelInitOptions.model.engine = 'not-nitro' - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { 
- // Assert that the error message is as expected - expect(error).toBe('Not a cortex model') - } - modelInitOptions.model.engine = 'nitro' - }) - - it('should reject if model load failed with an error message', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - // Mock the fetch-retry module to return a failed response - jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: false, - status: 500, - json: () => - Promise.resolve({ - model_loaded: false, - }), - text: () => Promise.resolve('Failed to load model'), - }) - }, - })) - - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Failed to load model') - } - }) - - it('should reject if port not available', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - - // Mock the tcp-port-used module to return false - jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(false), - waitUntilUsed: () => Promise.resolve(false), - }, - })) - - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Port not available') - } - }) - - it('should run on GPU model if ngl is set', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: 40, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'gpu', - }) - }) - - it('should run on correct CPU instructions if ngl is not set', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: undefined, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'cpu', - }) - }) - - it('should run on correct CPU instructions if ngl is 0', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: 0, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'cpu', - 
}) - }) -}) - -describe('unloadModel', () => { - it('should unload a model successfully', async () => { - // Call the unloadModel function - const result = await index.unloadModel() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should reject with an error message if the model is not a nitro model', async () => { - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Not a cortex model') - } - }) - - it('should reject if model unload failed with an error message', async () => { - // Mock the fetch-retry module to return a failed response - jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: false, - status: 500, - json: () => - Promise.resolve({ - model_unloaded: false, - }), - text: () => Promise.resolve('Failed to unload model'), - }) - }, - })) - - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Failed to unload model') - } - }) - - it('should reject if port not available', async () => { - // Mock the tcp-port-used module to return false - jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(false), - waitUntilUsed: () => Promise.resolve(false), - }, - })) - - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Port not available') - } - }) -}) -describe('dispose', () => { - it('should dispose a model successfully on Mac', async () => { - Object.defineProperty(process, 'platform', { - value: 'darwin', - }) - - // Call the dispose function - const result = await index.dispose() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should kill the subprocess successfully on Windows', async () => { - Object.defineProperty(process, 'platform', { - value: 'win32', - }) - - // Call the killSubprocess function - const result = await index.dispose() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) -}) - -describe('getCurrentNitroProcessInfo', () => { - it('should return the current nitro process info', async () => { - // Call the getCurrentNitroProcessInfo function - const result = await index.getCurrentNitroProcessInfo() - - // Assert that the result is as expected - expect(result).toEqual({ - isRunning: true, - }) - }) -}) - -describe('decompressRunner', () => { - it('should decompress the runner successfully', async () => { - jest.mock('decompress', () => ({ - default: () => { - return Promise.resolve() - }, - })) - // Call the decompressRunner function - const result = await index.decompressRunner('', '') - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - it('should not reject if decompression failed', async () => { - jest.mock('decompress', () => ({ - default: () => { - return Promise.reject('Failed to decompress') - }, - })) - // Call the decompressRunner function - const result = await index.decompressRunner('', '') - expect(result).toBeUndefined() - }) -}) - -describe('addAdditionalDependencies', () => { - it('should add additional dependencies successfully', async () => { - // Call the addAdditionalDependencies function - const result = await index.addAdditionalDependencies({ - name: 'name', - version: 'version', - }) - - // Assert that the 
result is as expected - expect(result).toBeUndefined() - }) -}) diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts deleted file mode 100644 index 98ca4572f..000000000 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ /dev/null @@ -1,501 +0,0 @@ -import fs from 'fs' -import path from 'path' -import { ChildProcessWithoutNullStreams, spawn } from 'child_process' -import tcpPortUsed from 'tcp-port-used' -import fetchRT from 'fetch-retry' -import { - log, - getSystemResourceInfo, - InferenceEngine, - ModelSettingParams, - PromptTemplate, - SystemInformation, - getJanDataFolderPath, - ModelFile, -} from '@janhq/core/node' -import { executableNitroFile } from './execute' -import terminate from 'terminate' -import decompress from 'decompress' - -// Polyfill fetch with retry -const fetchRetry = fetchRT(fetch) - -/** - * The response object for model init operation. - */ -interface ModelInitOptions { - modelFolder: string - model: ModelFile -} -// The PORT to use for the Nitro subprocess -const PORT = 3928 -// The HOST address to use for the Nitro subprocess -const LOCAL_HOST = '127.0.0.1' -// The URL for the Nitro subprocess -const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}` -// The URL for the Nitro subprocess to load a model -const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel` -// The URL for the Nitro subprocess to validate a model -const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus` -// The URL for the Nitro subprocess to kill itself -const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` - -const NITRO_PORT_FREE_CHECK_INTERVAL = 100 - -// The supported model format -// TODO: Should be an array to support more models -const SUPPORTED_MODEL_FORMAT = '.gguf' - -// The subprocess instance for Nitro -let subprocess: ChildProcessWithoutNullStreams | undefined = undefined - -// The current model settings -let currentSettings: (ModelSettingParams & { model?: string }) | undefined = - undefined - -/** - * Stops a Nitro subprocess. - * @param wrapper - The model wrapper. - * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. - */ -function unloadModel(): Promise { - return killSubprocess() -} - -/** - * Initializes a Nitro subprocess to load a machine learning model. - * @param wrapper - The model wrapper. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. 
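The deleted Nitro loader below drove a bespoke `/inferences/server/loadmodel` endpoint; its replacement is the pair of `ky` calls in the new `src/index.ts` near the top of this diff. A minimal, self-contained sketch of that new flow (the model id and settings are placeholders, and the `CORTEX_API_URL` constant mirrors the value injected by the new rollup config):

```ts
import ky from 'ky'

const CORTEX_API_URL = 'http://127.0.0.1:39291/v1' // injected via rollup in the real extension

// Hypothetical direct equivalents of loadModel/unloadModel in the new extension.
async function startModel(modelId: string, settings: object = {}): Promise<unknown> {
  return ky
    .post(`${CORTEX_API_URL}/models/start`, {
      // 'llama-cpp' follows the engine renames in the model.json files above
      json: { ...settings, model: modelId, engine: 'llama-cpp' },
    })
    .json()
}

async function stopModel(modelId: string): Promise<unknown> {
  return ky.post(`${CORTEX_API_URL}/models/stop`, { json: { model: modelId } }).json()
}
```

The prompt-template parsing, GGUF discovery, and port polling in the code below all disappear because Cortex keeps model and process state behind its own REST API.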
- * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package - */ -async function loadModel( - params: ModelInitOptions, - systemInfo?: SystemInformation -): Promise { - if (params.model.engine !== InferenceEngine.nitro) { - // Not a nitro model - return Promise.resolve() - } - - if (params.model.engine !== InferenceEngine.nitro) { - return Promise.reject('Not a cortex model') - } else { - const nitroResourceProbe = await getSystemResourceInfo() - // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt - if (params.model.settings.prompt_template) { - const promptTemplate = params.model.settings.prompt_template - const prompt = promptTemplateConverter(promptTemplate) - if (prompt?.error) { - return Promise.reject(prompt.error) - } - params.model.settings.system_prompt = prompt.system_prompt - params.model.settings.user_prompt = prompt.user_prompt - params.model.settings.ai_prompt = prompt.ai_prompt - } - - // modelFolder is the absolute path to the running model folder - // e.g. ~/jan/models/llama-2 - let modelFolder = params.modelFolder - - let llama_model_path = params.model.settings.llama_model_path - - // Absolute model path support - if ( - params.model?.sources.length && - params.model.sources.every((e) => fs.existsSync(e.url)) - ) { - llama_model_path = - params.model.sources.length === 1 - ? params.model.sources[0].url - : params.model.sources.find((e) => - e.url.includes(llama_model_path ?? params.model.id) - )?.url - } - - if (!llama_model_path || !path.isAbsolute(llama_model_path)) { - // Look for GGUF model file - const modelFiles: string[] = fs.readdirSync(modelFolder) - const ggufBinFile = modelFiles.find( - (file) => - // 1. Prioritize llama_model_path (predefined) - (llama_model_path && file === llama_model_path) || - // 2. Prioritize GGUF File (manual import) - file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) || - // 3. Fallback Model ID (for backward compatibility) - file === params.model.id - ) - if (ggufBinFile) llama_model_path = path.join(modelFolder, ggufBinFile) - } - - // Look for absolute source path for single model - - if (!llama_model_path) return Promise.reject('No GGUF model file found') - - currentSettings = { - cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore), - // model.settings can override the default settings - ...params.model.settings, - llama_model_path, - model: params.model.id, - // This is critical and requires real CPU physical core count (or performance core) - ...(params.model.settings.mmproj && { - mmproj: path.isAbsolute(params.model.settings.mmproj) - ? params.model.settings.mmproj - : path.join(modelFolder, params.model.settings.mmproj), - }), - } - return runNitroAndLoadModel(params.model.id, systemInfo) - } -} - -/** - * 1. Spawn Nitro process - * 2. Load model into Nitro subprocess - * 3. 
Validate model status - * @returns - */ -async function runNitroAndLoadModel( - modelId: string, - systemInfo?: SystemInformation -) { - // Gather system information for CPU physical cores and memory - return killSubprocess() - .then(() => - tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - ) - .then(() => spawnNitroProcess(systemInfo)) - .then(() => loadLLMModel(currentSettings)) - .then(() => validateModelStatus(modelId)) - .catch((err) => { - // TODO: Broadcast error so app could display proper error message - log(`[CORTEX]::Error: ${err}`) - return { error: err } - }) -} - -/** - * Parse prompt template into agrs settings - * @param promptTemplate Template as string - * @returns - */ -function promptTemplateConverter(promptTemplate: string): PromptTemplate { - // Split the string using the markers - const systemMarker = '{system_message}' - const promptMarker = '{prompt}' - - if ( - promptTemplate.includes(systemMarker) && - promptTemplate.includes(promptMarker) - ) { - // Find the indices of the markers - const systemIndex = promptTemplate.indexOf(systemMarker) - const promptIndex = promptTemplate.indexOf(promptMarker) - - // Extract the parts of the string - const system_prompt = promptTemplate.substring(0, systemIndex) - const user_prompt = promptTemplate.substring( - systemIndex + systemMarker.length, - promptIndex - ) - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length - ) - - // Return the split parts - return { system_prompt, user_prompt, ai_prompt } - } else if (promptTemplate.includes(promptMarker)) { - // Extract the parts of the string for the case where only promptMarker is present - const promptIndex = promptTemplate.indexOf(promptMarker) - const user_prompt = promptTemplate.substring(0, promptIndex) - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length - ) - - // Return the split parts - return { user_prompt, ai_prompt } - } - - // Return an error if none of the conditions are met - return { error: 'Cannot split prompt template' } -} - -/** - * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. - */ -function loadLLMModel(settings: any): Promise { - if (!settings?.ngl) { - settings.ngl = 100 - } - log(`[CORTEX]:: Loading model with params ${JSON.stringify(settings)}`) - return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify(settings), - retries: 3, - retryDelay: 300, - }) - .then((res) => { - log( - `[CORTEX]:: Load model success with response ${JSON.stringify( - res - )}` - ) - return Promise.resolve(res) - }) - .catch((err) => { - log(`[CORTEX]::Error: Load model failed with error ${err}`) - return Promise.reject(err) - }) -} - -/** - * Validates the status of a model. - * @returns {Promise} A promise that resolves to an object. - * If the model is loaded successfully, the object is empty. - * If the model is not loaded successfully, the object contains an error message. - */ -async function validateModelStatus(modelId: string): Promise { - // Send a GET request to the validation URL. - // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. 
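The retry-based validation in this deleted code has no direct successor: liveness is instead delegated to the `ProcessWatchdog` introduced earlier in this diff, which respawns the server on unexpected exits. A small usage sketch (the binary path is a placeholder; the option defaults and event names follow `watchdog.ts`):

```ts
import { ProcessWatchdog } from './watchdog'

// Hypothetical supervision of the Cortex server; the real wiring lives in
// run() in the new node/index.ts.
const dog = new ProcessWatchdog('/path/to/cortex', ['--start-server', '--port', '39291'], {
  restartDelay: 5000, // ms between restart attempts (the watchdog's default)
  maxRestarts: 5, // after which 'maxRestartsReached' is emitted
})
dog.on('close', (code) => console.log(`cortex exited with code ${code}`))
dog.on('maxRestartsReached', () => console.error('cortex kept crashing; giving up'))
dog.start()
```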
- log(`[CORTEX]:: Validating model ${modelId}`) - return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { - method: 'POST', - body: JSON.stringify({ - model: modelId, - // TODO: force to use cortex llamacpp by default - engine: 'cortex.llamacpp', - }), - headers: { - 'Content-Type': 'application/json', - }, - retries: 5, - retryDelay: 300, - }).then(async (res: Response) => { - log( - `[CORTEX]:: Validate model state with response ${JSON.stringify( - res.status - )}` - ) - // If the response is OK, check model_loaded status. - if (res.ok) { - const body = await res.json() - // If the model is loaded, return an empty object. - // Otherwise, return an object with an error message. - if (body.model_loaded) { - log( - `[CORTEX]:: Validate model state success with response ${JSON.stringify( - body - )}` - ) - return Promise.resolve() - } - } - const errorBody = await res.text() - log( - `[CORTEX]:: Validate model state failed with response ${errorBody} and status is ${JSON.stringify( - res.statusText - )}` - ) - return Promise.reject('Validate model status failed') - }) -} - -/** - * Terminates the Nitro subprocess. - * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. - */ -async function killSubprocess(): Promise { - const controller = new AbortController() - setTimeout(() => controller.abort(), 5000) - log(`[CORTEX]:: Request to kill cortex`) - - const killRequest = () => { - return fetch(NITRO_HTTP_KILL_URL, { - method: 'DELETE', - signal: controller.signal, - }) - .catch(() => {}) // Do nothing with this attempt - .then(() => - tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - ) - .then(() => log(`[CORTEX]:: cortex process is terminated`)) - .catch((err) => { - log( - `[CORTEX]:: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}` - ) - throw 'PORT_NOT_AVAILABLE' - }) - } - - if (subprocess?.pid && process.platform !== 'darwin') { - log(`[CORTEX]:: Killing PID ${subprocess.pid}`) - const pid = subprocess.pid - return new Promise((resolve, reject) => { - terminate(pid, function (err) { - if (err) { - log('[CORTEX]::Failed to kill PID - sending request to kill') - killRequest().then(resolve).catch(reject) - } else { - tcpPortUsed - .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - .then(() => log(`[CORTEX]:: cortex process is terminated`)) - .then(() => resolve()) - .catch(() => { - log( - '[CORTEX]::Failed to kill PID (Port check timeout) - sending request to kill' - ) - killRequest().then(resolve).catch(reject) - }) - } - }) - }) - } else { - return killRequest() - } -} - -/** - * Spawns a Nitro subprocess. - * @returns A promise that resolves when the Nitro subprocess is started. - */ -function spawnNitroProcess(systemInfo?: SystemInformation): Promise { - log(`[CORTEX]:: Spawning cortex subprocess...`) - - return new Promise(async (resolve, reject) => { - let executableOptions = executableNitroFile( - // If ngl is not set or equal to 0, run on CPU with correct instructions - systemInfo?.gpuSetting - ? { - ...systemInfo.gpuSetting, - run_mode: - currentSettings?.ngl === undefined || currentSettings.ngl === 0 - ? 
-
-/**
- * Spawns a Nitro subprocess.
- * @returns A promise that resolves when the Nitro subprocess is started.
- */
-function spawnNitroProcess(systemInfo?: SystemInformation): Promise<void> {
-  log(`[CORTEX]:: Spawning cortex subprocess...`)
-
-  return new Promise<void>(async (resolve, reject) => {
-    let executableOptions = executableNitroFile(
-      // If ngl is not set or equal to 0, run on CPU with the correct instructions
-      systemInfo?.gpuSetting
-        ? {
-            ...systemInfo.gpuSetting,
-            run_mode:
-              currentSettings?.ngl === undefined || currentSettings.ngl === 0
-                ? 'cpu'
-                : systemInfo.gpuSetting.run_mode,
-          }
-        : undefined
-    )
-
-    const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
-    // Execute the binary
-    log(
-      `[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
-    )
-    log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)
-
-    // Add the engine path to PATH and LD_LIBRARY_PATH
-    process.env.PATH = (process.env.PATH || '').concat(
-      path.delimiter,
-      executableOptions.enginePath
-    )
-    log(`[CORTEX] PATH: ${process.env.PATH}`)
-    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
-      path.delimiter,
-      executableOptions.enginePath
-    )
-
-    subprocess = spawn(
-      executableOptions.executablePath,
-      ['1', LOCAL_HOST, PORT.toString()],
-      {
-        cwd: path.join(path.parse(executableOptions.executablePath).dir),
-        env: {
-          ...process.env,
-          ENGINE_PATH: executableOptions.enginePath,
-          CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
-          // Vulkan - Support 1 device at a time for now
-          ...(executableOptions.vkVisibleDevices?.length > 0 && {
-            GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
-          }),
-        },
-      }
-    )
-
-    // Handle subprocess output
-    subprocess.stdout.on('data', (data: any) => {
-      log(`[CORTEX]:: ${data}`)
-    })
-
-    subprocess.stderr.on('data', (data: any) => {
-      log(`[CORTEX]::Error: ${data}`)
-    })
-
-    subprocess.on('close', (code: any) => {
-      log(`[CORTEX]:: cortex exited with code: ${code}`)
-      subprocess = undefined
-      reject(`child process exited with code ${code}`)
-    })
-
-    tcpPortUsed
-      .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
-      .then(() => {
-        log(`[CORTEX]:: cortex is ready`)
-        resolve()
-      })
-  })
-}
-
-/**
- * Every module should have a dispose function.
- * It is called when the extension is unloaded and should clean up any resources.
- * It is also called when the app is closed.
- */
-function dispose() {
-  // clean up other registered resources here
-  killSubprocess()
-}
-
-/**
- * Nitro process info
- */
-export interface NitroProcessInfo {
-  isRunning: boolean
-}
-
-/**
- * Retrieve the current nitro process info
- */
-const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
-  return {
-    isRunning: subprocess != null,
-  }
-}
-
-const addAdditionalDependencies = (data: { name: string; version: string }) => {
-  log(
-    `[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}`
-  )
-  const additionalPath = path.delimiter.concat(
-    path.join(getJanDataFolderPath(), 'engines', data.name, data.version)
-  )
-  // Set the updated PATH
-  process.env.PATH = (process.env.PATH || '').concat(
-    path.delimiter,
-    additionalPath
-  )
-  process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
-    path.delimiter,
-    additionalPath
-  )
-}
-
-const decompressRunner = async (zipPath: string, output: string) => {
-  console.debug(`Decompressing ${zipPath} to ${output}...`)
-  try {
-    const files = await decompress(zipPath, output)
-    console.debug('Decompress finished!', files)
-  } catch (err) {
-    console.error(`Decompress ${zipPath} failed: ${err}`)
-  }
-}
-
-export default {
-  loadModel,
-  unloadModel,
-  dispose,
-  getCurrentNitroProcessInfo,
-  addAdditionalDependencies,
-  decompressRunner,
-}
diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json
index 3a694e5a0..bd834454a 100644
--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@@ -4,7 +4,6 @@
   "version": "1.0.34",
   "description": "Model Management Extension provides model exploration and seamless downloads",
   "main": "dist/index.js",
-  "node": "dist/node/index.cjs.js",
   "author": "Jan ",
   "license": "AGPL-3.0",
   "scripts": {
@@ -36,15 +35,9 @@
     "README.md"
   ],
   "dependencies": {
-    "@huggingface/gguf": "^0.0.11",
-    "@huggingface/jinja": "^0.3.0",
     "@janhq/core": "file:../../core",
-    "hyllama": "^0.2.2",
-    "python-shell": "^5.0.0"
+    "ky": "^1.7.2",
+    "p-queue": "^8.0.1"
   },
-  "bundleDependencies": [
-    "hyllama",
-    "@huggingface/gguf",
-    "@huggingface/jinja"
-  ]
+  "bundleDependencies": []
 }
diff --git a/extensions/model-extension/resources/default-model.json b/extensions/model-extension/resources/default-model.json
deleted file mode 100644
index c02008cd6..000000000
--- a/extensions/model-extension/resources/default-model.json
+++ /dev/null
@@ -1,36 +0,0 @@
-{
-  "object": "model",
-  "version": "1.0",
-  "format": "gguf",
-  "sources": [
-    {
-      "url": "N/A",
-      "filename": "N/A"
-    }
-  ],
-  "id": "N/A",
-  "name": "N/A",
-  "created": 0,
-  "description": "User self import model",
-  "settings": {
-    "ctx_len": 2048,
-    "embedding": false,
-    "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:",
-    "llama_model_path": "N/A"
-  },
-  "parameters": {
-    "temperature": 0.7,
-    "top_p": 0.95,
-    "stream": true,
-    "max_tokens": 2048,
-    "stop": ["<|END_OF_TURN_TOKEN|>", "", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"],
-    "frequency_penalty": 0,
-    "presence_penalty": 0
-  },
-  "metadata": {
-    "author": "User",
-    "tags": [],
-    "size": 0
-  },
-  "engine": "nitro"
-}
diff --git a/extensions/model-extension/rollup.config.ts b/extensions/model-extension/rollup.config.ts
index d36d8ffac..6e506140f 100644
--- a/extensions/model-extension/rollup.config.ts
+++ b/extensions/model-extension/rollup.config.ts
@@ -6,7 +6,6 @@ import replace from '@rollup/plugin-replace'
 import commonjs from '@rollup/plugin-commonjs'
 const settingJson = require('./resources/settings.json')
 const packageJson = require('./package.json')
-const defaultModelJson = require('./resources/default-model.json')

 export default [
   {
@@ -20,17 +19,18 @@ export default [
     plugins: [
       replace({
         preventAssignment: true,
-        DEFAULT_MODEL: JSON.stringify(defaultModelJson),
         SETTINGS: JSON.stringify(settingJson),
-        NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
       }),
       // Allow json resolution
       json(),
       // Compile TypeScript files
-      typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }),
+      typescript({
+        useTsconfigDeclarationDir: true,
+        exclude: ['**/__tests__', '**/*.test.ts'],
+      }), // Compile TypeScript files
       // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
-      // commonjs(),
+      commonjs(),
       // Allow node_modules resolution, so you can use 'external' to control
       // which external modules to include in the bundle
       // https://github.com/rollup/rollup-plugin-node-resolve#usage
@@ -39,39 +39,6 @@ export default [
       resolve({
         browser: true,
       }),
-      // Resolve source maps to the original source
-      sourceMaps(),
-    ],
-  },
-  {
-    input: `src/node/index.ts`,
-    output: [
-      {
-        file: 'dist/node/index.cjs.js',
-        format: 'cjs',
-        sourcemap: true,
-        inlineDynamicImports: true,
-      },
-    ],
-    // Indicate here external modules you don't want to include in your bundle (i.e.: 'lodash')
-    external: ['@janhq/core/node'],
-    watch: {
-      include: 'src/node/**',
-    },
-    plugins: [
-      // Allow json resolution
-      json(),
-      // Compile TypeScript files
-      typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }),
-      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
-      commonjs(),
-      // Allow node_modules resolution, so you can use 'external' to control
-      // which external modules to include in the bundle
-      // https://github.com/rollup/rollup-plugin-node-resolve#usage
-      resolve({
-        extensions: ['.ts', '.js', '.json'],
-      }),
-      // Resolve source maps to the original source
       sourceMaps(),
     ],
diff --git a/extensions/model-extension/src/@types/InvalidHostError.ts b/extensions/model-extension/src/@types/InvalidHostError.ts
deleted file mode 100644
index 47262206e..000000000
--- a/extensions/model-extension/src/@types/InvalidHostError.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-export class InvalidHostError extends Error {
-  constructor(message: string) {
-    super(message)
-    this.name = 'InvalidHostError'
-  }
-}
diff --git a/extensions/model-extension/src/@types/NotSupportModelError.ts b/extensions/model-extension/src/@types/NotSupportModelError.ts
deleted file mode 100644
index 0a1946176..000000000
--- a/extensions/model-extension/src/@types/NotSupportModelError.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-export class NotSupportedModelError extends Error {
-  constructor(message: string) {
-    super(message)
-    this.name = 'NotSupportedModelError'
-  }
-}
diff --git a/extensions/model-extension/src/@types/global.d.ts b/extensions/model-extension/src/@types/global.d.ts
index 3878d4bf2..01bd272f2 100644
--- a/extensions/model-extension/src/@types/global.d.ts
+++ b/extensions/model-extension/src/@types/global.d.ts
@@ -1,6 +1,5 @@
 export {}
 declare global {
-  declare const DEFAULT_MODEL: object
   declare const NODE: string

   interface Core {
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
new file mode 100644
index 000000000..685bf3b9f
--- /dev/null
+++ b/extensions/model-extension/src/cortex.ts
@@ -0,0 +1,166 @@
+import PQueue from 'p-queue'
+import ky from 'ky'
+import {
+  DownloadEvent,
+  events,
+  Model,
+  ModelRuntimeParams,
+  ModelSettingParams,
+} from '@janhq/core'
+/**
+ * cortex.cpp Model APIs interface
+ */
+interface ICortexAPI {
+  getModel(model: string): Promise<Model>
+  getModels(): Promise<Model[]>
+  pullModel(model: string): Promise<void>
+  importModel(path: string, modelPath: string): Promise<void>
+  deleteModel(model: string): Promise<void>
+  updateModel(model: object): Promise<void>
+  cancelModelPull(model: string): Promise<void>
+}
+/**
+ * Simple CortexAPI service
+ * It could be replaced by the cortex client SDK later on
+ */
+const API_URL = 'http://127.0.0.1:39291'
+const SOCKET_URL = 'ws://127.0.0.1:39291'
+
+type ModelList = {
+  data: any[]
+}
+
+export class CortexAPI implements ICortexAPI {
+  queue = new PQueue({ concurrency: 1 })
+  socket?: WebSocket = undefined
+
+  constructor() {
+    this.queue.add(() => this.healthz())
+    this.subscribeToEvents()
+  }
+
+  getModel(model: string): Promise<Model> {
+    return this.queue.add(() =>
+      ky
+        .get(`${API_URL}/v1/models/${model}`)
+        .json()
+        .then((e) => this.transformModel(e))
+    )
+  }
+
+  getModels(): Promise<Model[]> {
+    return this.queue
+      .add(() => ky.get(`${API_URL}/models`).json<ModelList>())
+      .then((e) =>
+        typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : []
+      )
+  }
+
+  pullModel(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .post(`${API_URL}/v1/models/pull`, { json: { model } })
+        .json()
+        .catch(async (e) => {
+          throw (await e.response?.json()) ?? e
+        })
+        .then()
+    )
+  }
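// [Editor's note] Standalone sketch, not part of the diff: every CortexAPI call
// above funnels through a single-concurrency PQueue, so requests are serialized
// and nothing runs before the healthz() probe queued in the constructor.
// Minimal reproduction of that pattern (URL mirrors API_URL above):
import PQueue from 'p-queue'
import ky from 'ky'

const q = new PQueue({ concurrency: 1 })
q.add(() => ky.get('http://127.0.0.1:39291/healthz').text()) // runs first
q.add(() => ky.get('http://127.0.0.1:39291/models').json()) // then this one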
+
+  importModel(model: string, modelPath: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .post(`${API_URL}/v1/models/import`, { json: { model, modelPath } })
+        .json()
+        .catch((e) => console.debug(e)) // Ignore error
+        .then()
+    )
+  }
+
+  deleteModel(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky.delete(`${API_URL}/models/${model}`).json().then()
+    )
+  }
+
+  updateModel(model: object): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .patch(`${API_URL}/v1/models/${model}`, { json: { model } })
+        .json()
+        .then()
+    )
+  }
+  cancelModelPull(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .delete(`${API_URL}/models/pull`, { json: { taskId: model } })
+        .json()
+        .then()
+    )
+  }
+
+  healthz(): Promise<void> {
+    return ky
+      .get(`${API_URL}/healthz`, {
+        retry: {
+          limit: 10,
+          methods: ['get'],
+        },
+      })
+      .then(() => {})
+  }
+
+  subscribeToEvents() {
+    this.queue.add(
+      () =>
+        new Promise<void>((resolve) => {
+          this.socket = new WebSocket(`${SOCKET_URL}/events`)
+          console.log('Socket connected')
+
+          this.socket.addEventListener('message', (event) => {
+            const data = JSON.parse(event.data)
+            const transferred = data.task.items.reduce(
+              (accumulator, currentValue) =>
+                accumulator + currentValue.downloadedBytes,
+              0
+            )
+            const total = data.task.items.reduce(
+              (accumulator, currentValue) => accumulator + currentValue.bytes,
+              0
+            )
+            const percent = ((transferred ?? 1) / (total ?? 1)) * 100
+
+            events.emit(data.type, {
+              modelId: data.task.id,
+              percent: percent,
+              size: {
+                transferred: transferred,
+                total: total,
+              },
+            })
+          })
+          resolve()
+        })
+    )
+  }
+
+  private transformModel(model: any) {
+    model.parameters = setParameters(model)
+    model.settings = setParameters(model)
+    model.metadata = {
+      tags: [],
+    }
+    return model as Model
+  }
+}
+
+type FilteredParams<T> = {
+  [K in keyof T]: T[K]
+}
+
+function setParameters<T>(params: T): T {
+  const filteredParams: FilteredParams<T> = { ...params }
+  return filteredParams
+}
diff --git a/extensions/model-extension/src/helpers/path.test.ts b/extensions/model-extension/src/helpers/path.test.ts
deleted file mode 100644
index 64ca65d8a..000000000
--- a/extensions/model-extension/src/helpers/path.test.ts
+++ /dev/null
@@ -1,87 +0,0 @@
-import { extractFileName } from './path';
-
-describe('extractFileName Function', () => {
-  it('should correctly extract the file name with the provided file extension', () => {
-    const url = 'http://example.com/some/path/to/file.ext';
-    const fileExtension = '.ext';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.ext');
-  });
-
-  it('should correctly append the file extension if it does not already exist in the file name', () => {
-    const url = 'http://example.com/some/path/to/file';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.txt');
-  });
-
-  it('should handle cases where the URL does not have a file extension correctly', () => {
-    const url = 'http://example.com/some/path/to/file';
-    const fileExtension = '.jpg';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.jpg');
-  });
-
-  it('should correctly handle URLs without a trailing slash', () => {
-    const url = 'http://example.com/some/path/tofile';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('tofile.txt');
-  });
-
-  it('should correctly handle URLs with multiple file extensions', () => {
-    const url =
'http://example.com/some/path/tofile.tar.gz'; - const fileExtension = '.gz'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('tofile.tar.gz'); - }); - - it('should correctly handle URLs with special characters', () => { - const url = 'http://example.com/some/path/tófílë.extë'; - const fileExtension = '.extë'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('tófílë.extë'); - }); - - it('should correctly handle URLs that are just a file with no path', () => { - const url = 'http://example.com/file.txt'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('file.txt'); - }); - - it('should correctly handle URLs that have special query parameters', () => { - const url = 'http://example.com/some/path/tofile.ext?query=1'; - const fileExtension = '.ext'; - const fileName = extractFileName(url.split('?')[0], fileExtension); - expect(fileName).toBe('tofile.ext'); - }); - - it('should correctly handle URLs that have uppercase characters', () => { - const url = 'http://EXAMPLE.COM/PATH/TO/FILE.EXT'; - const fileExtension = '.ext'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('FILE.EXT'); - }); - - it('should correctly handle invalid URLs', () => { - const url = 'invalid-url'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('invalid-url.txt'); - }); - - it('should correctly handle empty URLs', () => { - const url = ''; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('.txt'); - }); - - it('should correctly handle undefined URLs', () => { - const url = undefined; - const fileExtension = '.txt'; - const fileName = extractFileName(url as any, fileExtension); - expect(fileName).toBe('.txt'); - }); -}); diff --git a/extensions/model-extension/src/helpers/path.ts b/extensions/model-extension/src/helpers/path.ts deleted file mode 100644 index 6091005b8..000000000 --- a/extensions/model-extension/src/helpers/path.ts +++ /dev/null @@ -1,13 +0,0 @@ -/** - * try to retrieve the download file name from the source url - */ - -export function extractFileName(url: string, fileExtension: string): string { - if(!url) return fileExtension - - const extractedFileName = url.split('/').pop() - const fileName = extractedFileName.toLowerCase().endsWith(fileExtension) - ? 
extractedFileName - : extractedFileName + fileExtension - return fileName -} diff --git a/extensions/model-extension/src/index.test.ts b/extensions/model-extension/src/index.test.ts index 3f804b6d6..05598c30d 100644 --- a/extensions/model-extension/src/index.test.ts +++ b/extensions/model-extension/src/index.test.ts @@ -1,846 +1,90 @@ -/** - * @jest-environment jsdom - */ -const readDirSyncMock = jest.fn() -const existMock = jest.fn() -const readFileSyncMock = jest.fn() -const downloadMock = jest.fn() -const mkdirMock = jest.fn() -const writeFileSyncMock = jest.fn() -const copyFileMock = jest.fn() -const dirNameMock = jest.fn() -const executeMock = jest.fn() +import JanModelExtension from './index' +import { Model } from '@janhq/core' + +let SETTINGS = [] +// @ts-ignore +global.SETTINGS = SETTINGS jest.mock('@janhq/core', () => ({ ...jest.requireActual('@janhq/core/node'), events: { emit: jest.fn(), }, - fs: { - existsSync: existMock, - readdirSync: readDirSyncMock, - readFileSync: readFileSyncMock, - writeFileSync: writeFileSyncMock, - mkdir: mkdirMock, - copyFile: copyFileMock, - fileStat: () => ({ - isDirectory: false, - }), - }, - dirName: dirNameMock, joinPath: (paths) => paths.join('/'), - ModelExtension: jest.fn(), - downloadFile: downloadMock, - executeOnMain: executeMock, + ModelExtension: jest.fn().mockImplementation(function () { + // @ts-ignore + this.registerSettings = () => { + return Promise.resolve() + } + // @ts-ignore + return this + }), })) -jest.mock('@huggingface/gguf') - -global.fetch = jest.fn(() => - Promise.resolve({ - json: () => Promise.resolve({ test: 100 }), - arrayBuffer: jest.fn(), - }) -) as jest.Mock - -import JanModelExtension from '.' -import { fs, dirName } from '@janhq/core' -import { gguf } from '@huggingface/gguf' - describe('JanModelExtension', () => { - let sut: JanModelExtension - - beforeAll(() => { - // @ts-ignore - sut = new JanModelExtension() - }) + let extension: JanModelExtension + let mockCortexAPI: any beforeEach(() => { - jest.clearAllMocks() - }) - - describe('getConfiguredModels', () => { - describe("when there's no models are pre-populated", () => { - it('should return empty array', async () => { - // Mock configured models data - const configuredModels = [] - existMock.mockReturnValue(true) - readDirSyncMock.mockReturnValue([]) - - const result = await sut.getConfiguredModels() - expect(result).toEqual([]) - }) - }) - - describe("when there's are pre-populated models - all flattened", () => { - it('returns configured models data - flatten folder - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return 
JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getConfiguredModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model.json', - id: '2', - }), - ]) - ) - }) - }) - - describe("when there's are pre-populated models - there are nested folders", () => { - it('returns configured models data - flatten folder - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else if (path.includes('model2/model2-1')) - return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getConfiguredModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('getDownloadedModels', () => { - describe('no models downloaded', () => { - it('should return empty array', async () => { - // Mock downloaded models data - existMock.mockReturnValue(true) - readDirSyncMock.mockReturnValue([]) - - const result = await sut.getDownloadedModels() - expect(result).toEqual([]) - }) - }) - describe('only one model is downloaded', () => { - describe('flatten folder', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2'] - else if (path === 'file://models/model1') - return ['model.json', 'test.gguf'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return 
JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - ]) - ) - }) - }) - }) - - describe('all models are downloaded', () => { - describe('nested folders', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else return ['model.json', 'test.gguf'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('all models are downloaded with uppercased GGUF files', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else if (path === 'file://models/model1') - return ['model.json', 'test.GGUF'] - else return ['model.json', 'test.gguf'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - - describe('all models are downloaded - GGUF & 
Tensort RT', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else if (path === 'file://models/model1') - return ['model.json', 'test.gguf'] - else return ['model.json', 'test.engine'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('deleteModel', () => { - describe('model is a GGUF model', () => { - it('should delete the GGUF file', async () => { - fs.unlinkSync = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({})) - - readDirSyncMock.mockImplementation((path) => { - return ['model.json', 'test.gguf'] - }) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.unlinkSync).toHaveBeenCalledWith( - 'file://models/model1/test.gguf' - ) - }) - - it('no gguf file presented', async () => { - fs.unlinkSync = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({})) - - readDirSyncMock.mockReturnValue(['model.json']) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.unlinkSync).toHaveBeenCalledTimes(0) - }) - - it('delete an imported model', async () => { - fs.rm = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - readDirSyncMock.mockReturnValue(['model.json', 'test.gguf']) - - // MARK: This is a tricky logic implement? 
-      // I will just add a test for now but will align on the legacy implementation
-      fs.readFileSync = jest.fn().mockReturnValue(
-        JSON.stringify({
-          metadata: {
-            author: 'user',
-          },
-        })
-      )
-
-      existMock.mockReturnValue(true)
-
-      await sut.deleteModel({
-        file_path: 'file://models/model1/model.json',
-      } as any)
-
-      expect(fs.rm).toHaveBeenCalledWith('file://models/model1')
-    })
-
-    it('delete tensorrt-models', async () => {
-      fs.rm = jest.fn()
-      const dirMock = dirName as jest.Mock
-      dirMock.mockReturnValue('file://models/model1')
-
-      readDirSyncMock.mockReturnValue(['model.json', 'test.engine'])
-
-      fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
-
-      existMock.mockReturnValue(true)
-
-      await sut.deleteModel({
-        file_path: 'file://models/model1/model.json',
-      } as any)
-
-      expect(fs.unlinkSync).toHaveBeenCalledWith(
-        'file://models/model1/test.engine'
-      )
-    })
-  })
-})
-
-describe('downloadModel', () => {
-  const model: any = {
-    id: 'model-id',
-    name: 'Test Model',
-    sources: [
-      { url: 'http://example.com/model.gguf', filename: 'model.gguf' },
-    ],
-    engine: 'test-engine',
+    mockCortexAPI = {
+      getModels: jest.fn().mockResolvedValue([]),
+      pullModel: jest.fn().mockResolvedValue(undefined),
+      importModel: jest.fn().mockResolvedValue(undefined),
+      deleteModel: jest.fn().mockResolvedValue(undefined),
+      updateModel: jest.fn().mockResolvedValue({}),
+      cancelModelPull: jest.fn().mockResolvedValue(undefined),
+    }
-  const network = {
-    ignoreSSL: true,
-    proxy: 'http://proxy.example.com',
-  }
+    // @ts-ignore
+    extension = new JanModelExtension()
+    extension.cortexAPI = mockCortexAPI
+  })
-  const gpuSettings: any = {
-    gpus: [{ name: 'nvidia-rtx-3080', arch: 'ampere' }],
-  }
+  it('should register settings on load', async () => {
+    // @ts-ignore
+    const registerSettingsSpy = jest.spyOn(extension, 'registerSettings')
+    await extension.onLoad()
+    expect(registerSettingsSpy).toHaveBeenCalledWith(SETTINGS)
+  })
-  it('should reject with invalid gguf metadata', async () => {
-    existMock.mockImplementation(() => false)
+  it('should pull a model', async () => {
+    const model = 'test-model'
+    await extension.pullModel(model)
+    expect(mockCortexAPI.pullModel).toHaveBeenCalledWith(model)
+  })
-    expect(
-      sut.downloadModel(model, gpuSettings, network)
-    ).rejects.toBeTruthy()
-  })
+  it('should cancel model download', async () => {
+    const model = 'test-model'
+    await extension.cancelModelPull(model)
+    expect(mockCortexAPI.cancelModelPull).toHaveBeenCalledWith(model)
+  })
-  it('should download corresponding ID', async () => {
-    existMock.mockImplementation(() => true)
-    dirNameMock.mockImplementation(() => 'file://models/model1')
-    downloadMock.mockImplementation(() => {
-      return Promise.resolve({})
-    })
+  it('should delete a model', async () => {
+    const model = 'test-model'
+    await extension.deleteModel(model)
+    expect(mockCortexAPI.deleteModel).toHaveBeenCalledWith(model)
+  })
-    expect(
-      await sut.downloadModel(
-        { ...model, file_path: 'file://models/model1/model.json' },
-        gpuSettings,
-        network
-      )
-    ).toBeUndefined()
+  it('should get all models', async () => {
+    const models = await extension.getModels()
+    expect(models).toEqual([])
+    expect(mockCortexAPI.getModels).toHaveBeenCalled()
+  })
-    expect(downloadMock).toHaveBeenCalledWith(
-      {
-        localPath: 'file://models/model1/model.gguf',
-        modelId: 'model-id',
-        url: 'http://example.com/model.gguf',
-      },
-      { ignoreSSL: true, proxy: 'http://proxy.example.com' }
-    )
-  })
+  it('should update a model', async () => {
+    const model: Partial<Model> =
{ id: 'test-model' } + const updatedModel = await extension.updateModel(model) + expect(updatedModel).toEqual({}) + expect(mockCortexAPI.updateModel).toHaveBeenCalledWith(model) + }) - it('should handle invalid model file', async () => { - executeMock.mockResolvedValue({}) - - fs.readFileSync = jest.fn(() => { - return JSON.stringify({ metadata: { author: 'user' } }) - }) - - expect( - sut.downloadModel( - { ...model, file_path: 'file://models/model1/model.json' }, - gpuSettings, - network - ) - ).resolves.not.toThrow() - - expect(downloadMock).not.toHaveBeenCalled() - }) - it('should handle model file with no sources', async () => { - executeMock.mockResolvedValue({}) - const modelWithoutSources = { ...model, sources: [] } - - expect( - sut.downloadModel( - { - ...modelWithoutSources, - file_path: 'file://models/model1/model.json', - }, - gpuSettings, - network - ) - ).resolves.toBe(undefined) - - expect(downloadMock).not.toHaveBeenCalled() - }) - - it('should handle model file with multiple sources', async () => { - const modelWithMultipleSources = { - ...model, - sources: [ - { url: 'http://example.com/model1.gguf', filename: 'model1.gguf' }, - { url: 'http://example.com/model2.gguf', filename: 'model2.gguf' }, - ], - } - - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - downloadMock.mockImplementation(() => { - return Promise.resolve({}) - }) - - expect( - await sut.downloadModel( - { - ...modelWithMultipleSources, - file_path: 'file://models/model1/model.json', - }, - gpuSettings, - network - ) - ).toBeUndefined() - - expect(downloadMock).toHaveBeenCalledWith( - { - localPath: 'file://models/model1/model1.gguf', - modelId: 'model-id', - url: 'http://example.com/model1.gguf', - }, - { ignoreSSL: true, proxy: 'http://proxy.example.com' } - ) - - expect(downloadMock).toHaveBeenCalledWith( - { - localPath: 'file://models/model1/model2.gguf', - modelId: 'model-id', - url: 'http://example.com/model2.gguf', - }, - { ignoreSSL: true, proxy: 'http://proxy.example.com' } - ) - }) - - it('should handle model file with no file_path', async () => { - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - const modelWithoutFilepath = { ...model, file_path: undefined } - - await sut.downloadModel(modelWithoutFilepath, gpuSettings, network) - - expect(downloadMock).toHaveBeenCalledWith( - expect.objectContaining({ - localPath: 'file://models/model-id/model.gguf', - }), - expect.anything() - ) - }) - - it('should handle model file with invalid file_path', async () => { - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - const modelWithInvalidFilepath = { - ...model, - file_path: 'file://models/invalid-model.json', - } - - await sut.downloadModel(modelWithInvalidFilepath, gpuSettings, network) - - expect(downloadMock).toHaveBeenCalledWith( - expect.objectContaining({ 
- localPath: 'file://models/model1/model.gguf', - }), - expect.anything() - ) - }) - - it('should handle model with valid chat_template', async () => { - executeMock.mockResolvedValue('{prompt}') - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: {}, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - settings: { - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - } - - const result = await sut.retrieveGGUFMetadata({}) - - expect(result).toEqual({ - parameters: { - stop: [], - }, - settings: { - ctx_len: 4096, - ngl: 33, - prompt_template: '{prompt}', - }, - }) - }) - - it('should handle model without chat_template', async () => { - executeMock.mockRejectedValue({}) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: {}, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - settings: { - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - } - - const result = await sut.retrieveGGUFMetadata({}) - - expect(result).toEqual({ - parameters: { - stop: [], - }, - settings: { - ctx_len: 4096, - ngl: 33, - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - }) - }) + it('should import a model', async () => { + const model: any = { path: 'test-path' } + const optionType: any = 'test-option' + await extension.importModel(model, optionType) + expect(mockCortexAPI.importModel).toHaveBeenCalledWith( + model.path, + optionType + ) }) }) diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index 7e7c12469..b879e0bb9 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -1,66 +1,47 @@ import { - fs, - downloadFile, - abortDownload, - InferenceEngine, - joinPath, ModelExtension, Model, - getJanDataFolderPath, - events, - DownloadEvent, - DownloadRoute, - DownloadState, - OptionType, - ImportingModel, - LocalImportModelEvent, - baseName, - GpuSetting, - DownloadRequest, - executeOnMain, - HuggingFaceRepoData, - getFileSize, - AllQuantizations, - ModelEvent, - ModelFile, + InferenceEngine, + fs, + joinPath, dirName, } from '@janhq/core' - -import { extractFileName } from './helpers/path' -import { GGUFMetadata, gguf } from '@huggingface/gguf' -import { NotSupportedModelError } from './@types/NotSupportModelError' -import { InvalidHostError } from './@types/InvalidHostError' +import { CortexAPI } from './cortex' declare const SETTINGS: Array + +/** + * TODO: Set env for HF access token? or via API request? + */ enum Settings { huggingFaceAccessToken = 'hugging-face-access-token', } +/** + * Extension enum + */ +enum ExtensionEnum { + downloadedModels = 'downloadedModels', +} + /** * A extension for models */ export default class JanModelExtension extends ModelExtension { private static readonly _homeDir = 'file://models' - private static readonly _modelMetadataFileName = 'model.json' - private static readonly _supportedModelFormat = '.gguf' - private static readonly _incompletedModelFileName = '.download' - private static readonly _offlineInferenceEngine = [ - InferenceEngine.nitro, - InferenceEngine.nitro_tensorrt_llm, - ] - private static readonly _tensorRtEngineFormat = '.engine' - private static readonly _supportedGpuArch = ['ampere', 'ada'] - - interrupted = false + cortexAPI: CortexAPI = new CortexAPI() /** * Called when the extension is loaded. 
   * @override
   */
  async onLoad() {
-    // Handle Desktop Events
    this.registerSettings(SETTINGS)
-    this.handleDesktopEvents()
+
+    // Try to get models from cortex.cpp
+    this.getModels().then((models) => {
+      this.registerModels(models)
+    })
  }

  /**
@@ -72,384 +53,145 @@ export default class JanModelExtension extends ModelExtension {
  /**
   * Downloads a machine learning model.
   * @param model - The model to download.
-   * @param network - Optional object to specify the proxy and whether to ignore SSL certificates.
   * @returns A Promise that resolves when the model is downloaded.
   */
-  async downloadModel(
-    model: ModelFile,
-    gpuSettings?: GpuSetting,
-    network?: { ignoreSSL?: boolean; proxy?: string }
-  ): Promise<void> {
-    // Create the corresponding directory
-    const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
-    if (!(await fs.existsSync(modelDirPath))) await fs.mkdir(modelDirPath)
-    const modelJsonPath =
-      model.file_path ?? (await joinPath([modelDirPath, 'model.json']))
-
-    // Download HF model - model.json does not exist
-    if (!(await fs.existsSync(modelJsonPath))) {
-      // Only one source is supported for HF download
-      const metadata = await this.fetchModelMetadata(model.sources[0].url)
-      const updatedModel = await this.retrieveGGUFMetadata(metadata)
-      if (updatedModel) {
-        // Update model settings
-        model.settings = {
-          ...model.settings,
-          ...updatedModel.settings,
-        }
-        model.parameters = {
-          ...model.parameters,
-          ...updatedModel.parameters,
-        }
-      }
-      await fs.writeFileSync(modelJsonPath, JSON.stringify(model, null, 2))
-      events.emit(ModelEvent.OnModelsUpdate, {})
-    }
-    if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
-      if (!gpuSettings || gpuSettings.gpus.length === 0) {
-        console.error('No GPU found. Please check your GPU setting.')
-        return
-      }
-      const firstGpu = gpuSettings.gpus[0]
-      if (!firstGpu.name.toLowerCase().includes('nvidia')) {
-        console.error('No Nvidia GPU found. Please check your GPU setting.')
-        return
-      }
-      const gpuArch = firstGpu.arch
-      if (gpuArch === undefined) {
-        console.error(
-          'No GPU architecture found. Please check your GPU setting.'
-        )
-        return
-      }
-
-      if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) {
-        console.debug(
-          `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
-        )
-        return
-      }
-
-      const os = 'windows' // TODO: remove this hard-coded value
-
-      const newSources = model.sources.map((source) => {
-        const newSource = { ...source }
-        newSource.url = newSource.url
-          .replace(/<os>/g, os)
-          .replace(/<gpuarch>/g, gpuArch)
-        return newSource
-      })
-      model.sources = newSources
-    }
-
-    console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
-
-    if (model.sources.length > 1) {
-      // path to model binaries
-      for (const source of model.sources) {
-        let path = extractFileName(
-          source.url,
-          JanModelExtension._supportedModelFormat
-        )
-        if (source.filename) {
-          path = model.file_path
-            ? await joinPath([await dirName(model.file_path), source.filename])
-            : await joinPath([modelDirPath, source.filename])
-        }
-
-        const downloadRequest: DownloadRequest = {
-          url: source.url,
-          localPath: path,
-          modelId: model.id,
-        }
-        downloadFile(downloadRequest, network)
-      }
-      // TODO: handle multiple binaries for web later
-    } else {
-      const fileName = extractFileName(
-        model.sources[0]?.url,
-        JanModelExtension._supportedModelFormat
-      )
-      const path = model.file_path
-        ?
await joinPath([await dirName(model.file_path), fileName]) - : await joinPath([modelDirPath, fileName]) - const downloadRequest: DownloadRequest = { - url: model.sources[0]?.url, - localPath: path, - modelId: model.id, - } - downloadFile(downloadRequest, network) - - if (window && window.core?.api && window.core.api.baseApiUrl) { - this.startPollingDownloadProgress(model.id) - } - } - } - - private toHuggingFaceUrl(repoId: string): string { - try { - const url = new URL(repoId) - if (url.host !== 'huggingface.co') { - throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`) - } - - const paths = url.pathname.split('/').filter((e) => e.trim().length > 0) - if (paths.length < 2) { - throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`) - } - - return `${url.origin}/api/models/${paths[0]}/${paths[1]}` - } catch (err) { - if (err instanceof InvalidHostError) { - throw err - } - - if (repoId.startsWith('https')) { - throw new Error(`Cannot parse url: ${repoId}`) - } - - return `https://huggingface.co/api/models/${repoId}` - } - } - - async fetchHuggingFaceRepoData(repoId: string): Promise { - const sanitizedUrl = this.toHuggingFaceUrl(repoId) - console.debug('sanitizedUrl', sanitizedUrl) - - const huggingFaceAccessToken = ( - await this.getSetting(Settings.huggingFaceAccessToken, '') - ).trim() - - const headers = { - Accept: 'application/json', - } - - if (huggingFaceAccessToken.length > 0) { - headers['Authorization'] = `Bearer ${huggingFaceAccessToken}` - } - - const res = await fetch(sanitizedUrl, { - headers: headers, - }) - const response = await res.json() - if (response['error'] != null) { - throw new Error(response['error']) - } - - const data = response as HuggingFaceRepoData - - if (data.tags.indexOf('gguf') === -1) { - throw new NotSupportedModelError( - `${repoId} is not supported. Only GGUF models are supported.` - ) - } - - const promises: Promise[] = [] - - // fetching file sizes - const url = new URL(sanitizedUrl) - const paths = url.pathname.split('/').filter((e) => e.trim().length > 0) - - for (const sibling of data.siblings) { - const downloadUrl = `https://huggingface.co/${paths[2]}/${paths[3]}/resolve/main/${sibling.rfilename}` - sibling.downloadUrl = downloadUrl - promises.push(getFileSize(downloadUrl)) - } - - const result = await Promise.all(promises) - for (let i = 0; i < data.siblings.length; i++) { - data.siblings[i].fileSize = result[i] - } - - AllQuantizations.forEach((quantization) => { - data.siblings.forEach((sibling) => { - if (!sibling.quantization && sibling.rfilename.includes(quantization)) { - sibling.quantization = quantization - } - }) - }) - - data.modelUrl = `https://huggingface.co/${paths[2]}/${paths[3]}` - return data - } - - async fetchModelMetadata(url: string): Promise { - const { metadata } = await gguf(url) - return metadata - } - - /** - * Specifically for Jan server. 
-   */
-  private async startPollingDownloadProgress(modelId: string): Promise<void> {
-    // wait a few seconds before polling
-    await new Promise((resolve) => setTimeout(resolve, 3000))
-
-    return new Promise<void>((resolve) => {
-      const interval = setInterval(async () => {
-        fetch(
-          `${window.core.api.baseApiUrl}/v1/download/${DownloadRoute.getDownloadProgress}/${modelId}`,
-          {
-            method: 'GET',
-            headers: { contentType: 'application/json' },
-          }
-        ).then(async (res) => {
-          const state: DownloadState = await res.json()
-          if (state.downloadState === 'end') {
-            events.emit(DownloadEvent.onFileDownloadSuccess, state)
-            clearInterval(interval)
-            resolve()
-            return
-          }
-
-          if (state.downloadState === 'error') {
-            events.emit(DownloadEvent.onFileDownloadError, state)
-            clearInterval(interval)
-            resolve()
-            return
-          }
-
-          events.emit(DownloadEvent.onFileDownloadUpdate, state)
-        })
-      }, 1000)
-    })
+  async pullModel(model: string): Promise<void> {
+    /**
+     * Send a POST to the /models/pull/{id} endpoint to pull the model
+     */
+    return this.cortexAPI?.pullModel(model)
  }

  /**
   * Cancels the download of a specific machine learning model.
   *
-   * @param {string} modelId - The ID of the model whose download is to be cancelled.
+   * @param {string} model - The ID of the model whose download is to be cancelled.
   * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
   */
-  async cancelModelDownload(modelId: string): Promise<void> {
-    const path = await joinPath([JanModelExtension._homeDir, modelId, modelId])
-    try {
-      await abortDownload(path)
-      await fs.unlinkSync(path)
-    } catch (e) {
-      console.error(e)
-    }
+  async cancelModelPull(model: string): Promise<void> {
+    /**
+     * Send a DELETE to the /models/pull/{id} endpoint to cancel a model pull
+     */
+    this.cortexAPI?.cancelModelPull(model)
  }

  /**
-   * Deletes a machine learning model.
-   * @param filePath - The path to the model file to delete.
+   * Deletes a pulled model
+   * @param model - The model to delete
   * @returns A Promise that resolves when the model is deleted.
   */
-  async deleteModel(model: ModelFile): Promise<void> {
-    try {
-      const dirPath = await dirName(model.file_path)
-      const jsonFilePath = await joinPath([
-        dirPath,
-        JanModelExtension._modelMetadataFileName,
-      ])
-      const modelInfo = JSON.parse(
-        await this.readModelMetadata(jsonFilePath)
-      ) as Model
-
-      // TODO: This is tricky -
-      // should it depend on sources?
-      const isUserImportModel =
-        modelInfo.metadata?.author?.toLowerCase() === 'user'
-      if (isUserImportModel) {
-        // just delete the folder
-        return fs.rm(dirPath)
-      }
-
-      // remove all files under dirPath except model.json
-      const files = await fs.readdirSync(dirPath)
-      const deletePromises = files.map(async (fileName: string) => {
-        if (fileName !== JanModelExtension._modelMetadataFileName) {
-          return fs.unlinkSync(await joinPath([dirPath, fileName]))
-        }
-      })
-      await Promise.allSettled(deletePromises)
-    } catch (err) {
-      console.error(err)
-    }
+  async deleteModel(model: string): Promise<void> {
+    return this.cortexAPI?.deleteModel(model)
  }
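// [Editor's note] Hedged usage sketch, not part of the diff: with the node-side
// downloader gone, pull/cancel/delete become thin wrappers over cortex.cpp's
// HTTP API, and progress arrives over the websocket instead of the old polling
// loop. The extension instance and model id below are hypothetical:
const ext = new JanModelExtension()
await ext.pullModel('some-model') // POST /v1/models/pull
// ...later, if the user aborts the download:
await ext.cancelModelPull('some-model') // DELETE /models/pull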
  /**
-   * Gets all downloaded models.
+   * Gets all pulled models
   * @returns A Promise that resolves with an array of all models.
   */
-  async getDownloadedModels(): Promise<ModelFile[]> {
-    return await this.getModelsMetadata(
-      async (modelDir: string, model: Model) => {
-        if (!JanModelExtension._offlineInferenceEngine.includes(model.engine))
-          return true
+  async getModels(): Promise<Model[]> {
+    /**
+     * Returning an empty array right away here would reset the app cache
+     * and the app would not function properly,
+     * so compare and try to import instead
+     */
-        // model binaries (sources) are absolute paths & exist
-        const existFiles = await Promise.all(
-          model.sources.map(
-            (source) =>
-              // Supposed to be a local file url
-              !source.url.startsWith(`http://`) &&
-              !source.url.startsWith(`https://`)
+    if (!localStorage.getItem(ExtensionEnum.downloadedModels)) {
+      // Updated from a version older than 0.5.5
+      // Scan through the models folder and import the models (legacy flow)
+      // Return the models immediately
+      return this.scanModelsFolder().then((models) => {
+        return models ?? []
+      })
+    }
+
+    let currentModels: Model[] = []
+
+    try {
+      currentModels = JSON.parse(
+        localStorage.getItem(ExtensionEnum.downloadedModels)
+      ) as Model[]
+    } catch (e) {
+      currentModels = []
+      console.error(e)
+    }
+
+    /**
+     * Keep only the models that use the llama.cpp (nitro) engine;
+     * these are the candidates that may still need importing
+     */
+    var toImportModels = currentModels.filter(
+      (e) => e.engine === InferenceEngine.nitro
+    )
+
+    await this.cortexAPI?.getModels().then((models) => {
+      const existingIds = models.map((e) => e.id)
+      toImportModels = toImportModels.filter(
+        (e: Model) => !existingIds.includes(e.id)
+      )
+    })
+
+    console.log('To import models:', toImportModels.length)
+    /**
+     * There are models to import -
+     * do not return models from cortex.cpp yet,
+     * otherwise it will reset the app cache
+     */
+    if (toImportModels.length > 0) {
+      // Import models
+      await Promise.all(
+        toImportModels.map(async (model: Model & { file_path: string }) =>
+          this.importModel(
+            model.id,
+            await joinPath([
+              await dirName(model.file_path),
+              model.sources[0]?.filename ??
+                model.settings?.llama_model_path ??
+                model.sources[0]?.url.split('/').pop() ??
+                model.id,
+            ])
          )
        )
-        if (existFiles.every((exist) => exist)) return true
+      )

-        const result = await fs
-          .readdirSync(await joinPath([JanModelExtension._homeDir, modelDir]))
-          .then((files: string[]) => {
-            // Model binary exists in the directory
-            // Model binary name can match the model ID or be a .gguf file, and not be an incomplete model file
-            return (
-              files.includes(modelDir) ||
-              files.filter((file) => {
-                if (
-                  file.endsWith(JanModelExtension._incompletedModelFileName)
-                ) {
-                  return false
-                }
-                return (
-                  file
-                    .toLowerCase()
-                    .includes(JanModelExtension._supportedModelFormat) ||
-                  file
-                    .toLowerCase()
-                    .includes(JanModelExtension._tensorRtEngineFormat)
-                )
-                // Check if the number of matched files equals the number of sources
-              })?.length >= model.sources.length
-            )
-          })
+      return currentModels
+    }

-        return result
-      }
+    /**
+     * All models were imported successfully before,
+     * so just return the models from cortex.cpp
+     */
+    return (
+      this.cortexAPI?.getModels().then((models) => {
+        return models
+      }) ?? Promise.resolve([])
    )
  }
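// [Editor's note] Condensed sketch, not part of the diff: getModels() above
// boils down to three cases. The cache key semantics and 'nitro' engine check
// mirror the code; the helper callbacks are hypothetical stand-ins, and the
// real code also kicks off the imports in case 2:
async function resolveModels<M extends { id: string; engine: string }>(
  cached: string | null,
  scanLegacyFolder: () => Promise<M[]>,
  fetchFromCortex: () => Promise<M[]>
): Promise<M[]> {
  // 1. Pre-0.5.5 install, no cache yet: fall back to scanning the folder.
  if (!cached) return scanLegacyFolder()
  const current: M[] = JSON.parse(cached)
  const imported = await fetchFromCortex()
  const missing = current.filter(
    (m) => m.engine === 'nitro' && !imported.some((i) => i.id === m.id)
  )
  // 2. Some llama.cpp models still need importing: keep the cached list.
  if (missing.length > 0) return current
  // 3. Everything is imported: cortex.cpp is now the source of truth.
  return imported
}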
-  private async getModelJsonPath(
-    folderFullPath: string
-  ): Promise<string | undefined> {
-    // try to find model.json recursively inside each folder
-    if (!(await fs.existsSync(folderFullPath))) return undefined
-
-    const files: string[] = await fs.readdirSync(folderFullPath)
-    if (files.length === 0) return undefined
-
-    if (files.includes(JanModelExtension._modelMetadataFileName)) {
-      return joinPath([
-        folderFullPath,
-        JanModelExtension._modelMetadataFileName,
-      ])
-    }
-    // continue recursively
-    for (const file of files) {
-      const path = await joinPath([folderFullPath, file])
-      const fileStats = await fs.fileStat(path)
-      if (fileStats.isDirectory) {
-        const result = await this.getModelJsonPath(path)
-        if (result) return result
-      }
-    }
+  /**
+   * Update a pulled model's metadata
+   * @param model - The metadata of the model
+   */
+  async updateModel(model: Partial<Model>): Promise<Model> {
+    return this.cortexAPI
+      ?.updateModel(model)
+      .then(() => this.cortexAPI!.getModel(model.id))
  }

-  private async getModelsMetadata(
-    selector?: (path: string, model: Model) => Promise<boolean>
-  ): Promise<ModelFile[]> {
+  /**
+   * Import an existing model file
+   * @param model
+   * @param optionType
+   */
+  async importModel(model: string, modelPath: string): Promise<void> {
+    return this.cortexAPI?.importModel(model, modelPath)
+  }
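// [Editor's note] Hedged usage sketch, not part of the diff: importModel() now
// just points cortex.cpp at an existing weights file; the extension no longer
// symlinks or writes model.json itself. An extension instance `ext` is assumed
// and both arguments are hypothetical:
await ext.importModel(
  'my-local-model', // model id to register
  '/path/to/my-local-model.gguf' // absolute path to the weights file
)
// => POST /v1/models/import with { model, modelPath }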
+
+  //// LEGACY MODEL FOLDER ////
+  /**
+   * Scan through the models folder and return the downloaded models
+   * @returns
+   */
+  private async scanModelsFolder(): Promise<Model[]> {
     try {
       if (!(await fs.existsSync(JanModelExtension._homeDir))) {
         console.debug('Model folder not found')
@@ -459,10 +201,14 @@ export default class JanModelExtension extends ModelExtension {
       const files: string[] = await fs.readdirSync(JanModelExtension._homeDir)

       const allDirectories: string[] = []
-      for (const file of files) {
-        if (file === '.DS_Store') continue
-        if (file === 'config') continue
-        allDirectories.push(file)
+
+      for (const modelFolder of files) {
+        const fullModelFolderPath = await joinPath([
+          JanModelExtension._homeDir,
+          modelFolder,
+        ])
+        if (!(await fs.fileStat(fullModelFolderPath)).isDirectory) continue
+        allDirectories.push(modelFolder)
       }

       const readJsonPromises = allDirectories.map(async (dirName) => {
@@ -477,7 +223,7 @@ export default class JanModelExtension extends ModelExtension {

         if (await fs.existsSync(jsonPath)) {
           // if we have the model.json file, read it
-          let model = await this.readModelMetadata(jsonPath)
+          let model = await fs.readFileSync(jsonPath, 'utf-8')

           model = typeof model === 'object' ? model : JSON.parse(model)
@@ -491,420 +237,89 @@ export default class JanModelExtension extends ModelExtension {
             ]
           }
           model.file_path = jsonPath
-          model.file_name = JanModelExtension._modelMetadataFileName
+          model.file_name = 'model.json'

-          if (selector && !(await selector?.(dirName, model))) {
-            return
-          }
-          return model
-        } else {
-          // otherwise, we generate our own model file
-          // TODO: we might have more than one binary file here. This will be addressed with a new version of the Model file,
-          // which is the PR from Hiro on branch Jan can see
-          return this.generateModelMetadata(dirName)
+          // Check that the model file exists
+          // model binaries (sources) are absolute paths & exist (symlinked)
+          const existFiles = await Promise.all(
+            model.sources.map(
+              (source) =>
+                // Supposed to be a local file url
+                !source.url.startsWith(`http://`) &&
+                !source.url.startsWith(`https://`)
+            )
+          )
+          if (existFiles.every((exist) => exist)) return true
+
+          const result = await fs
+            .readdirSync(await joinPath([JanModelExtension._homeDir, dirName]))
+            .then((files: string[]) => {
+              // Model binary exists in the directory
+              // Model binary name can match the model ID or be a .gguf file, and not be an incomplete model file
+              return (
+                files.includes(dirName) || // Legacy model GGUF without extension
+                files.filter((file) => {
+                  return (
+                    file.toLowerCase().endsWith('.gguf') || // GGUF
+                    file.toLowerCase().endsWith('.engine') // TensorRT-LLM
+                  )
+                })?.length > 0 // TODO: find a better way (can use basename to check the file name against the source url)
+              )
+            })
+
+          if (result) return model
+          else return undefined
         }
       })

       const results = await Promise.allSettled(readJsonPromises)
-      const modelData = results.map((result) => {
-        if (result.status === 'fulfilled' && result.value) {
-          try {
-            const model =
-              typeof result.value === 'object'
-                ? result.value
-                : JSON.parse(result.value)
-            return model as ModelFile
-          } catch {
-            console.debug(`Unable to parse model metadata: ${result.value}`)
+      const modelData = results
+        .map((result) => {
+          if (result.status === 'fulfilled' && result.value) {
+            try {
+              const model =
+                typeof result.value === 'object'
+                  ? result.value
+                  : JSON.parse(result.value)
+              return model as Model
+            } catch {
+              console.debug(`Unable to parse model metadata: ${result.value}`)
+            }
          }
-        }
-        return undefined
-      })
+          return undefined
+        })
+        .filter((e) => !!e)

-      return modelData.filter((e) => !!e)
+      return modelData
    } catch (err) {
      console.error(err)
      return []
    }
  }

-  private readModelMetadata(path: string) {
-    return fs.readFileSync(path, 'utf-8')
-  }
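// [Editor's note] Standalone predicate, not part of the diff: the legacy scan
// above treats a folder as "downloaded" when it contains a file named after the
// model id (an old extension-less GGUF) or any .gguf / .engine file. As a
// self-contained sketch of that check (illustrative only):
function hasModelBinary(files: string[], modelId: string): boolean {
  return (
    files.includes(modelId) || // legacy GGUF stored without an extension
    files.some(
      (f) =>
        f.toLowerCase().endsWith('.gguf') || // llama.cpp weights
        f.toLowerCase().endsWith('.engine') // TensorRT-LLM engine
    )
  )
}
// e.g. hasModelBinary(['model.json', 'q4.gguf'], 'my-model') === true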
- */ - private async generateModelMetadata(dirName: string): Promise { - const files: string[] = await fs.readdirSync( - await joinPath([JanModelExtension._homeDir, dirName]) - ) - - // sort files by name - files.sort() - - // find the first file which is not a directory - let binaryFileName: string | undefined = undefined - let binaryFileSize: number | undefined = undefined - - for (const file of files) { - if (file.endsWith(JanModelExtension._supportedModelFormat)) { - const path = await joinPath([JanModelExtension._homeDir, dirName, file]) - const fileStats = await fs.fileStat(path) - if (fileStats.isDirectory) continue - binaryFileSize = fileStats.size - binaryFileName = file - break - } - } - - if (!binaryFileName) { - console.warn(`Unable to find binary file for model ${dirName}`) - return - } - - const defaultModel = (await this.getDefaultModel()) as Model - const metadata = await executeOnMain( - NODE, - 'retrieveGGUFMetadata', - await joinPath([ - await getJanDataFolderPath(), - 'models', - dirName, - binaryFileName, - ]) - ).catch(() => undefined) - - const updatedModel = await this.retrieveGGUFMetadata(metadata) - - if (!defaultModel) { - console.error('Unable to find default model') - return - } - - const model: Model = { - ...defaultModel, - // Overwrite default N/A fields - id: dirName, - name: dirName, - sources: [ - { - url: binaryFileName, - filename: binaryFileName, - }, - ], - parameters: { - ...defaultModel.parameters, - ...updatedModel.parameters, - }, - settings: { - ...defaultModel.settings, - ...updatedModel.settings, - llama_model_path: binaryFileName, - }, - created: Date.now(), - description: '', - metadata: { - size: binaryFileSize, - author: 'User', - tags: [], - }, - } - - const modelFilePath = await joinPath([ - JanModelExtension._homeDir, - dirName, - JanModelExtension._modelMetadataFileName, - ]) - - await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2)) - - return model - } - - override async getDefaultModel(): Promise { - const defaultModel = DEFAULT_MODEL as Model - return defaultModel - } - - /** - * Gets all available models. - * @returns A Promise that resolves with an array of all models. 
- */ - async getConfiguredModels(): Promise { - return this.getModelsMetadata() - } - - handleDesktopEvents() { - if (window && window.electronAPI) { - window.electronAPI.onFileDownloadUpdate( - async (_event: string, state: DownloadState | undefined) => { - if (!state) return - state.downloadState = 'downloading' - events.emit(DownloadEvent.onFileDownloadUpdate, state) - } - ) - window.electronAPI.onFileDownloadError( - async (_event: string, state: DownloadState) => { - state.downloadState = 'error' - events.emit(DownloadEvent.onFileDownloadError, state) - } - ) - window.electronAPI.onFileDownloadSuccess( - async (_event: string, state: DownloadState) => { - state.downloadState = 'end' - events.emit(DownloadEvent.onFileDownloadSuccess, state) - } - ) - } - } - - private async importModelSymlink( - modelBinaryPath: string, - modelFolderName: string, - modelFolderPath: string - ): Promise { - const fileStats = await fs.fileStat(modelBinaryPath, true) - const binaryFileSize = fileStats.size - - // Just need to generate model.json there - const defaultModel = (await this.getDefaultModel()) as Model - if (!defaultModel) { - console.error('Unable to find default model') - return - } - - const metadata = await executeOnMain( - NODE, - 'retrieveGGUFMetadata', - modelBinaryPath - ) - - const binaryFileName = await baseName(modelBinaryPath) - const updatedModel = await this.retrieveGGUFMetadata(metadata) - - const model: Model = { - ...defaultModel, - id: modelFolderName, - name: modelFolderName, - sources: [ - { - url: modelBinaryPath, - filename: binaryFileName, - }, - ], - parameters: { - ...defaultModel.parameters, - ...updatedModel.parameters, - }, - - settings: { - ...defaultModel.settings, - ...updatedModel.settings, - llama_model_path: binaryFileName, - }, - created: Date.now(), - description: '', - metadata: { - size: binaryFileSize, - author: 'User', - tags: [], - }, - } - - const modelFilePath = await joinPath([ - modelFolderPath, - JanModelExtension._modelMetadataFileName, - ]) - - await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2)) - - return { - ...model, - file_path: modelFilePath, - file_name: JanModelExtension._modelMetadataFileName, - } - } - - async updateModelInfo(modelInfo: Partial): Promise { - if (modelInfo.id == null) throw new Error('Model ID is required') - - const model = JSON.parse( - await this.readModelMetadata(modelInfo.file_path) - ) as ModelFile - - const updatedModel: ModelFile = { - ...model, - ...modelInfo, - parameters: { - ...model.parameters, - ...modelInfo.parameters, - }, - settings: { - ...model.settings, - ...modelInfo.settings, - }, - metadata: { - ...model.metadata, - ...modelInfo.metadata, - }, - // Should not persist file_path & file_name - file_path: undefined, - file_name: undefined, - } - - await fs.writeFileSync( - modelInfo.file_path, - JSON.stringify(updatedModel, null, 2) - ) - return updatedModel - } - - private async importModel( - model: ImportingModel, - optionType: OptionType - ): Promise { - const binaryName = (await baseName(model.path)).replace(/\s/g, '') - - let modelFolderName = binaryName - if (binaryName.endsWith(JanModelExtension._supportedModelFormat)) { - modelFolderName = binaryName.replace( - JanModelExtension._supportedModelFormat, - '' - ) - } - - const modelFolderPath = await this.getModelFolderName(modelFolderName) - await fs.mkdir(modelFolderPath) - - const uniqueFolderName = await baseName(modelFolderPath) - const modelBinaryFile = binaryName.endsWith( - JanModelExtension._supportedModelFormat - ) - ? 
binaryName - : `${binaryName}${JanModelExtension._supportedModelFormat}` - - const binaryPath = await joinPath([modelFolderPath, modelBinaryFile]) - - if (optionType === 'SYMLINK') { - return this.importModelSymlink( - model.path, - uniqueFolderName, - modelFolderPath - ) - } - - const srcStat = await fs.fileStat(model.path, true) - - // interval getting the file size to calculate the percentage - const interval = setInterval(async () => { - const destStats = await fs.fileStat(binaryPath, true) - const percentage = destStats.size / srcStat.size - events.emit(LocalImportModelEvent.onLocalImportModelUpdate, { - ...model, - percentage, - }) - }, 1000) - - await fs.copyFile(model.path, binaryPath) - - clearInterval(interval) - - // generate model json - return this.generateModelMetadata(uniqueFolderName) - } - - private async getModelFolderName( - modelFolderName: string, - count?: number - ): Promise { - const newModelFolderName = count - ? `${modelFolderName}-${count}` - : modelFolderName - - const janDataFolderPath = await getJanDataFolderPath() - const modelFolderPath = await joinPath([ - janDataFolderPath, - 'models', - newModelFolderName, - ]) - - const isFolderExist = await fs.existsSync(modelFolderPath) - if (!isFolderExist) { - return modelFolderPath - } else { - const newCount = (count ?? 0) + 1 - return this.getModelFolderName(modelFolderName, newCount) - } - } - - async importModels( - models: ImportingModel[], - optionType: OptionType - ): Promise { - const importedModels: Model[] = [] - - for (const model of models) { - events.emit(LocalImportModelEvent.onLocalImportModelUpdate, model) - try { - const importedModel = await this.importModel(model, optionType) - events.emit(LocalImportModelEvent.onLocalImportModelSuccess, { - ...model, - modelId: importedModel.id, - }) - importedModels.push(importedModel) - } catch (err) { - events.emit(LocalImportModelEvent.onLocalImportModelFailed, { - ...model, - error: err, - }) - } - } - - events.emit( - LocalImportModelEvent.onLocalImportModelFinished, - importedModels - ) - } - - /** - * Retrieve Model Settings from GGUF Metadata - * @param metadata + * Retrieve the model.json path from a folder + * @param folderFullPath * @returns */ - async retrieveGGUFMetadata(metadata: any): Promise> { - const defaultModel = DEFAULT_MODEL as Model - var template = await executeOnMain( - NODE, - 'renderJinjaTemplate', - metadata - ).catch(() => undefined) - - const eos_id = metadata['tokenizer.ggml.eos_token_id'] - const architecture = metadata['general.architecture'] - - return { - settings: { - prompt_template: template ?? defaultModel.settings.prompt_template, - ctx_len: - metadata[`${architecture}.context_length`] ?? - metadata['llama.context_length'] ?? - 4096, - ngl: - (metadata[`${architecture}.block_count`] ?? - metadata['llama.block_count'] ?? - 32) + 1, - }, - parameters: { - stop: eos_id - ? [metadata?.['tokenizer.ggml.tokens'][eos_id] ?? 
''] - : defaultModel.parameters.stop, - }, + private async getModelJsonPath( + folderFullPath: string + ): Promise { + // try to find model.json recursively inside each folder + if (!(await fs.existsSync(folderFullPath))) return undefined + const files: string[] = await fs.readdirSync(folderFullPath) + if (files.length === 0) return undefined + if (files.includes('model.json')) { + return joinPath([folderFullPath, 'model.json']) + } + // continue recursive + for (const file of files) { + const path = await joinPath([folderFullPath, file]) + const fileStats = await fs.fileStat(path) + if (fileStats.isDirectory) { + const result = await this.getModelJsonPath(path) + if (result) return result + } } } + //// END LEGACY MODEL FOLDER //// } diff --git a/extensions/model-extension/src/node/index.ts b/extensions/model-extension/src/node/index.ts deleted file mode 100644 index 2acf6ec4a..000000000 --- a/extensions/model-extension/src/node/index.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { closeSync, openSync, readSync } from 'fs' -import { Template } from '@huggingface/jinja' -/** - * This is to retrieve the metadata from a GGUF file - * It uses hyllama and jinja from @huggingface module - */ -export const retrieveGGUFMetadata = async (ggufPath: string) => { - try { - const { ggufMetadata } = await import('hyllama') - // Read first 10mb of gguf file - const fd = openSync(ggufPath, 'r') - const buffer = new Uint8Array(10_000_000) - readSync(fd, buffer, 0, 10_000_000, 0) - closeSync(fd) - - // Parse metadata and tensor info - const { metadata } = ggufMetadata(buffer.buffer) - - return metadata - } catch (e) { - console.log('[MODEL_EXT]', e) - } -} - -/** - * Convert metadata to jinja template - * @param metadata - */ -export const renderJinjaTemplate = (metadata: any): string => { - const template = new Template(metadata['tokenizer.chat_template']) - const eos_id = metadata['tokenizer.ggml.eos_token_id'] - const bos_id = metadata['tokenizer.ggml.bos_token_id'] - if (eos_id === undefined || bos_id === undefined) { - return '' - } - const eos_token = metadata['tokenizer.ggml.tokens'][eos_id] - const bos_token = metadata['tokenizer.ggml.tokens'][bos_id] - // Parse jinja template - return template.render({ - add_generation_prompt: true, - eos_token, - bos_token, - messages: [ - { - role: 'system', - content: '{system_message}', - }, - { - role: 'user', - content: '{prompt}', - }, - ], - }) -} diff --git a/extensions/model-extension/src/node/node.test.ts b/extensions/model-extension/src/node/node.test.ts deleted file mode 100644 index afd2b8470..000000000 --- a/extensions/model-extension/src/node/node.test.ts +++ /dev/null @@ -1,53 +0,0 @@ -import { renderJinjaTemplate } from './index' -import { Template } from '@huggingface/jinja' - -jest.mock('@huggingface/jinja', () => ({ - Template: jest.fn((template: string) => ({ - render: jest.fn(() => `${template}_rendered`), - })), -})) - -describe('renderJinjaTemplate', () => { - beforeEach(() => { - jest.clearAllMocks() // Clear mocks between tests - }) - - it('should render the template with correct parameters', () => { - const metadata = { - 'tokenizer.chat_template': 'Hello, {{ messages }}!', - 'tokenizer.ggml.eos_token_id': 0, - 'tokenizer.ggml.bos_token_id': 1, - 'tokenizer.ggml.tokens': ['EOS', 'BOS'], - } - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!') - - expect(renderedTemplate).toBe('Hello, {{ messages }}!_rendered') - }) - - it('should handle missing token IDs gracefully', 
() => { - const metadata = { - 'tokenizer.chat_template': 'Hello, {{ messages }}!', - 'tokenizer.ggml.eos_token_id': 0, - 'tokenizer.ggml.tokens': ['EOS'], - } - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!') - - expect(renderedTemplate).toBe('') - }) - - it('should handle empty template gracefully', () => { - const metadata = {} - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith(undefined) - - expect(renderedTemplate).toBe("") - }) -}) diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts index 11c86a9a7..d9c89242f 100644 --- a/extensions/tensorrt-llm-extension/src/index.ts +++ b/extensions/tensorrt-llm-extension/src/index.ts @@ -7,9 +7,7 @@ import { DownloadEvent, DownloadRequest, DownloadState, - GpuSetting, InstallationState, - Model, baseName, downloadFile, events, @@ -23,7 +21,7 @@ import { ModelEvent, getJanDataFolderPath, SystemInformation, - ModelFile, + Model, } from '@janhq/core' /** @@ -137,7 +135,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { events.emit(ModelEvent.OnModelsUpdate, {}) } - override async loadModel(model: ModelFile): Promise { + override async loadModel(model: Model): Promise { if ((await this.installationState()) === 'Installed') return super.loadModel(model) @@ -177,7 +175,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { override async inference(data: MessageRequest) { if (!this.loadedModel) return // TensorRT LLM Extension supports streaming only - if (data.model) data.model.parameters.stream = true + if (data.model && data.model.parameters) data.model.parameters.stream = true super.inference(data) } diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts index 77003389f..d02427170 100644 --- a/extensions/tensorrt-llm-extension/src/node/index.ts +++ b/extensions/tensorrt-llm-extension/src/node/index.ts @@ -41,7 +41,7 @@ async function loadModel( // e.g. 
~/jan/models/llama-2 let modelFolder = params.modelFolder - if (params.model.settings.prompt_template) { + if (params.model.settings?.prompt_template) { const promptTemplate = params.model.settings.prompt_template const prompt = promptTemplateConverter(promptTemplate) if (prompt?.error) { diff --git a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx index ddc2eab91..8eb16f549 100644 --- a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx +++ b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx @@ -9,11 +9,8 @@ import { modelDownloadStateAtom } from '@/hooks/useDownloadState' import { formatDownloadPercentage } from '@/utils/converter' -import { getDownloadingModelAtom } from '@/helpers/atoms/Model.atom' - export default function DownloadingState() { const downloadStates = useAtomValue(modelDownloadStateAtom) - const downloadingModels = useAtomValue(getDownloadingModelAtom) const { abortModelDownload } = useDownloadModel() const totalCurrentProgress = Object.values(downloadStates) @@ -76,10 +73,7 @@ export default function DownloadingState() { theme="destructive" onClick={() => { if (item?.modelId) { - const model = downloadingModels.find( - (model) => model.id === item.modelId - ) - if (model) abortModelDownload(model) + abortModelDownload(item?.modelId) } }} > diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx index 6ff6c894a..00d528f99 100644 --- a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx +++ b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx @@ -30,8 +30,8 @@ const TableActiveModel = () => { - {activeModel.metadata.size - ? toGibibytes(activeModel.metadata.size) + {activeModel.metadata?.size + ? toGibibytes(activeModel.metadata?.size) : '-'} diff --git a/web/containers/ModalCancelDownload/index.tsx b/web/containers/ModalCancelDownload/index.tsx index e8d3842a8..fdc583911 100644 --- a/web/containers/ModalCancelDownload/index.tsx +++ b/web/containers/ModalCancelDownload/index.tsx @@ -30,7 +30,7 @@ const ModalCancelDownload = ({ model, isFromList }: Props) => { const onAbortDownloadClick = useCallback(() => { if (downloadState?.modelId) { const model = downloadingModels.find( - (model) => model.id === downloadState.modelId + (model) => model === downloadState.modelId ) if (model) abortModelDownload(model) } diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx index 59f19586a..7415f1165 100644 --- a/web/containers/ModelDropdown/index.tsx +++ b/web/containers/ModelDropdown/index.tsx @@ -88,7 +88,7 @@ const ModelDropdown = ({ const searchInputRef = useRef(null) const configuredModels = useAtomValue(configuredModelsAtom) const featuredModel = configuredModels.filter((x) => - x.metadata.tags.includes('Featured') + x.metadata?.tags?.includes('Featured') ) const { updateThreadMetadata } = useCreateNewThread() @@ -200,7 +200,7 @@ const ModelDropdown = ({ if (model) updateModelParameter(activeThread, { params: modelParams, - modelPath: model.file_path, + // modelPath: model.file_path, modelId: model.id, engine: model.engine, }) @@ -444,7 +444,7 @@ const ModelDropdown = ({
    {featuredModel.map((model) => { const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) return (
  • - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} {!isDownloading ? ( downloadModel(model)} + onClick={() => + downloadModel(model.sources[0].url) + } /> ) : ( Object.values(downloadStates) @@ -511,7 +513,7 @@ const ModelDropdown = ({ .map((model) => { if (!showModel) return null const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) const isDownloaded = downloadedModels.some( (c) => c.id === model.id @@ -549,14 +551,16 @@ const ModelDropdown = ({
    {!isDownloaded && ( - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} )} {!isDownloading && !isDownloaded ? ( downloadModel(model)} + onClick={() => + downloadModel(model.sources[0].url) + } /> ) : ( Object.values(downloadStates) diff --git a/web/containers/ModelLabel/index.tsx index b0a3da96f..a6237ada6 100644 --- a/web/containers/ModelLabel/index.tsx +++ b/web/containers/ModelLabel/index.tsx @@ -42,7 +42,7 @@ const ModelLabel = ({ metadata, compact }: Props) => { const availableRam = settings?.run_mode === 'gpu' ? availableVram * 1000000 // MB to bytes - : totalRam - usedRam + (activeModel?.metadata.size ?? 0) + : totalRam - usedRam + (activeModel?.metadata?.size ?? 0) if (minimumRamModel > totalRam) { return ( { return null } - return metadata.tags.includes('Coming Soon') ? ( + return metadata?.tags?.includes('Coming Soon') ? ( ) : ( - getLabel(metadata.size ?? 0) + getLabel(metadata?.size ?? 0) ) } diff --git a/web/containers/Providers/EventListener.tsx index b35ab2e43..608160555 100644 --- a/web/containers/Providers/EventListener.tsx +++ b/web/containers/Providers/EventListener.tsx @@ -52,6 +52,21 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { if (state.downloadType === 'extension') { removeInstallingExtension(state.extensionId!) } else { + state.downloadState = 'error' + setDownloadState(state) + } + }, + [setDownloadState, removeInstallingExtension] + ) + + const onFileDownloadStopped = useCallback( + (state: DownloadState) => { + console.debug('onFileDownloadStopped', state) + if (state.downloadType === 'extension') { + removeInstallingExtension(state.extensionId!) + } else { + state.downloadState = 'error' + state.error = 'aborted' + setDownloadState(state) } }, @@ -62,6 +77,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { (state: DownloadState) => { console.debug('onFileDownloadSuccess', state) if (state.downloadType !== 'extension') { + state.downloadState = 'end' setDownloadState(state) } events.emit(ModelEvent.OnModelsUpdate, {}) @@ -87,6 +103,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate) events.on(DownloadEvent.onFileDownloadError, onFileDownloadError) events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) + events.on(DownloadEvent.onFileDownloadStopped, onFileDownloadStopped) events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess) return () => { diff --git a/web/extension/ExtensionManager.ts index aa1a7674b..811126f85 100644 --- a/web/extension/ExtensionManager.ts +++ b/web/extension/ExtensionManager.ts @@ -8,6 +8,7 @@ import Extension from './Extension' * Manages the registration and retrieval of extensions. 
*/ export class ExtensionManager { + date = new Date().toISOString() // Registered extensions private extensions = new Map() diff --git a/web/helpers/atoms/Model.atom.ts b/web/helpers/atoms/Model.atom.ts index 6abc42c9e..0f5367f64 100644 --- a/web/helpers/atoms/Model.atom.ts +++ b/web/helpers/atoms/Model.atom.ts @@ -1,4 +1,4 @@ -import { ImportingModel, InferenceEngine, Model, ModelFile } from '@janhq/core' +import { ImportingModel, InferenceEngine, Model } from '@janhq/core' import { atom } from 'jotai' import { atomWithStorage } from 'jotai/utils' @@ -14,7 +14,7 @@ enum ModelStorageAtomKeys { * Downloaded Models Atom * This atom stores the list of models that have been downloaded. */ -export const downloadedModelsAtom = atomWithStorage( +export const downloadedModelsAtom = atomWithStorage( ModelStorageAtomKeys.DownloadedModels, [] ) @@ -23,7 +23,7 @@ export const downloadedModelsAtom = atomWithStorage( * Configured Models Atom * This atom stores the list of models that have been configured and available to download */ -export const configuredModelsAtom = atomWithStorage( +export const configuredModelsAtom = atomWithStorage( ModelStorageAtomKeys.AvailableModels, [] ) @@ -43,12 +43,18 @@ export const removeDownloadedModelAtom = atom( /** * Atom to store the selected model (from ModelDropdown) */ -export const selectedModelAtom = atom(undefined) +export const selectedModelAtom = atom(undefined) /** * Atom to store the expanded engine sections (from ModelDropdown) */ -export const showEngineListModelAtom = atom([InferenceEngine.nitro]) +export const showEngineListModelAtom = atom([ + InferenceEngine.nitro, + InferenceEngine.cortex, + InferenceEngine.cortex_llamacpp, + InferenceEngine.cortex_onnx, + InferenceEngine.cortex_tensorrtllm, +]) /// End Models Atom /// Model Download Atom @@ -58,13 +64,13 @@ export const stateModel = atom({ state: 'start', loading: false, model: '' }) /** * Stores the list of models which are being downloaded. 
*/ -const downloadingModelsAtom = atom([]) +const downloadingModelsAtom = atom([]) export const getDownloadingModelAtom = atom((get) => get(downloadingModelsAtom)) -export const addDownloadingModelAtom = atom(null, (get, set, model: Model) => { +export const addDownloadingModelAtom = atom(null, (get, set, model: string) => { const downloadingModels = get(downloadingModelsAtom) - if (!downloadingModels.find((e) => e.id === model.id)) { + if (!downloadingModels.find((e) => e === model)) { set(downloadingModelsAtom, [...downloadingModels, model]) } }) @@ -76,7 +82,7 @@ export const removeDownloadingModelAtom = atom( set( downloadingModelsAtom, - downloadingModels.filter((e) => e.id !== modelId) + downloadingModels.filter((e) => e !== modelId) ) } ) @@ -88,10 +94,6 @@ export const removeDownloadingModelAtom = atom( // store the paths of the models that are being imported export const importingModelsAtom = atom([]) -// DEPRECATED: Remove when moving to cortex.cpp -// Default model template when importing -export const defaultModelAtom = atom(undefined) - /** * Importing progress Atom */ diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 2d53678c3..8dd71fcc5 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,6 +1,6 @@ import { useCallback, useEffect, useRef } from 'react' -import { EngineManager, Model, ModelFile } from '@janhq/core' +import { EngineManager, Model } from '@janhq/core' import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' import { toaster } from '@/containers/Toast' @@ -11,7 +11,7 @@ import { vulkanEnabledAtom } from '@/helpers/atoms/AppConfig.atom' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' -export const activeModelAtom = atom(undefined) +export const activeModelAtom = atom(undefined) export const loadModelErrorAtom = atom(undefined) type ModelState = { @@ -37,7 +37,7 @@ export function useActiveModel() { const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom) const isVulkanEnabled = useAtomValue(vulkanEnabledAtom) - const downloadedModelsRef = useRef([]) + const downloadedModelsRef = useRef([]) useEffect(() => { downloadedModelsRef.current = downloadedModels @@ -55,11 +55,6 @@ export function useActiveModel() { let model = downloadedModelsRef?.current.find((e) => e.id === modelId) - const error = await stopModel().catch((error: Error) => error) - if (error) { - return Promise.reject(error) - } - setLoadModelError(undefined) setActiveModel(undefined) @@ -144,7 +139,7 @@ export function useActiveModel() { const engine = EngineManager.instance().get(stoppingModel.engine) return engine ?.unloadModel(stoppingModel) - .catch() + .catch((e) => console.error(e)) .then(() => { setActiveModel(undefined) setStateModel({ state: 'start', loading: false, model: undefined }) diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts index e65353753..75aa99c27 100644 --- a/web/hooks/useCreateNewThread.ts +++ b/web/hooks/useCreateNewThread.ts @@ -8,7 +8,7 @@ import { ThreadAssistantInfo, ThreadState, AssistantTool, - ModelFile, + Model, } from '@janhq/core' import { atom, useAtomValue, useSetAtom } from 'jotai' @@ -76,7 +76,7 @@ export const useCreateNewThread = () => { const requestCreateNewThread = async ( assistant: Assistant, - model?: ModelFile | undefined + model?: Model | undefined ) => { // Stop generating if any setIsGeneratingResponse(false) diff --git a/web/hooks/useDeleteModel.test.ts 
b/web/hooks/useDeleteModel.test.ts index 336a1cd0c..3a6587d7b 100644 --- a/web/hooks/useDeleteModel.test.ts +++ b/web/hooks/useDeleteModel.test.ts @@ -16,7 +16,7 @@ describe('useDeleteModel', () => { const mockModel: any = { id: 'test-model', name: 'Test Model', - // Add other required properties of ModelFile + // Add other required properties of Model } const mockDeleteModel = jest.fn() diff --git a/web/hooks/useDeleteModel.ts b/web/hooks/useDeleteModel.ts index 5a7a319b2..5621a78b8 100644 --- a/web/hooks/useDeleteModel.ts +++ b/web/hooks/useDeleteModel.ts @@ -1,6 +1,6 @@ import { useCallback } from 'react' -import { ExtensionTypeEnum, ModelExtension, ModelFile } from '@janhq/core' +import { ExtensionTypeEnum, ModelExtension, Model } from '@janhq/core' import { useSetAtom } from 'jotai' @@ -13,8 +13,8 @@ export default function useDeleteModel() { const removeDownloadedModel = useSetAtom(removeDownloadedModelAtom) const deleteModel = useCallback( - async (model: ModelFile) => { - await localDeleteModel(model) + async (model: Model) => { + await localDeleteModel(model.id) removeDownloadedModel(model.id) toaster({ title: 'Model Deletion Successful', @@ -28,7 +28,7 @@ export default function useDeleteModel() { return { deleteModel } } -const localDeleteModel = async (model: ModelFile) => +const localDeleteModel = async (model: string) => extensionManager .get(ExtensionTypeEnum.Model) ?.deleteModel(model) diff --git a/web/hooks/useDownloadModel.ts b/web/hooks/useDownloadModel.ts index 0cd21ea83..82ce593e2 100644 --- a/web/hooks/useDownloadModel.ts +++ b/web/hooks/useDownloadModel.ts @@ -1,106 +1,47 @@ import { useCallback } from 'react' import { - Model, + events, ExtensionTypeEnum, + ModelEvent, ModelExtension, - abortDownload, - joinPath, - ModelArtifact, - DownloadState, - GpuSetting, - ModelFile, - dirName, } from '@janhq/core' -import { useAtomValue, useSetAtom } from 'jotai' +import { useSetAtom } from 'jotai' -import { setDownloadStateAtom } from './useDownloadState' - -import useGpuSetting from './useGpuSetting' +import { toaster } from '@/containers/Toast' import { extensionManager } from '@/extension/ExtensionManager' + import { - ignoreSslAtom, - proxyAtom, - proxyEnabledAtom, -} from '@/helpers/atoms/AppConfig.atom' -import { addDownloadingModelAtom } from '@/helpers/atoms/Model.atom' + addDownloadingModelAtom, + removeDownloadingModelAtom, +} from '@/helpers/atoms/Model.atom' export default function useDownloadModel() { - const ignoreSSL = useAtomValue(ignoreSslAtom) - const proxy = useAtomValue(proxyAtom) - const proxyEnabled = useAtomValue(proxyEnabledAtom) - const setDownloadState = useSetAtom(setDownloadStateAtom) const addDownloadingModel = useSetAtom(addDownloadingModelAtom) - - const { getGpuSettings } = useGpuSetting() + const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom) const downloadModel = useCallback( - async (model: Model) => { - const childProgresses: DownloadState[] = model.sources.map( - (source: ModelArtifact) => ({ - fileName: source.filename, - modelId: model.id, - time: { - elapsed: 0, - remaining: 0, - }, - speed: 0, - percent: 0, - size: { - total: 0, - transferred: 0, - }, - downloadState: 'downloading', - }) - ) - - // set an initial download state - setDownloadState({ - fileName: '', - modelId: model.id, - time: { - elapsed: 0, - remaining: 0, - }, - speed: 0, - percent: 0, - size: { - total: 0, - transferred: 0, - }, - children: childProgresses, - downloadState: 'downloading', - }) - + async (model: string) => { 
addDownloadingModel(model) - const gpuSettings = await getGpuSettings() - await localDownloadModel( - model, - ignoreSSL, - proxyEnabled ? proxy : '', - gpuSettings - ) + localDownloadModel(model).catch((error) => { + if (error.message) { + toaster({ + title: 'Download failed', + description: error.message, + type: 'error', + }) + } + + removeDownloadingModel(model) + }) }, - [ - ignoreSSL, - proxy, - proxyEnabled, - getGpuSettings, - addDownloadingModel, - setDownloadState, - ] + [addDownloadingModel] ) - const abortModelDownload = useCallback(async (model: Model | ModelFile) => { - for (const source of model.sources) { - const path = - 'file_path' in model - ? await joinPath([await dirName(model.file_path), source.filename]) - : await joinPath(['models', model.id, source.filename]) - await abortDownload(path) - } + const abortModelDownload = useCallback(async (model: string) => { + await cancelModelDownload(model) }, []) return { @@ -109,12 +50,12 @@ } } -const localDownloadModel = async ( - model: Model, - ignoreSSL: boolean, - proxy: string, - gpuSettings?: GpuSetting -) => +const localDownloadModel = async (model: string) => extensionManager .get(ExtensionTypeEnum.Model) - ?.downloadModel(model, gpuSettings, { ignoreSSL, proxy }) + ?.pullModel(model) + +const cancelModelDownload = async (model: string) => + extensionManager + .get(ExtensionTypeEnum.Model) + ?.cancelModelPull(model) diff --git a/web/hooks/useDownloadState.ts index 03a8883cb..59267749e 100644 --- a/web/hooks/useDownloadState.ts +++ b/web/hooks/useDownloadState.ts @@ -77,7 +77,7 @@ export const setDownloadStateAtom = atom( } } else { // download in progress - if (state.size.total === 0) { + if (state.size.total === 0 || !currentState[state.modelId]) { // this is initial state, just set the state currentState[state.modelId] = state set(modelDownloadStateAtom, currentState) diff --git a/web/hooks/useGetHFRepoData.ts index 3dab2c72e..4e3308116 100644 --- a/web/hooks/useGetHFRepoData.ts +++ b/web/hooks/useGetHFRepoData.ts @@ -1,12 +1,6 @@ import { useCallback, useState } from 'react' -import { - ExtensionTypeEnum, - HuggingFaceRepoData, - ModelExtension, -} from '@janhq/core' - -import { extensionManager } from '@/extension' +import { HuggingFaceRepoData } from '@janhq/core' export const useGetHFRepoData = () => { const [error, setError] = useState(undefined) @@ -35,7 +29,8 @@ export const useGetHFRepoData = () => { const extensionGetHfRepoData = async ( repoId: string ): Promise => { - return extensionManager - .get(ExtensionTypeEnum.Model) - ?.fetchHuggingFaceRepoData(repoId) + return Promise.resolve(undefined) + // return extensionManager + // .get(ExtensionTypeEnum.Model) + // ?.fetchHuggingFaceRepoData(repoId) } diff --git a/web/hooks/useImportModel.ts index effc64f86..df6b085ca 100644 --- a/web/hooks/useImportModel.ts +++ b/web/hooks/useImportModel.ts @@ -104,16 +104,22 @@ const useImportModel = () => { const localImportModels = async ( models: ImportingModel[], optionType: OptionType -): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.importModels(models, optionType) +): Promise => { + await Promise.all( + models + .filter((e) => !!e.modelId) + .map((model) => { + if (model.modelId) + return extensionManager + .get(ExtensionTypeEnum.Model) + ?.importModel(model.modelId, model.path) + }) + ) +} const localUpdateModelInfo = async ( modelInfo: Partial ): Promise => extensionManager 
.get(ExtensionTypeEnum.Model) - ?.updateModelInfo(modelInfo) + ?.updateModel(modelInfo) export default useImportModel diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts index 58def79c6..1cbd970d6 100644 --- a/web/hooks/useModels.ts +++ b/web/hooks/useModels.ts @@ -5,8 +5,8 @@ import { Model, ModelEvent, ModelExtension, - ModelFile, events, + ModelManager, } from '@janhq/core' import { useSetAtom } from 'jotai' @@ -14,7 +14,6 @@ import { useSetAtom } from 'jotai' import { extensionManager } from '@/extension' import { configuredModelsAtom, - defaultModelAtom, downloadedModelsAtom, } from '@/helpers/atoms/Model.atom' @@ -25,32 +24,22 @@ import { */ const useModels = () => { const setDownloadedModels = useSetAtom(downloadedModelsAtom) - const setConfiguredModels = useSetAtom(configuredModelsAtom) - const setDefaultModel = useSetAtom(defaultModelAtom) + const setExtensionModels = useSetAtom(configuredModelsAtom) const getData = useCallback(() => { const getDownloadedModels = async () => { - const models = await getLocalDownloadedModels() + const models = await getModels() setDownloadedModels(models) } - const getConfiguredModels = async () => { - const models = await getLocalConfiguredModels() - setConfiguredModels(models) - } - - const getDefaultModel = async () => { - const defaultModel = await getLocalDefaultModel() - setDefaultModel(defaultModel) + const getExtensionModels = async () => { + const models = ModelManager.instance().models.values().toArray() + setExtensionModels(models) } // Fetch all data - Promise.all([ - getDownloadedModels(), - getConfiguredModels(), - getDefaultModel(), - ]) - }, [setDownloadedModels, setConfiguredModels, setDefaultModel]) + Promise.all([getDownloadedModels(), getExtensionModels()]) + }, [setDownloadedModels, setExtensionModels]) useEffect(() => { // Try get data on mount @@ -65,22 +54,8 @@ const useModels = () => { }, [getData]) } -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalDefaultModel = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.getDefaultModel() - -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalConfiguredModels = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.getConfiguredModels() ?? [] - -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalDownloadedModels = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.getDownloadedModels() ?? [] +const getModels = async (): Promise => + extensionManager.get(ExtensionTypeEnum.Model)?.getModels() ?? 
+ [] export default useModels diff --git a/web/hooks/useRecommendedModel.ts b/web/hooks/useRecommendedModel.ts index ed56efa55..21a9c69e7 100644 --- a/web/hooks/useRecommendedModel.ts +++ b/web/hooks/useRecommendedModel.ts @@ -1,6 +1,6 @@ import { useCallback, useEffect, useState } from 'react' -import { Model, InferenceEngine, ModelFile } from '@janhq/core' +import { Model, InferenceEngine } from '@janhq/core' import { atom, useAtomValue } from 'jotai' @@ -24,16 +24,12 @@ export const LAST_USED_MODEL_ID = 'last-used-model-id' */ export default function useRecommendedModel() { const activeModel = useAtomValue(activeModelAtom) - const [sortedModels, setSortedModels] = useState([]) - const [recommendedModel, setRecommendedModel] = useState< - ModelFile | undefined - >() + const [sortedModels, setSortedModels] = useState([]) + const [recommendedModel, setRecommendedModel] = useState() const activeThread = useAtomValue(activeThreadAtom) const downloadedModels = useAtomValue(downloadedModelsAtom) - const getAndSortDownloadedModels = useCallback(async (): Promise< - ModelFile[] - > => { + const getAndSortDownloadedModels = useCallback(async (): Promise => { const models = downloadedModels.sort((a, b) => a.engine !== InferenceEngine.nitro && b.engine === InferenceEngine.nitro ? 1 diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts index 1dbd5b45e..bab515a30 100644 --- a/web/hooks/useSendChatMessage.ts +++ b/web/hooks/useSendChatMessage.ts @@ -123,65 +123,27 @@ export default function useSendChatMessage() { } const resendChatMessage = async (currentMessage: ThreadMessage) => { - if (!activeThreadRef.current) { - console.error('No active thread') - return - } - updateThreadWaiting(activeThreadRef.current.id, true) + // Delete last response before regenerating + const newConvoData = currentMessages + let toSendMessage = currentMessage - const requestBuilder = new MessageRequestBuilder( - MessageRequestType.Thread, - activeThreadRef.current.assistants[0].model ?? selectedModelRef.current, - activeThreadRef.current, - currentMessages - ) - .addSystemMessage(activeThreadRef.current.assistants[0]?.instructions) - .removeLastAssistantMessage() + do { + deleteMessage(currentMessage.id) + const msg = newConvoData.pop() + if (!msg) break + toSendMessage = msg + deleteMessage(toSendMessage.id ?? '') + } while (toSendMessage.role !== ChatCompletionRole.User) - const modelId = - selectedModelRef.current?.id ?? - activeThreadRef.current.assistants[0].model.id - - if (modelRef.current?.id !== modelId) { - const error = await startModel(modelId).catch((error: Error) => error) - if (error) { - updateThreadWaiting(activeThreadRef.current.id, false) - return - } + if (activeThreadRef.current) { + await extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.writeMessages(activeThreadRef.current.id, newConvoData) } - setIsGeneratingResponse(true) - - if (currentMessage.role !== ChatCompletionRole.User) { - // Delete last response before regenerating - deleteMessage(currentMessage.id ?? '') - if (activeThreadRef.current) { - await extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.writeMessages( - activeThreadRef.current.id, - currentMessages.filter((msg) => msg.id !== currentMessage.id) - ) - } - } - // Process message request with Assistants tools - const request = await ToolManager.instance().process( - requestBuilder.build(), - activeThreadRef.current.assistants?.flatMap( - (assistant) => assistant.tools ?? [] - ) ?? 
[] - ) - - request.messages = normalizeMessages(request.messages ?? []) - - const engine = - requestBuilder.model?.engine ?? selectedModelRef.current?.engine ?? '' - - EngineManager.instance().get(engine)?.inference(request) + sendChatMessage(toSendMessage.content[0]?.text.value) } - // Define interface extending Array prototype - const sendChatMessage = async (message: string) => { if (!message || message.trim().length === 0) return @@ -294,6 +256,7 @@ export default function useSendChatMessage() { ) request.messages = normalizeMessages(request.messages ?? []) + console.log(requestBuilder.model?.engine ?? modelRequest.engine, request) // Request for inference EngineManager.instance() .get(requestBuilder.model?.engine ?? modelRequest.engine ?? '') diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx index 44a3fd278..ce5a12957 100644 --- a/web/screens/Hub/ModelList/ModelHeader/index.tsx +++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx @@ -1,6 +1,6 @@ import { useCallback } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { Button, Badge, Tooltip } from '@janhq/joi' import { useAtomValue, useSetAtom } from 'jotai' @@ -38,7 +38,7 @@ import { } from '@/helpers/atoms/SystemBar.atom' type Props = { - model: ModelFile + model: Model onClick: () => void open: string } @@ -64,7 +64,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { const assistants = useAtomValue(assistantsAtom) const onDownloadClick = useCallback(() => { - downloadModel(model) + downloadModel(model.sources[0].url) }, [model, downloadModel]) const isDownloaded = downloadedModels.find((md) => md.id === model.id) != null @@ -81,7 +81,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { ) - const isDownloading = downloadingModels.some((md) => md.id === model.id) + const isDownloading = downloadingModels.some((md) => md === model.id) const onUseModelClick = useCallback(async () => { if (assistants.length === 0) { @@ -144,7 +144,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => {
    - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)}
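The Hub screens above now start a download with model.sources[0].url, a plain string, and the same string flows through cancellation. Condensed from the useDownloadModel diff earlier in this patch, the new wiring reduces to two thin wrappers; this is a minimal sketch, and the <ModelExtension> type parameters are an assumption, since this rendering of the diff drops generics:

import { ExtensionTypeEnum, ModelExtension } from '@janhq/core'
import { extensionManager } from '@/extension/ExtensionManager'

// Start a pull; `model` is a model id or a direct source URL
// (e.g. model.sources[0].url as in the Hub screens above).
const localDownloadModel = async (model: string): Promise<void> =>
  extensionManager
    .get<ModelExtension>(ExtensionTypeEnum.Model)
    ?.pullModel(model)

// Abort an in-flight pull with the same string; progress and stop
// notifications come back through the DownloadEvent listeners.
const cancelModelDownload = async (model: string): Promise<void> =>
  extensionManager
    .get<ModelExtension>(ExtensionTypeEnum.Model)
    ?.cancelModelPull(model)

Because only a string crosses the boundary, the downloading-state atoms can hold string identifiers as well, which is exactly the Model.atom change above.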
    diff --git a/web/screens/Hub/ModelList/ModelItem/index.tsx b/web/screens/Hub/ModelList/ModelItem/index.tsx index ec9d885a1..a077dbffc 100644 --- a/web/screens/Hub/ModelList/ModelItem/index.tsx +++ b/web/screens/Hub/ModelList/ModelItem/index.tsx @@ -1,6 +1,6 @@ import { useState } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { Badge } from '@janhq/joi' import { twMerge } from 'tailwind-merge' @@ -12,7 +12,7 @@ import ModelItemHeader from '@/screens/Hub/ModelList/ModelHeader' import { toGibibytes } from '@/utils/converter' type Props = { - model: ModelFile + model: Model } const ModelItem: React.FC = ({ model }) => { @@ -34,7 +34,7 @@ const ModelItem: React.FC = ({ model }) => {
    - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)}
    @@ -49,9 +49,9 @@ const ModelItem: React.FC = ({ model }) => { Author

    - {model.metadata.author} + {model.metadata?.author}

    @@ -66,7 +66,7 @@ const ModelItem: React.FC = ({ model }) => {
    Tags
    - {model.metadata.tags.map((tag: string) => ( + {model.metadata?.tags?.map((tag: string) => ( {tag} diff --git a/web/screens/Hub/ModelList/index.tsx b/web/screens/Hub/ModelList/index.tsx index 8fc30d541..0d7865a81 100644 --- a/web/screens/Hub/ModelList/index.tsx +++ b/web/screens/Hub/ModelList/index.tsx @@ -1,6 +1,6 @@ import { useMemo } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { useAtomValue } from 'jotai' @@ -9,16 +9,16 @@ import ModelItem from '@/screens/Hub/ModelList/ModelItem' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' type Props = { - models: ModelFile[] + models: Model[] } const ModelList = ({ models }: Props) => { const downloadedModels = useAtomValue(downloadedModelsAtom) - const sortedModels: ModelFile[] = useMemo(() => { - const featuredModels: ModelFile[] = [] - const remoteModels: ModelFile[] = [] - const localModels: ModelFile[] = [] - const remainingModels: ModelFile[] = [] + const sortedModels: Model[] = useMemo(() => { + const featuredModels: Model[] = [] + const remoteModels: Model[] = [] + const localModels: Model[] = [] + const remainingModels: Model[] = [] models.forEach((m) => { if (m.metadata?.tags?.includes('Featured')) { featuredModels.push(m) @@ -30,9 +30,9 @@ const ModelList = ({ models }: Props) => { remainingModels.push(m) } }) - featuredModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) - localModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) - remainingModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) + featuredModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) + localModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) + remainingModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) remoteModels.sort((m1, m2) => m1.name.localeCompare(m2.name)) return [ ...featuredModels, diff --git a/web/screens/Hub/index.tsx b/web/screens/Hub/index.tsx index 8148a6bb5..382cf5667 100644 --- a/web/screens/Hub/index.tsx +++ b/web/screens/Hub/index.tsx @@ -52,7 +52,7 @@ const HubScreen = () => { } else if (sortSelected === 'featured') { return ( x.name.toLowerCase().includes(searchValue.toLowerCase()) && - x.metadata.tags.includes('Featured') + x.metadata?.tags?.includes('Featured') ) } else { return x.name.toLowerCase().includes(searchValue.toLowerCase()) diff --git a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx index 9c2ff14a5..454905332 100644 --- a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx +++ b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx @@ -1,11 +1,6 @@ -import { useCallback, useMemo } from 'react' +import { useCallback } from 'react' -import { - DownloadState, - HuggingFaceRepoData, - Model, - Quantization, -} from '@janhq/core' +import { DownloadState, HuggingFaceRepoData, Quantization } from '@janhq/core' import { Badge, Button, Progress } from '@janhq/joi' import { useAtomValue, useSetAtom } from 'jotai' @@ -24,10 +19,7 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom' import { assistantsAtom } from '@/helpers/atoms/Assistant.atom' import { importHuggingFaceModelStageAtom } from '@/helpers/atoms/HuggingFace.atom' -import { - defaultModelAtom, - downloadedModelsAtom, -} from '@/helpers/atoms/Model.atom' +import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' type Props = { index: number @@ -39,7 +31,6 @@ type Props = { } const 
ModelDownloadRow: React.FC = ({ - repoData, downloadUrl, fileName, fileSize = 0, @@ -56,44 +47,18 @@ const ModelDownloadRow: React.FC = ({ const downloadedModel = downloadedModels.find((md) => md.id === fileName) const setHfImportingStage = useSetAtom(importHuggingFaceModelStageAtom) - const defaultModel = useAtomValue(defaultModelAtom) - - const model = useMemo(() => { - if (!defaultModel) { - return undefined - } - - const model: Model = { - ...defaultModel, - sources: [ - { - url: downloadUrl, - filename: fileName, - }, - ], - id: fileName, - name: fileName, - created: Date.now(), - metadata: { - author: 'User', - tags: repoData.tags, - size: fileSize, - }, - } - return model - }, [fileName, fileSize, repoData, downloadUrl, defaultModel]) const onAbortDownloadClick = useCallback(() => { - if (model) { - abortModelDownload(model) + if (downloadUrl) { + abortModelDownload(downloadUrl) } - }, [model, abortModelDownload]) + }, [downloadUrl, abortModelDownload]) const onDownloadClick = useCallback(async () => { - if (model) { - downloadModel(model) + if (downloadUrl) { + downloadModel(downloadUrl) } - }, [model, downloadModel]) + }, [downloadUrl, downloadModel]) const onUseModelClick = useCallback(async () => { if (assistants.length === 0) { @@ -111,7 +76,7 @@ const ModelDownloadRow: React.FC = ({ setHfImportingStage, ]) - if (!model) { + if (!downloadUrl) { return null } @@ -143,7 +108,7 @@ const ModelDownloadRow: React.FC = ({ variant="soft" className="min-w-[98px]" onClick={onUseModelClick} - data-testid={`use-model-btn-${model.id}`} + data-testid={`use-model-btn-${downloadUrl}`} > Use diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx index 7557e9952..6661ed068 100644 --- a/web/screens/Settings/MyModels/MyModelList/index.tsx +++ b/web/screens/Settings/MyModels/MyModelList/index.tsx @@ -1,6 +1,6 @@ import { memo, useState } from 'react' -import { InferenceEngine, ModelFile } from '@janhq/core' +import { InferenceEngine, Model } from '@janhq/core' import { Badge, Button, Tooltip, useClickOutside } from '@janhq/joi' import { useAtom } from 'jotai' import { @@ -21,7 +21,7 @@ import { isLocalEngine } from '@/utils/modelEngine' import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' type Props = { - model: ModelFile + model: Model groupTitle?: string } @@ -78,7 +78,7 @@ const MyModelList = ({ model }: Props) => {
    - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)}
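Nearly every screen in this patch swaps model.metadata.size for model.metadata?.size because metadata is optional on the Model type that replaces ModelFile. Where the guarded read recurs, a small helper keeps the fallback consistent; formatModelSize below is hypothetical (not part of this patch) and only illustrates the pattern:

import { Model } from '@janhq/core'

import { toGibibytes } from '@/utils/converter'

// Hypothetical helper: metadata, and metadata.size, may be undefined on
// Model, so read through optional chaining and fall back to the same '-'
// placeholder the components above render.
const formatModelSize = (model: Model): string =>
  model.metadata?.size ? toGibibytes(model.metadata.size) : '-'

A component would then render {formatModelSize(model)} instead of repeating the inline ternary shown in TableActiveModel.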
    diff --git a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx index 4dab6bfa8..f73efb486 100644 --- a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx @@ -38,20 +38,20 @@ const AssistantSetting: React.FC = ({ componentData }) => { (key === 'chunk_overlap' || key === 'chunk_size') ) { if ( - activeThread.assistants[0].tools[0]?.settings.chunk_size < - activeThread.assistants[0].tools[0]?.settings.chunk_overlap + activeThread.assistants[0].tools[0]?.settings?.chunk_size < + activeThread.assistants[0].tools[0]?.settings?.chunk_overlap ) { activeThread.assistants[0].tools[0].settings.chunk_overlap = activeThread.assistants[0].tools[0].settings.chunk_size } if ( key === 'chunk_size' && - value < activeThread.assistants[0].tools[0].settings.chunk_overlap + value < activeThread.assistants[0].tools[0].settings?.chunk_overlap ) { activeThread.assistants[0].tools[0].settings.chunk_overlap = value } else if ( key === 'chunk_overlap' && - value > activeThread.assistants[0].tools[0].settings.chunk_size + value > activeThread.assistants[0].tools[0].settings?.chunk_size ) { activeThread.assistants[0].tools[0].settings.chunk_size = value } diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx index 0ef9a9ba1..0adc7ddd4 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx @@ -69,7 +69,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { return x.id === recommendModel[0] || x.id === recommendModel[1] } else { return ( - x.metadata.tags.includes('Featured') && x.metadata.size < 5000000000 + x.metadata?.tags?.includes('Featured') && x.metadata?.size < 5000000000 ) } }) @@ -143,7 +143,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { ) : ( filteredModels.map((model) => { const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) return (
    {
    - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} {!isDownloading ? ( downloadModel(model)} + onClick={() => + downloadModel(model.sources[0].url) + } /> ) : ( Object.values(downloadStates) @@ -210,7 +212,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { {featuredModel.slice(0, 2).map((featModel) => { const isDownloading = downloadingModels.some( - (md) => md.id === featModel.id + (md) => md === featModel.id ) return (
    { - {toGibibytes(featModel.metadata.size)} + {toGibibytes(featModel.metadata?.size)}
    )} diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx index afa84b5bf..066c93430 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx @@ -155,7 +155,7 @@ const ChatInput = () => { fileUpload.length > 0 || (activeThread?.assistants[0].tools && !activeThread?.assistants[0].tools[0]?.enabled && - !activeThread?.assistants[0].model.settings.vision_model) + !activeThread?.assistants[0].model.settings?.vision_model) ) { e.stopPropagation() } else { @@ -180,7 +180,7 @@ const ChatInput = () => { (activeThread?.assistants[0].tools && !activeThread?.assistants[0].tools[0]?.enabled && !activeThread?.assistants[0].model.settings - .vision_model && ( + ?.vision_model && ( <> {fileUpload.length !== 0 && ( @@ -221,13 +221,13 @@ const ChatInput = () => {
  • { if ( - activeThread?.assistants[0].model.settings.vision_model + activeThread?.assistants[0].model.settings?.vision_model ) { imageInputRef.current?.click() setShowAttacmentMenus(false) @@ -240,7 +240,7 @@ const ChatInput = () => { } content="This feature only supports multimodal models." disabled={ - activeThread?.assistants[0].model.settings.vision_model + activeThread?.assistants[0].model.settings?.vision_model } /> {
  • { onClick={() => { if ( activeThread?.assistants[0].model.settings - .text_model !== false + ?.text_model !== false ) { fileInputRef.current?.click() setShowAttacmentMenus(false) @@ -271,11 +271,11 @@ const ChatInput = () => { content={ (!activeThread?.assistants[0].tools || !activeThread?.assistants[0].tools[0]?.enabled || - activeThread?.assistants[0].model.settings.text_model === + activeThread?.assistants[0].model.settings?.text_model === false) && ( <> - {activeThread?.assistants[0].model.settings.text_model === - false ? ( + {activeThread?.assistants[0].model.settings + ?.text_model === false ? ( This model does not support text-based retrieval. diff --git a/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx b/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx index cdf865ceb..c4a97a6b9 100644 --- a/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx @@ -74,7 +74,7 @@ const MessageToolbar = ({ message }: { message: ThreadMessage }) => { )[ messages.filter((msg) => msg.role === ChatCompletionRole.Assistant) .length - 1 - ]?.content[0].text.value, + ]?.content[0]?.text.value, }, } diff --git a/web/screens/Thread/ThreadCenterPanel/index.tsx b/web/screens/Thread/ThreadCenterPanel/index.tsx index b12f859bd..fe7993e9a 100644 --- a/web/screens/Thread/ThreadCenterPanel/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/index.tsx @@ -56,7 +56,7 @@ const ThreadCenterPanel = () => { const activeThread = useAtomValue(activeThreadAtom) const acceptedFormat: Accept = activeThread?.assistants[0].model.settings - .vision_model + ?.vision_model ? { 'application/pdf': ['.pdf'], 'image/jpeg': ['.jpeg'], @@ -79,7 +79,7 @@ const ThreadCenterPanel = () => { e.dataTransfer.items.length === 1 && ((activeThread?.assistants[0].tools && activeThread?.assistants[0].tools[0]?.enabled) || - activeThread?.assistants[0].model.settings.vision_model) + activeThread?.assistants[0].model.settings?.vision_model) ) { setDragOver(true) } else if ( @@ -101,7 +101,7 @@ const ThreadCenterPanel = () => { rejectFiles.length !== 0 || (activeThread?.assistants[0].tools && !activeThread?.assistants[0].tools[0]?.enabled && - !activeThread?.assistants[0].model.settings.vision_model) + !activeThread?.assistants[0].model.settings?.vision_model) ) return const imageType = files[0]?.type.includes('image') @@ -170,7 +170,7 @@ const ThreadCenterPanel = () => { {isDragReject ? `Currently, we only support 1 attachment at the same time with ${ activeThread?.assistants[0].model.settings - .vision_model + ?.vision_model ? 'PDF, JPEG, JPG, PNG' : 'PDF' } format` @@ -178,7 +178,7 @@ const ThreadCenterPanel = () => { {!isDragReject && (

    - {activeThread?.assistants[0].model.settings.vision_model + {activeThread?.assistants[0].model.settings?.vision_model ? 'PDF, JPEG, JPG, PNG' : 'PDF'}

    diff --git a/web/screens/Thread/ThreadRightPanel/index.tsx b/web/screens/Thread/ThreadRightPanel/index.tsx index 0bf917015..7ccc4957a 100644 --- a/web/screens/Thread/ThreadRightPanel/index.tsx +++ b/web/screens/Thread/ThreadRightPanel/index.tsx @@ -182,8 +182,8 @@ const ThreadRightPanel = () => { }) if ( - activeThread.assistants[0].model.parameters.max_tokens && - activeThread.assistants[0].model.settings.ctx_len + activeThread.assistants[0].model.parameters?.max_tokens && + activeThread.assistants[0].model.settings?.ctx_len ) { if ( key === 'max_tokens' && diff --git a/web/services/appService.test.ts b/web/services/appService.test.ts index 37053f930..5172ea6ed 100644 --- a/web/services/appService.test.ts +++ b/web/services/appService.test.ts @@ -1,30 +1,32 @@ - -import { ExtensionTypeEnum, extensionManager } from '@/extension'; -import { appService } from './appService'; +import { extensionManager } from '@/extension' +import { appService } from './appService' test('should return correct system information when monitoring extension is found', async () => { - const mockGpuSetting = { name: 'NVIDIA GeForce GTX 1080', memory: 8192 }; - const mockOsInfo = { platform: 'win32', release: '10.0.19041' }; + const mockGpuSetting = { name: 'NVIDIA GeForce GTX 1080', memory: 8192 } + const mockOsInfo = { platform: 'win32', release: '10.0.19041' } const mockMonitoringExtension = { getGpuSetting: jest.fn().mockResolvedValue(mockGpuSetting), getOsInfo: jest.fn().mockResolvedValue(mockOsInfo), - }; - extensionManager.get = jest.fn().mockReturnValue(mockMonitoringExtension); - - const result = await appService.systemInformation(); - - expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled(); - expect(mockMonitoringExtension.getOsInfo).toHaveBeenCalled(); - expect(result).toEqual({ gpuSetting: mockGpuSetting, osInfo: mockOsInfo }); -}); + } + extensionManager.get = jest.fn().mockReturnValue(mockMonitoringExtension) + const result = await appService.systemInformation() + + expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled() + expect(mockMonitoringExtension.getOsInfo).toHaveBeenCalled() + expect(result).toEqual({ gpuSetting: mockGpuSetting, osInfo: mockOsInfo }) +}) test('should log a warning when monitoring extension is not found', async () => { - const consoleWarnMock = jest.spyOn(console, 'warn').mockImplementation(() => {}); - extensionManager.get = jest.fn().mockReturnValue(undefined); - - await appService.systemInformation(); - - expect(consoleWarnMock).toHaveBeenCalledWith('System monitoring extension not found'); - consoleWarnMock.mockRestore(); -}); + const consoleWarnMock = jest + .spyOn(console, 'warn') + .mockImplementation(() => {}) + extensionManager.get = jest.fn().mockReturnValue(undefined) + + await appService.systemInformation() + + expect(consoleWarnMock).toHaveBeenCalledWith( + 'System monitoring extension not found' + ) + consoleWarnMock.mockRestore() +})
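One thread ties the web-side changes together: download lifecycle handling is now fully event-driven. The EventListener diff above registers DownloadEvent.onFileDownloadStopped next to the existing update/error/success handlers and folds a user-initiated stop into the 'error' state, tagged 'aborted', so the UI can distinguish it from a real failure. A condensed sketch of that handler, assuming the DownloadState shape used throughout this patch:

import { DownloadEvent, DownloadState, events } from '@janhq/core'

// A stopped download reuses the 'error' download state so the existing
// error UI applies, but tags it 'aborted' so callers can tell a manual
// cancel apart from a genuine failure.
events.on(DownloadEvent.onFileDownloadStopped, (state: DownloadState) => {
  if (state.downloadType === 'extension') return
  state.downloadState = 'error'
  state.error = 'aborted'
  // hand the normalized state to the download-state store
  // (setDownloadStateAtom in web/hooks/useDownloadState.ts)
})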