feat: model and cortex extensions update
This commit is contained in:
parent
c6481d4668
commit
4080dc4b65
@ -1,6 +1,8 @@
|
||||
import { SettingComponentProps } from '../types'
|
||||
import { Model, ModelEvent, SettingComponentProps } from '../types'
|
||||
import { getJanDataFolderPath, joinPath } from './core'
|
||||
import { events } from './events'
|
||||
import { fs } from './fs'
|
||||
import { ModelManager } from './models'
|
||||
|
||||
export enum ExtensionTypeEnum {
|
||||
Assistant = 'assistant',
|
||||
@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType {
|
||||
return undefined
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers models - it persists in-memory shared ModelManager instance's data map.
|
||||
* @param models
|
||||
*/
|
||||
async registerModels(models: Model[]): Promise<void> {
|
||||
for (const model of models) {
|
||||
ModelManager.instance().register(model)
|
||||
}
|
||||
events.emit(ModelEvent.OnModelsUpdate, {})
|
||||
}
|
||||
|
||||
/**
|
||||
* Register settings for the extension.
|
||||
* @param settings
|
||||
* @returns
|
||||
*/
|
||||
async registerSettings(settings: SettingComponentProps[]): Promise<void> {
|
||||
if (!this.name) {
|
||||
console.error('Extension name is not defined')
|
||||
@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the setting value for the key.
|
||||
* @param key
|
||||
* @param defaultValue
|
||||
* @returns
|
||||
*/
|
||||
async getSetting<T>(key: string, defaultValue: T) {
|
||||
const keySetting = (await this.getSettings()).find((setting) => setting.key === key)
|
||||
|
||||
@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType {
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the settings for the extension.
|
||||
* @returns
|
||||
*/
|
||||
async getSettings(): Promise<SettingComponentProps[]> {
|
||||
if (!this.name) return []
|
||||
|
||||
@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the settings for the extension.
|
||||
* @param componentProps
|
||||
* @returns
|
||||
*/
|
||||
async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> {
|
||||
if (!this.name) return
|
||||
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
import { AIEngine } from './AIEngine'
|
||||
import { events } from '../../events'
|
||||
import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types'
|
||||
import { EngineManager } from './EngineManager'
|
||||
import { fs } from '../../fs'
|
||||
import { ModelEvent, Model } from '../../../types'
|
||||
|
||||
jest.mock('../../events')
|
||||
jest.mock('./EngineManager')
|
||||
@ -26,7 +24,7 @@ describe('AIEngine', () => {
|
||||
})
|
||||
|
||||
it('should load model if provider matches', async () => {
|
||||
const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any
|
||||
const model: any = { id: 'model1', engine: 'test-provider' } as any
|
||||
|
||||
await engine.loadModel(model)
|
||||
|
||||
@ -34,7 +32,7 @@ describe('AIEngine', () => {
|
||||
})
|
||||
|
||||
it('should not load model if provider does not match', async () => {
|
||||
const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any
|
||||
const model: any = { id: 'model1', engine: 'other-provider' } as any
|
||||
|
||||
await engine.loadModel(model)
|
||||
|
||||
|
||||
@ -1,17 +1,14 @@
|
||||
import { getJanDataFolderPath, joinPath } from '../../core'
|
||||
import { events } from '../../events'
|
||||
import { BaseExtension } from '../../extension'
|
||||
import { fs } from '../../fs'
|
||||
import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types'
|
||||
import { MessageRequest, Model, ModelEvent } from '../../../types'
|
||||
import { EngineManager } from './EngineManager'
|
||||
import { ModelManager } from '../../models/manager'
|
||||
|
||||
/**
|
||||
* Base AIEngine
|
||||
* Applicable to all AI Engines
|
||||
*/
|
||||
export abstract class AIEngine extends BaseExtension {
|
||||
private static modelsFolder = 'models'
|
||||
|
||||
// The inference engine
|
||||
abstract provider: string
|
||||
|
||||
@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension {
|
||||
override onLoad() {
|
||||
this.registerEngine()
|
||||
|
||||
events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
|
||||
events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
|
||||
events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
|
||||
}
|
||||
|
||||
@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension {
|
||||
EngineManager.instance().register(this)
|
||||
}
|
||||
|
||||
async registerModels(models: Model[]): Promise<void> {
|
||||
const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder])
|
||||
|
||||
let shouldNotifyModelUpdate = false
|
||||
for (const model of models) {
|
||||
const modelPath = await joinPath([modelFolderPath, model.id])
|
||||
const isExist = await fs.existsSync(modelPath)
|
||||
|
||||
if (isExist) {
|
||||
await this.migrateModelIfNeeded(model, modelPath)
|
||||
continue
|
||||
}
|
||||
|
||||
await fs.mkdir(modelPath)
|
||||
await fs.writeFileSync(
|
||||
await joinPath([modelPath, 'model.json']),
|
||||
JSON.stringify(model, null, 2)
|
||||
)
|
||||
shouldNotifyModelUpdate = true
|
||||
}
|
||||
|
||||
if (shouldNotifyModelUpdate) {
|
||||
events.emit(ModelEvent.OnModelsUpdate, {})
|
||||
}
|
||||
}
|
||||
|
||||
async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> {
|
||||
try {
|
||||
const modelJson = await fs.readFileSync(await joinPath([modelPath, 'model.json']), 'utf-8')
|
||||
const currentModel: Model = JSON.parse(modelJson)
|
||||
if (currentModel.version !== model.version) {
|
||||
await fs.writeFileSync(
|
||||
await joinPath([modelPath, 'model.json']),
|
||||
JSON.stringify(model, null, 2)
|
||||
)
|
||||
|
||||
events.emit(ModelEvent.OnModelsUpdate, {})
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('Error while try to migrating model', error)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the model.
|
||||
*/
|
||||
async loadModel(model: ModelFile): Promise<any> {
|
||||
async loadModel(model: Model): Promise<any> {
|
||||
if (model.engine.toString() !== this.provider) return Promise.resolve()
|
||||
events.emit(ModelEvent.OnModelReady, model)
|
||||
return Promise.resolve()
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import { InferenceEngine } from '../../../types'
|
||||
import { AIEngine } from './AIEngine'
|
||||
|
||||
/**
|
||||
@ -20,6 +21,22 @@ export class EngineManager {
|
||||
* @returns The engine, if found.
|
||||
*/
|
||||
get<T extends AIEngine>(provider: string): T | undefined {
|
||||
// Backward compatible provider
|
||||
// nitro is migrated to cortex
|
||||
if (
|
||||
[
|
||||
InferenceEngine.nitro,
|
||||
InferenceEngine.cortex,
|
||||
InferenceEngine.cortex_llamacpp,
|
||||
InferenceEngine.cortex_onnx,
|
||||
InferenceEngine.cortex_tensorrtllm,
|
||||
InferenceEngine.cortex_onnx,
|
||||
]
|
||||
.map((e) => e.toString())
|
||||
.includes(provider)
|
||||
)
|
||||
provider = InferenceEngine.cortex
|
||||
|
||||
return this.engines.get(provider) as T | undefined
|
||||
}
|
||||
|
||||
@ -27,6 +44,6 @@ export class EngineManager {
|
||||
* The instance of the engine manager.
|
||||
*/
|
||||
static instance(): EngineManager {
|
||||
return window.core?.engineManager as EngineManager ?? new EngineManager()
|
||||
return (window.core?.engineManager as EngineManager) ?? new EngineManager()
|
||||
}
|
||||
}
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
*/
|
||||
import { LocalOAIEngine } from './LocalOAIEngine'
|
||||
import { events } from '../../events'
|
||||
import { ModelEvent, ModelFile, Model } from '../../../types'
|
||||
import { ModelEvent, Model } from '../../../types'
|
||||
import { executeOnMain, systemInformation, dirName } from '../../core'
|
||||
|
||||
jest.mock('../../core', () => ({
|
||||
@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
|
||||
})
|
||||
|
||||
it('should load model correctly', async () => {
|
||||
const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
|
||||
const model: Model = { engine: 'testProvider', file_path: 'path/to/model' } as any
|
||||
const modelFolder = 'path/to'
|
||||
const systemInfo = { os: 'testOS' }
|
||||
const res = { error: null }
|
||||
@ -54,7 +54,6 @@ describe('LocalOAIEngine', () => {
|
||||
|
||||
await engine.loadModel(model)
|
||||
|
||||
expect(dirName).toHaveBeenCalledWith(model.file_path)
|
||||
expect(systemInformation).toHaveBeenCalled()
|
||||
expect(executeOnMain).toHaveBeenCalledWith(
|
||||
engine.nodeModule,
|
||||
@ -66,7 +65,7 @@ describe('LocalOAIEngine', () => {
|
||||
})
|
||||
|
||||
it('should handle load model error', async () => {
|
||||
const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
|
||||
const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
|
||||
const modelFolder = 'path/to'
|
||||
const systemInfo = { os: 'testOS' }
|
||||
const res = { error: 'load error' }
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { executeOnMain, systemInformation, dirName } from '../../core'
|
||||
import { events } from '../../events'
|
||||
import { Model, ModelEvent, ModelFile } from '../../../types'
|
||||
import { Model, ModelEvent } from '../../../types'
|
||||
import { OAIEngine } from './OAIEngine'
|
||||
|
||||
/**
|
||||
@ -22,35 +22,36 @@ export abstract class LocalOAIEngine extends OAIEngine {
|
||||
override onLoad() {
|
||||
super.onLoad()
|
||||
// These events are applicable to local inference providers
|
||||
events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
|
||||
events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
|
||||
events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the model.
|
||||
*/
|
||||
override async loadModel(model: ModelFile): Promise<void> {
|
||||
override async loadModel(model: Model): Promise<void> {
|
||||
if (model.engine.toString() !== this.provider) return
|
||||
const modelFolder = await dirName(model.file_path)
|
||||
const systemInfo = await systemInformation()
|
||||
const res = await executeOnMain(
|
||||
this.nodeModule,
|
||||
this.loadModelFunctionName,
|
||||
{
|
||||
modelFolder,
|
||||
model,
|
||||
},
|
||||
systemInfo
|
||||
)
|
||||
// const modelFolder = await dirName(model.file_path)
|
||||
// const systemInfo = await systemInformation()
|
||||
// const res = await executeOnMain(
|
||||
// this.nodeModule,
|
||||
// this.loadModelFunctionName,
|
||||
// {
|
||||
// modelFolder,
|
||||
// model,
|
||||
// },
|
||||
// systemInfo
|
||||
// )
|
||||
|
||||
if (res?.error) {
|
||||
events.emit(ModelEvent.OnModelFail, { error: res.error })
|
||||
return Promise.reject(res.error)
|
||||
} else {
|
||||
this.loadedModel = model
|
||||
events.emit(ModelEvent.OnModelReady, model)
|
||||
return Promise.resolve()
|
||||
}
|
||||
// if (res?.error) {
|
||||
// events.emit(ModelEvent.OnModelFail, { error: res.error })
|
||||
// return Promise.reject(res.error)
|
||||
// } else {
|
||||
// this.loadedModel = model
|
||||
// events.emit(ModelEvent.OnModelReady, model)
|
||||
// return Promise.resolve()
|
||||
// }
|
||||
return Promise.resolve()
|
||||
}
|
||||
/**
|
||||
* Stops the model.
|
||||
|
||||
@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine {
|
||||
* Inference request
|
||||
*/
|
||||
override async inference(data: MessageRequest) {
|
||||
if (data.model?.engine?.toString() !== this.provider) return
|
||||
if (!data.model?.id) {
|
||||
events.emit(MessageEvent.OnMessageResponse, {
|
||||
status: MessageStatus.Error,
|
||||
content: [
|
||||
{
|
||||
type: ContentType.Text,
|
||||
text: {
|
||||
value: 'No model ID provided',
|
||||
annotations: [],
|
||||
},
|
||||
},
|
||||
],
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const timestamp = Date.now()
|
||||
const message: ThreadMessage = {
|
||||
@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine {
|
||||
model: model.id,
|
||||
stream: true,
|
||||
...model.parameters,
|
||||
...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}),
|
||||
}
|
||||
if (this.transformPayload) {
|
||||
requestBody = this.transformPayload(requestBody)
|
||||
|
||||
@ -10,7 +10,7 @@ export function requestInference(
|
||||
requestBody: any,
|
||||
model: {
|
||||
id: string
|
||||
parameters: ModelRuntimeParams
|
||||
parameters?: ModelRuntimeParams
|
||||
},
|
||||
controller?: AbortController,
|
||||
headers?: HeadersInit,
|
||||
@ -22,7 +22,7 @@ export function requestInference(
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Access-Control-Allow-Origin': '*',
|
||||
'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
|
||||
'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json',
|
||||
...headers,
|
||||
},
|
||||
body: JSON.stringify(requestBody),
|
||||
@ -45,7 +45,7 @@ export function requestInference(
|
||||
subscriber.complete()
|
||||
return
|
||||
}
|
||||
if (model.parameters.stream === false) {
|
||||
if (model.parameters?.stream === false) {
|
||||
const data = await response.json()
|
||||
if (transformResponse) {
|
||||
subscriber.next(transformResponse(data))
|
||||
|
||||
@ -1,13 +1,5 @@
|
||||
import { BaseExtension, ExtensionTypeEnum } from '../extension'
|
||||
import {
|
||||
GpuSetting,
|
||||
HuggingFaceRepoData,
|
||||
ImportingModel,
|
||||
Model,
|
||||
ModelFile,
|
||||
ModelInterface,
|
||||
OptionType,
|
||||
} from '../../types'
|
||||
import { Model, ModelInterface, OptionType } from '../../types'
|
||||
|
||||
/**
|
||||
* Model extension for managing models.
|
||||
@ -20,17 +12,10 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
|
||||
return ExtensionTypeEnum.Model
|
||||
}
|
||||
|
||||
abstract downloadModel(
|
||||
model: Model,
|
||||
gpuSettings?: GpuSetting,
|
||||
network?: { proxy: string; ignoreSSL?: boolean }
|
||||
): Promise<void>
|
||||
abstract cancelModelDownload(modelId: string): Promise<void>
|
||||
abstract deleteModel(model: ModelFile): Promise<void>
|
||||
abstract getDownloadedModels(): Promise<ModelFile[]>
|
||||
abstract getConfiguredModels(): Promise<ModelFile[]>
|
||||
abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void>
|
||||
abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile>
|
||||
abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData>
|
||||
abstract getDefaultModel(): Promise<Model>
|
||||
abstract getModels(): Promise<Model[]>
|
||||
abstract pullModel(model: string): Promise<void>
|
||||
abstract cancelModelPull(modelId: string): Promise<void>
|
||||
abstract importModel(model: string, modePath: string): Promise<void>
|
||||
abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
|
||||
abstract deleteModel(model: string): Promise<void>
|
||||
}
|
||||
|
||||
@ -1,32 +1,37 @@
|
||||
import * as Core from './core';
|
||||
import * as Events from './events';
|
||||
import * as FileSystem from './fs';
|
||||
import * as Extension from './extension';
|
||||
import * as Extensions from './extensions';
|
||||
import * as Tools from './tools';
|
||||
import * as Core from './core'
|
||||
import * as Events from './events'
|
||||
import * as FileSystem from './fs'
|
||||
import * as Extension from './extension'
|
||||
import * as Extensions from './extensions'
|
||||
import * as Tools from './tools'
|
||||
import * as Models from './models'
|
||||
|
||||
describe('Module Tests', () => {
|
||||
it('should export Core module', () => {
|
||||
expect(Core).toBeDefined();
|
||||
});
|
||||
it('should export Core module', () => {
|
||||
expect(Core).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export Event module', () => {
|
||||
expect(Events).toBeDefined();
|
||||
});
|
||||
it('should export Event module', () => {
|
||||
expect(Events).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export Filesystem module', () => {
|
||||
expect(FileSystem).toBeDefined();
|
||||
});
|
||||
it('should export Filesystem module', () => {
|
||||
expect(FileSystem).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export Extension module', () => {
|
||||
expect(Extension).toBeDefined();
|
||||
});
|
||||
it('should export Extension module', () => {
|
||||
expect(Extension).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export all base extensions', () => {
|
||||
expect(Extensions).toBeDefined();
|
||||
});
|
||||
it('should export all base extensions', () => {
|
||||
expect(Extensions).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export all base tools', () => {
|
||||
expect(Tools).toBeDefined();
|
||||
});
|
||||
});
|
||||
it('should export all base tools', () => {
|
||||
expect(Tools).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export all base tools', () => {
|
||||
expect(Models).toBeDefined()
|
||||
})
|
||||
})
|
||||
|
||||
@ -33,3 +33,9 @@ export * from './extensions'
|
||||
* @module
|
||||
*/
|
||||
export * from './tools'
|
||||
|
||||
/**
|
||||
* Export all base models.
|
||||
* @module
|
||||
*/
|
||||
export * from './models'
|
||||
|
||||
5
core/src/browser/models/index.ts
Normal file
5
core/src/browser/models/index.ts
Normal file
@ -0,0 +1,5 @@
|
||||
/**
|
||||
* Export ModelManager
|
||||
* @module
|
||||
*/
|
||||
export { ModelManager } from './manager'
|
||||
40
core/src/browser/models/manager.ts
Normal file
40
core/src/browser/models/manager.ts
Normal file
@ -0,0 +1,40 @@
|
||||
import { Model, ModelEvent } from '../../types'
|
||||
import { events } from '../events'
|
||||
|
||||
/**
|
||||
* Manages the registered models across extensions.
|
||||
*/
|
||||
export class ModelManager {
|
||||
public models = new Map<string, Model>()
|
||||
|
||||
constructor() {
|
||||
if (window) {
|
||||
window.core.modelManager = this
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a model.
|
||||
* @param model - The model to register.
|
||||
*/
|
||||
register<T extends Model>(model: T) {
|
||||
this.models.set(model.id, model)
|
||||
events.emit(ModelEvent.OnModelsUpdate, {})
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a model by it's id.
|
||||
* @param id - The id of the model to retrieve.
|
||||
* @returns The model, if found.
|
||||
*/
|
||||
get<T extends Model>(id: string): T | undefined {
|
||||
return this.models.get(id) as T | undefined
|
||||
}
|
||||
|
||||
/**
|
||||
* The instance of the tool manager.
|
||||
*/
|
||||
static instance(): ModelManager {
|
||||
return (window.core?.modelManager as ModelManager) ?? new ModelManager()
|
||||
}
|
||||
}
|
||||
@ -69,9 +69,11 @@ export enum DownloadRoute {
|
||||
}
|
||||
|
||||
export enum DownloadEvent {
|
||||
onFileDownloadUpdate = 'onFileDownloadUpdate',
|
||||
onFileDownloadError = 'onFileDownloadError',
|
||||
onFileDownloadSuccess = 'onFileDownloadSuccess',
|
||||
onFileDownloadUpdate = 'DownloadUpdated',
|
||||
onFileDownloadError = 'DownloadError',
|
||||
onFileDownloadSuccess = 'DownloadSuccess',
|
||||
onFileDownloadStopped = 'DownloadStopped',
|
||||
onFileDownloadStarted = 'DownloadStarted',
|
||||
onFileUnzipSuccess = 'onFileUnzipSuccess',
|
||||
}
|
||||
|
||||
|
||||
@ -6,8 +6,8 @@ import { FileMetadata } from '../file'
|
||||
*/
|
||||
export type ModelInfo = {
|
||||
id: string
|
||||
settings: ModelSettingParams
|
||||
parameters: ModelRuntimeParams
|
||||
settings?: ModelSettingParams
|
||||
parameters?: ModelRuntimeParams
|
||||
engine?: InferenceEngine
|
||||
}
|
||||
|
||||
@ -28,9 +28,10 @@ export enum InferenceEngine {
|
||||
nitro_tensorrt_llm = 'nitro-tensorrt-llm',
|
||||
cohere = 'cohere',
|
||||
nvidia = 'nvidia',
|
||||
cortex_llamacpp = 'cortex.llamacpp',
|
||||
cortex_onnx = 'cortex.onnx',
|
||||
cortex_tensorrtllm = 'cortex.tensorrt-llm',
|
||||
cortex = 'cortex',
|
||||
cortex_llamacpp = 'llama-cpp',
|
||||
cortex_onnx = 'onnxruntime',
|
||||
cortex_tensorrtllm = '.tensorrt-llm',
|
||||
}
|
||||
|
||||
export type ModelArtifact = {
|
||||
@ -153,8 +154,3 @@ export type ModelRuntimeParams = {
|
||||
export type ModelInitFailed = Model & {
|
||||
error: Error
|
||||
}
|
||||
|
||||
/**
|
||||
* ModelFile is the model.json entity and it's file metadata
|
||||
*/
|
||||
export type ModelFile = Model & FileMetadata
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { GpuSetting } from '../miscellaneous'
|
||||
import { Model, ModelFile } from './modelEntity'
|
||||
import { Model } from './modelEntity'
|
||||
import { OptionType } from './modelImport'
|
||||
|
||||
/**
|
||||
* Model extension for managing models.
|
||||
@ -8,38 +8,41 @@ export interface ModelInterface {
|
||||
/**
|
||||
* Downloads a model.
|
||||
* @param model - The model to download.
|
||||
* @param network - Optional object to specify proxy/whether to ignore SSL certificates.
|
||||
* @returns A Promise that resolves when the model has been downloaded.
|
||||
*/
|
||||
downloadModel(
|
||||
model: ModelFile,
|
||||
gpuSettings?: GpuSetting,
|
||||
network?: { ignoreSSL?: boolean; proxy?: string }
|
||||
): Promise<void>
|
||||
pullModel(model: string): Promise<void>
|
||||
|
||||
/**
|
||||
* Cancels the download of a specific model.
|
||||
* @param {string} modelId - The ID of the model to cancel the download for.
|
||||
* @returns {Promise<void>} A promise that resolves when the download has been cancelled.
|
||||
*/
|
||||
cancelModelDownload(modelId: string): Promise<void>
|
||||
cancelModelPull(modelId: string): Promise<void>
|
||||
|
||||
/**
|
||||
* Deletes a model.
|
||||
* @param modelId - The ID of the model to delete.
|
||||
* @returns A Promise that resolves when the model has been deleted.
|
||||
*/
|
||||
deleteModel(model: ModelFile): Promise<void>
|
||||
deleteModel(model: string): Promise<void>
|
||||
|
||||
/**
|
||||
* Gets a list of downloaded models.
|
||||
* Gets downloaded models.
|
||||
* @returns A Promise that resolves with an array of downloaded models.
|
||||
*/
|
||||
getDownloadedModels(): Promise<ModelFile[]>
|
||||
getModels(): Promise<Model[]>
|
||||
|
||||
/**
|
||||
* Gets a list of configured models.
|
||||
* @returns A Promise that resolves with an array of configured models.
|
||||
* Update a pulled model's metadata
|
||||
* @param model - The model to update.
|
||||
* @returns A Promise that resolves when the model has been updated.
|
||||
*/
|
||||
getConfiguredModels(): Promise<ModelFile[]>
|
||||
updateModel(model: Partial<Model>): Promise<Model>
|
||||
|
||||
/**
|
||||
* Import an existing model file.
|
||||
* @param model id of the model to import
|
||||
* @param modelPath - path of the model file
|
||||
*/
|
||||
importModel(model: string, modePath: string): Promise<void>
|
||||
}
|
||||
|
||||
@ -102,7 +102,7 @@ Enable the GPU acceleration option within the Jan application by following the [
|
||||
],
|
||||
"size": 669000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
```
|
||||
### Step 2: Modify the `model.json`
|
||||
|
||||
@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib'
|
||||
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
|
||||
import { readEmbeddingEngine } from './engine'
|
||||
|
||||
import path from 'path'
|
||||
|
||||
export class Retrieval {
|
||||
public chunkSize: number = 100
|
||||
public chunkOverlap?: number = 0
|
||||
|
||||
@ -4,10 +4,10 @@ set /p CORTEX_VERSION=<./bin/version.txt
|
||||
|
||||
@REM Download cortex.llamacpp binaries
|
||||
set VERSION=v0.1.25
|
||||
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.25-windows-amd64
|
||||
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.34-windows-amd64
|
||||
set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
|
||||
|
||||
call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz
|
||||
call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp
|
||||
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp
|
||||
36
extensions/inference-cortex-extension/download.sh
Executable file
36
extensions/inference-cortex-extension/download.sh
Executable file
@ -0,0 +1,36 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Read CORTEX_VERSION
|
||||
CORTEX_VERSION=$(cat ./bin/version.txt)
|
||||
CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
|
||||
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.34/cortex.llamacpp-0.1.34"
|
||||
# Detect platform
|
||||
OS_TYPE=$(uname)
|
||||
|
||||
if [ "$OS_TYPE" == "Linux" ]; then
|
||||
# Linux downloads
|
||||
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
|
||||
chmod +x "./bin/cortex"
|
||||
|
||||
# Download engines for Linux
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1
|
||||
|
||||
elif [ "$OS_TYPE" == "Darwin" ]; then
|
||||
# macOS downloads
|
||||
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
|
||||
chmod +x "./bin/cortex"
|
||||
|
||||
# Download engines for macOS
|
||||
download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp
|
||||
download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp
|
||||
|
||||
else
|
||||
echo "Unsupported operating system: $OS_TYPE"
|
||||
exit 1
|
||||
fi
|
||||
@ -10,12 +10,12 @@
|
||||
"scripts": {
|
||||
"test": "jest",
|
||||
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
|
||||
"downloadnitro:linux:darwin": "./download.sh",
|
||||
"downloadnitro:win32": "download.bat",
|
||||
"downloadnitro": "run-script-os",
|
||||
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish": "yarn test && run-script-os"
|
||||
"downloadcortex:linux:darwin": "./download.sh",
|
||||
"downloadcortex:win32": "download.bat",
|
||||
"downloadcortex": "run-script-os",
|
||||
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish": "run-script-os"
|
||||
},
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
@ -50,6 +50,7 @@
|
||||
"cpu-instructions": "^0.0.13",
|
||||
"decompress": "^4.2.1",
|
||||
"fetch-retry": "^5.0.6",
|
||||
"ky": "^1.7.2",
|
||||
"rxjs": "^7.8.1",
|
||||
"tcp-port-used": "^1.0.2",
|
||||
"terminate": "2.6.1",
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["34B", "Finetuned"],
|
||||
"size": 21556982144
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 5056982144
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["Vision"],
|
||||
"size": 5750000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -30,5 +30,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4370000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["22B", "Finetuned", "Featured"],
|
||||
"size": 13341237440
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["34B", "Finetuned"],
|
||||
"size": 21500000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["Tiny"],
|
||||
"size": 1430000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["33B"],
|
||||
"size": 19940000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["2B", "Finetuned", "Tiny"],
|
||||
"size": 1630000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 5330000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -37,5 +37,5 @@
|
||||
],
|
||||
"size": 16600000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -38,5 +38,5 @@
|
||||
],
|
||||
"size": 1710000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -37,5 +37,5 @@
|
||||
],
|
||||
"size": 5760000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["70B", "Foundational Model"],
|
||||
"size": 43920000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["7B", "Foundational Model"],
|
||||
"size": 4080000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["8B"],
|
||||
"size": 4920000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -34,5 +34,5 @@
|
||||
],
|
||||
"size": 4920000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -37,5 +37,5 @@
|
||||
],
|
||||
"size": 42500000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -37,5 +37,5 @@
|
||||
],
|
||||
"size": 4920000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["1B", "Featured"],
|
||||
"size": 1320000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["3B", "Featured"],
|
||||
"size": 3420000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -34,5 +34,5 @@
|
||||
],
|
||||
"size": 1170000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -32,5 +32,5 @@
|
||||
"tags": ["Vision"],
|
||||
"size": 7870000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -32,5 +32,5 @@
|
||||
"tags": ["Vision"],
|
||||
"size": 4370000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -32,5 +32,5 @@
|
||||
"size": 4370000000,
|
||||
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png"
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -30,5 +30,5 @@
|
||||
"tags": ["70B", "Foundational Model"],
|
||||
"size": 26440000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4370000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["Recommended", "7B", "Finetuned"],
|
||||
"size": 4370000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -34,5 +34,5 @@
|
||||
],
|
||||
"size": 2320000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -34,5 +34,5 @@
|
||||
],
|
||||
"size": 8366000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["34B", "Finetuned"],
|
||||
"size": 20220000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4770000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4680000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["14B", "Featured"],
|
||||
"size": 8990000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["32B"],
|
||||
"size": 19900000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["72B"],
|
||||
"size": 47400000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["7B", "Featured"],
|
||||
"size": 4680000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["7B", "Featured"],
|
||||
"size": 4680000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["3B", "Finetuned", "Tiny"],
|
||||
"size": 2970000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -30,5 +30,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4370000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["Tiny", "Foundation Model"],
|
||||
"size": 669000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"size": 4370000000,
|
||||
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4410000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["Recommended", "13B", "Finetuned"],
|
||||
"size": 7870000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["34B", "Foundational Model"],
|
||||
"size": 20660000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -114,19 +114,7 @@ export default [
|
||||
]),
|
||||
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
||||
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
||||
INFERENCE_URL: JSON.stringify(
|
||||
process.env.INFERENCE_URL ||
|
||||
'http://127.0.0.1:3928/inferences/server/chat_completion'
|
||||
),
|
||||
TROUBLESHOOTING_URL: JSON.stringify(
|
||||
'https://jan.ai/guides/troubleshooting'
|
||||
),
|
||||
JAN_SERVER_INFERENCE_URL: JSON.stringify(
|
||||
'http://localhost:1337/v1/chat/completions'
|
||||
),
|
||||
CUDA_DOWNLOAD_URL: JSON.stringify(
|
||||
'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz'
|
||||
),
|
||||
CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291/v1'),
|
||||
}),
|
||||
// Allow json resolution
|
||||
json(),
|
||||
@ -1,7 +1,5 @@
|
||||
declare const NODE: string
|
||||
declare const INFERENCE_URL: string
|
||||
declare const TROUBLESHOOTING_URL: string
|
||||
declare const JAN_SERVER_INFERENCE_URL: string
|
||||
declare const CORTEX_API_URL: string
|
||||
declare const DEFAULT_SETTINGS: Array<any>
|
||||
declare const MODELS: Array<any>
|
||||
|
||||
111
extensions/inference-cortex-extension/src/index.ts
Normal file
111
extensions/inference-cortex-extension/src/index.ts
Normal file
@ -0,0 +1,111 @@
|
||||
/**
|
||||
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
|
||||
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
||||
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
||||
* @version 1.0.0
|
||||
* @module inference-extension/src/index
|
||||
*/
|
||||
|
||||
import {
|
||||
Model,
|
||||
executeOnMain,
|
||||
systemInformation,
|
||||
log,
|
||||
joinPath,
|
||||
dirName,
|
||||
LocalOAIEngine,
|
||||
InferenceEngine,
|
||||
} from '@janhq/core'
|
||||
|
||||
import ky from 'ky'
|
||||
|
||||
/**
|
||||
* A class that implements the InferenceExtension interface from the @janhq/core package.
|
||||
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
||||
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
||||
*/
|
||||
export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
// DEPRECATED
|
||||
nodeModule: string = 'node'
|
||||
|
||||
provider: string = InferenceEngine.cortex
|
||||
|
||||
/**
|
||||
* The URL for making inference requests.
|
||||
*/
|
||||
inferenceUrl = `${CORTEX_API_URL}/chat/completions`
|
||||
|
||||
/**
|
||||
* Subscribes to events emitted by the @janhq/core package.
|
||||
*/
|
||||
async onLoad() {
|
||||
const models = MODELS as Model[]
|
||||
|
||||
this.registerModels(models)
|
||||
|
||||
super.onLoad()
|
||||
|
||||
// Run the process watchdog
|
||||
const systemInfo = await systemInformation()
|
||||
executeOnMain(NODE, 'run', systemInfo)
|
||||
}
|
||||
|
||||
onUnload(): void {
|
||||
executeOnMain(NODE, 'dispose')
|
||||
super.onUnload()
|
||||
}
|
||||
|
||||
override async loadModel(
|
||||
model: Model & { file_path?: string }
|
||||
): Promise<void> {
|
||||
// Legacy model cache - should import
|
||||
if (model.engine === InferenceEngine.nitro && model.file_path) {
|
||||
// Try importing the model
|
||||
await ky
|
||||
.post(`${CORTEX_API_URL}/models/${model.id}`, {
|
||||
json: { model: model.id, modelPath: await this.modelPath(model) },
|
||||
})
|
||||
.json()
|
||||
.catch((e) => log(e.message ?? e ?? ''))
|
||||
}
|
||||
|
||||
return ky
|
||||
.post(`${CORTEX_API_URL}/models/start`, {
|
||||
json: {
|
||||
...model.settings,
|
||||
model: model.id,
|
||||
engine:
|
||||
model.engine === InferenceEngine.nitro // Legacy model cache
|
||||
? InferenceEngine.cortex_llamacpp
|
||||
: model.engine,
|
||||
},
|
||||
})
|
||||
.json()
|
||||
.catch(async (e) => {
|
||||
throw (await e.response?.json()) ?? e
|
||||
})
|
||||
.then()
|
||||
}
|
||||
|
||||
override async unloadModel(model: Model): Promise<void> {
|
||||
return ky
|
||||
.post(`${CORTEX_API_URL}/models/stop`, {
|
||||
json: { model: model.id },
|
||||
})
|
||||
.json()
|
||||
.then()
|
||||
}
|
||||
|
||||
private async modelPath(
|
||||
model: Model & { file_path?: string }
|
||||
): Promise<string> {
|
||||
if (!model.file_path) return model.id
|
||||
return await joinPath([
|
||||
await dirName(model.file_path),
|
||||
model.sources[0]?.filename ??
|
||||
model.settings?.llama_model_path ??
|
||||
model.sources[0]?.url.split('/').pop() ??
|
||||
model.id,
|
||||
])
|
||||
}
|
||||
}
|
||||
@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from '@jest/globals'
|
||||
import { executableNitroFile } from './execute'
|
||||
import { executableCortexFile } from './execute'
|
||||
import { GpuSetting } from '@janhq/core'
|
||||
import { cpuInfo } from 'cpu-instructions'
|
||||
|
||||
@ -30,7 +30,7 @@ jest.mock('cpu-instructions', () => ({
|
||||
let mock = cpuInfo.cpuInfo as jest.Mock
|
||||
mock.mockReturnValue([])
|
||||
|
||||
describe('test executable nitro file', () => {
|
||||
describe('test executable cortex file', () => {
|
||||
afterAll(function () {
|
||||
Object.defineProperty(process, 'platform', {
|
||||
value: originalPlatform,
|
||||
@ -44,10 +44,13 @@ describe('test executable nitro file', () => {
|
||||
Object.defineProperty(process, 'arch', {
|
||||
value: 'arm64',
|
||||
})
|
||||
expect(executableNitroFile(testSettings)).toEqual(
|
||||
expect(executableCortexFile(testSettings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`mac-arm64`),
|
||||
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(),
|
||||
executablePath:
|
||||
originalPlatform === 'darwin'
|
||||
? expect.stringContaining(`/cortex`)
|
||||
: expect.anything(),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -55,10 +58,13 @@ describe('test executable nitro file', () => {
|
||||
Object.defineProperty(process, 'arch', {
|
||||
value: 'x64',
|
||||
})
|
||||
expect(executableNitroFile(testSettings)).toEqual(
|
||||
expect(executableCortexFile(testSettings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`mac-x64`),
|
||||
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
|
||||
executablePath:
|
||||
originalPlatform === 'darwin'
|
||||
? expect.stringContaining(`/cortex`)
|
||||
: expect.anything(),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -73,10 +79,10 @@ describe('test executable nitro file', () => {
|
||||
...testSettings,
|
||||
run_mode: 'cpu',
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`win`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||
executablePath: expect.stringContaining(`/cortex.exe`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -107,10 +113,10 @@ describe('test executable nitro file', () => {
|
||||
},
|
||||
],
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`win-cuda-11-7`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||
executablePath: expect.stringContaining(`/cortex.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -141,10 +147,10 @@ describe('test executable nitro file', () => {
|
||||
},
|
||||
],
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`win-cuda-12-0`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||
executablePath: expect.stringContaining(`/cortex.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -159,10 +165,10 @@ describe('test executable nitro file', () => {
|
||||
...testSettings,
|
||||
run_mode: 'cpu',
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
executablePath: expect.stringContaining(`/cortex`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -193,10 +199,10 @@ describe('test executable nitro file', () => {
|
||||
},
|
||||
],
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux-cuda-11-7`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
executablePath: expect.stringContaining(`/cortex`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -227,10 +233,10 @@ describe('test executable nitro file', () => {
|
||||
},
|
||||
],
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux-cuda-12-0`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
executablePath: expect.stringContaining(`/cortex`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -251,10 +257,10 @@ describe('test executable nitro file', () => {
|
||||
cpuInstructions.forEach((instruction) => {
|
||||
mock.mockReturnValue([instruction])
|
||||
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux-${instruction}`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
executablePath: expect.stringContaining(`/cortex`),
|
||||
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
@ -274,10 +280,10 @@ describe('test executable nitro file', () => {
|
||||
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||
cpuInstructions.forEach((instruction) => {
|
||||
mock.mockReturnValue([instruction])
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`win-${instruction}`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||
executablePath: expect.stringContaining(`/cortex.exe`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -313,10 +319,10 @@ describe('test executable nitro file', () => {
|
||||
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||
cpuInstructions.forEach((instruction) => {
|
||||
mock.mockReturnValue([instruction])
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`win-cuda-12-0`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||
executablePath: expect.stringContaining(`/cortex.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -352,10 +358,10 @@ describe('test executable nitro file', () => {
|
||||
}
|
||||
cpuInstructions.forEach((instruction) => {
|
||||
mock.mockReturnValue([instruction])
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux-cuda-12-0`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
executablePath: expect.stringContaining(`/cortex`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -392,10 +398,10 @@ describe('test executable nitro file', () => {
|
||||
}
|
||||
cpuInstructions.forEach((instruction) => {
|
||||
mock.mockReturnValue([instruction])
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux-vulkan`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
executablePath: expect.stringContaining(`/cortex`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -418,10 +424,13 @@ describe('test executable nitro file', () => {
|
||||
run_mode: 'cpu',
|
||||
}
|
||||
mock.mockReturnValue([])
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`mac-x64`),
|
||||
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
|
||||
executablePath:
|
||||
originalPlatform === 'darwin'
|
||||
? expect.stringContaining(`/cortex`)
|
||||
: expect.anything(),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -2,7 +2,7 @@ import { GpuSetting } from '@janhq/core'
|
||||
import * as path from 'path'
|
||||
import { cpuInfo } from 'cpu-instructions'
|
||||
|
||||
export interface NitroExecutableOptions {
|
||||
export interface CortexExecutableOptions {
|
||||
enginePath: string
|
||||
executablePath: string
|
||||
cudaVisibleDevices: string
|
||||
@ -81,9 +81,9 @@ const cpuInstructions = () => {
|
||||
* Find which executable file to run based on the current platform.
|
||||
* @returns The name of the executable file to run.
|
||||
*/
|
||||
export const executableNitroFile = (
|
||||
export const executableCortexFile = (
|
||||
gpuSetting?: GpuSetting
|
||||
): NitroExecutableOptions => {
|
||||
): CortexExecutableOptions => {
|
||||
let engineFolder = [
|
||||
os(),
|
||||
...(gpuSetting?.vulkan
|
||||
@ -99,7 +99,7 @@ export const executableNitroFile = (
|
||||
.join('-')
|
||||
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
||||
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
||||
let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}`
|
||||
let binaryName = `cortex${extension()}`
|
||||
|
||||
return {
|
||||
enginePath: path.join(__dirname, '..', 'bin', engineFolder),
|
||||
94
extensions/inference-cortex-extension/src/node/index.test.ts
Normal file
94
extensions/inference-cortex-extension/src/node/index.test.ts
Normal file
@ -0,0 +1,94 @@
|
||||
jest.mock('@janhq/core/node', () => ({
|
||||
...jest.requireActual('@janhq/core/node'),
|
||||
getJanDataFolderPath: () => '',
|
||||
getSystemResourceInfo: () => {
|
||||
return {
|
||||
cpu: {
|
||||
cores: 1,
|
||||
logicalCores: 1,
|
||||
threads: 1,
|
||||
model: 'model',
|
||||
speed: 1,
|
||||
},
|
||||
memory: {
|
||||
total: 1,
|
||||
free: 1,
|
||||
},
|
||||
gpu: {
|
||||
model: 'model',
|
||||
memory: 1,
|
||||
cuda: {
|
||||
version: 'version',
|
||||
devices: 'devices',
|
||||
},
|
||||
vulkan: {
|
||||
version: 'version',
|
||||
devices: 'devices',
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
}))
|
||||
|
||||
jest.mock('fs', () => ({
|
||||
default: {
|
||||
readdirSync: () => [],
|
||||
},
|
||||
}))
|
||||
|
||||
jest.mock('child_process', () => ({
|
||||
exec: () => {
|
||||
return {
|
||||
stdout: { on: jest.fn() },
|
||||
stderr: { on: jest.fn() },
|
||||
on: jest.fn(),
|
||||
}
|
||||
},
|
||||
spawn: () => {
|
||||
return {
|
||||
stdout: { on: jest.fn() },
|
||||
stderr: { on: jest.fn() },
|
||||
on: jest.fn(),
|
||||
pid: '111',
|
||||
}
|
||||
},
|
||||
}))
|
||||
|
||||
jest.mock('./execute', () => ({
|
||||
executableCortexFile: () => {
|
||||
return {
|
||||
enginePath: 'enginePath',
|
||||
executablePath: 'executablePath',
|
||||
cudaVisibleDevices: 'cudaVisibleDevices',
|
||||
vkVisibleDevices: 'vkVisibleDevices',
|
||||
}
|
||||
},
|
||||
}))
|
||||
|
||||
import index from './index'
|
||||
|
||||
describe('dispose', () => {
|
||||
it('should dispose a model successfully on Mac', async () => {
|
||||
Object.defineProperty(process, 'platform', {
|
||||
value: 'darwin',
|
||||
})
|
||||
|
||||
// Call the dispose function
|
||||
const result = await index.dispose()
|
||||
|
||||
// Assert that the result is as expected
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
|
||||
it('should kill the subprocess successfully on Windows', async () => {
|
||||
Object.defineProperty(process, 'platform', {
|
||||
value: 'win32',
|
||||
})
|
||||
|
||||
// Call the killSubprocess function
|
||||
const result = await index.dispose()
|
||||
|
||||
// Assert that the result is as expected
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
})
|
||||
83
extensions/inference-cortex-extension/src/node/index.ts
Normal file
83
extensions/inference-cortex-extension/src/node/index.ts
Normal file
@ -0,0 +1,83 @@
|
||||
import path from 'path'
|
||||
import { log, SystemInformation } from '@janhq/core/node'
|
||||
import { executableCortexFile } from './execute'
|
||||
import { ProcessWatchdog } from './watchdog'
|
||||
|
||||
// The HOST address to use for the Nitro subprocess
|
||||
const LOCAL_PORT = '39291'
|
||||
let watchdog: ProcessWatchdog | undefined = undefined
|
||||
|
||||
/**
|
||||
* Spawns a Nitro subprocess.
|
||||
* @returns A promise that resolves when the Nitro subprocess is started.
|
||||
*/
|
||||
function run(systemInfo?: SystemInformation): Promise<any> {
|
||||
log(`[CORTEX]:: Spawning cortex subprocess...`)
|
||||
|
||||
return new Promise<void>(async (resolve, reject) => {
|
||||
let executableOptions = executableCortexFile(
|
||||
// If ngl is not set or equal to 0, run on CPU with correct instructions
|
||||
systemInfo?.gpuSetting
|
||||
? {
|
||||
...systemInfo.gpuSetting,
|
||||
run_mode: systemInfo.gpuSetting.run_mode,
|
||||
}
|
||||
: undefined
|
||||
)
|
||||
|
||||
// Execute the binary
|
||||
log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
|
||||
log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)
|
||||
|
||||
// Add engine path to the PATH and LD_LIBRARY_PATH
|
||||
process.env.PATH = (process.env.PATH || '').concat(
|
||||
path.delimiter,
|
||||
executableOptions.enginePath
|
||||
)
|
||||
log(`[CORTEX] PATH: ${process.env.PATH}`)
|
||||
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
|
||||
path.delimiter,
|
||||
executableOptions.enginePath
|
||||
)
|
||||
|
||||
watchdog = new ProcessWatchdog(
|
||||
executableOptions.executablePath,
|
||||
['--start-server', '--port', LOCAL_PORT.toString()],
|
||||
{
|
||||
cwd: executableOptions.enginePath,
|
||||
env: {
|
||||
...process.env,
|
||||
ENGINE_PATH: executableOptions.enginePath,
|
||||
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
|
||||
// Vulkan - Support 1 device at a time for now
|
||||
...(executableOptions.vkVisibleDevices?.length > 0 && {
|
||||
GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
|
||||
}),
|
||||
},
|
||||
}
|
||||
)
|
||||
watchdog.start()
|
||||
resolve()
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Every module should have a dispose function
|
||||
* This will be called when the extension is unloaded and should clean up any resources
|
||||
* Also called when app is closed
|
||||
*/
|
||||
function dispose() {
|
||||
watchdog?.terminate()
|
||||
}
|
||||
|
||||
/**
|
||||
* Cortex process info
|
||||
*/
|
||||
export interface CortexProcessInfo {
|
||||
isRunning: boolean
|
||||
}
|
||||
|
||||
export default {
|
||||
run,
|
||||
dispose,
|
||||
}
|
||||
84
extensions/inference-cortex-extension/src/node/watchdog.ts
Normal file
84
extensions/inference-cortex-extension/src/node/watchdog.ts
Normal file
@ -0,0 +1,84 @@
|
||||
import { log } from '@janhq/core/node'
|
||||
import { spawn, ChildProcess } from 'child_process'
|
||||
import { EventEmitter } from 'events'
|
||||
|
||||
interface WatchdogOptions {
|
||||
cwd?: string
|
||||
restartDelay?: number
|
||||
maxRestarts?: number
|
||||
env?: NodeJS.ProcessEnv
|
||||
}
|
||||
|
||||
export class ProcessWatchdog extends EventEmitter {
|
||||
private command: string
|
||||
private args: string[]
|
||||
private options: WatchdogOptions
|
||||
private process: ChildProcess | null
|
||||
private restartDelay: number
|
||||
private maxRestarts: number
|
||||
private restartCount: number
|
||||
private isTerminating: boolean
|
||||
|
||||
constructor(command: string, args: string[], options: WatchdogOptions = {}) {
|
||||
super()
|
||||
this.command = command
|
||||
this.args = args
|
||||
this.options = options
|
||||
this.process = null
|
||||
this.restartDelay = options.restartDelay || 5000
|
||||
this.maxRestarts = options.maxRestarts || 5
|
||||
this.restartCount = 0
|
||||
this.isTerminating = false
|
||||
}
|
||||
|
||||
start(): void {
|
||||
this.spawnProcess()
|
||||
}
|
||||
|
||||
private spawnProcess(): void {
|
||||
if (this.isTerminating) return
|
||||
|
||||
log(`Starting process: ${this.command} ${this.args.join(' ')}`)
|
||||
this.process = spawn(this.command, this.args, this.options)
|
||||
|
||||
this.process.stdout?.on('data', (data: Buffer) => {
|
||||
log(`Process output: ${data}`)
|
||||
this.emit('output', data.toString())
|
||||
})
|
||||
|
||||
this.process.stderr?.on('data', (data: Buffer) => {
|
||||
log(`Process error: ${data}`)
|
||||
this.emit('error', data.toString())
|
||||
})
|
||||
|
||||
this.process.on('close', (code: number | null) => {
|
||||
log(`Process exited with code ${code}`)
|
||||
this.emit('close', code)
|
||||
if (!this.isTerminating) {
|
||||
this.restartProcess()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
private restartProcess(): void {
|
||||
if (this.restartCount < this.maxRestarts) {
|
||||
this.restartCount++
|
||||
log(
|
||||
`Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})`
|
||||
)
|
||||
setTimeout(() => this.spawnProcess(), this.restartDelay)
|
||||
} else {
|
||||
log('Max restart attempts reached. Exiting watchdog.')
|
||||
this.emit('maxRestartsReached')
|
||||
}
|
||||
}
|
||||
|
||||
terminate(): void {
|
||||
this.isTerminating = true
|
||||
if (this.process) {
|
||||
log('Terminating watched process...')
|
||||
this.process.kill()
|
||||
}
|
||||
this.emit('terminated')
|
||||
}
|
||||
}
|
||||
@ -1 +0,0 @@
|
||||
0.5.0
|
||||
@ -1,41 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Read CORTEX_VERSION
|
||||
CORTEX_VERSION=$(cat ./bin/version.txt)
|
||||
CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
|
||||
|
||||
# Detect platform
|
||||
OS_TYPE=$(uname)
|
||||
|
||||
if [ "$OS_TYPE" == "Linux" ]; then
|
||||
# Linux downloads
|
||||
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
|
||||
chmod +x "./bin/cortex-cpp"
|
||||
|
||||
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64"
|
||||
|
||||
# Download engines for Linux
|
||||
download "${ENGINE_DOWNLOAD_URL}-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1
|
||||
download "${ENGINE_DOWNLOAD_URL}-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1
|
||||
|
||||
elif [ "$OS_TYPE" == "Darwin" ]; then
|
||||
# macOS downloads
|
||||
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/mac-arm64" 1
|
||||
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/mac-x64" 1
|
||||
chmod +x "./bin/mac-arm64/cortex-cpp"
|
||||
chmod +x "./bin/mac-x64/cortex-cpp"
|
||||
|
||||
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac"
|
||||
# Download engines for macOS
|
||||
download "${ENGINE_DOWNLOAD_URL}-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp
|
||||
download "${ENGINE_DOWNLOAD_URL}-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp
|
||||
|
||||
else
|
||||
echo "Unsupported operating system: $OS_TYPE"
|
||||
exit 1
|
||||
fi
|
||||
@ -1,193 +0,0 @@
|
||||
/**
|
||||
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
|
||||
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
||||
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
||||
* @version 1.0.0
|
||||
* @module inference-extension/src/index
|
||||
*/
|
||||
|
||||
import {
|
||||
events,
|
||||
executeOnMain,
|
||||
Model,
|
||||
ModelEvent,
|
||||
LocalOAIEngine,
|
||||
InstallationState,
|
||||
systemInformation,
|
||||
fs,
|
||||
getJanDataFolderPath,
|
||||
joinPath,
|
||||
DownloadRequest,
|
||||
baseName,
|
||||
downloadFile,
|
||||
DownloadState,
|
||||
DownloadEvent,
|
||||
ModelFile,
|
||||
} from '@janhq/core'
|
||||
|
||||
declare const CUDA_DOWNLOAD_URL: string
|
||||
/**
|
||||
* A class that implements the InferenceExtension interface from the @janhq/core package.
|
||||
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
||||
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
||||
*/
|
||||
export default class JanInferenceNitroExtension extends LocalOAIEngine {
|
||||
nodeModule: string = NODE
|
||||
provider: string = 'nitro'
|
||||
|
||||
/**
|
||||
* Checking the health for Nitro's process each 5 secs.
|
||||
*/
|
||||
private static readonly _intervalHealthCheck = 5 * 1000
|
||||
|
||||
/**
|
||||
* The interval id for the health check. Used to stop the health check.
|
||||
*/
|
||||
private getNitroProcessHealthIntervalId: NodeJS.Timeout | undefined = undefined
|
||||
|
||||
/**
|
||||
* Tracking the current state of nitro process.
|
||||
*/
|
||||
private nitroProcessInfo: any = undefined
|
||||
|
||||
/**
|
||||
* The URL for making inference requests.
|
||||
*/
|
||||
inferenceUrl = ''
|
||||
|
||||
/**
|
||||
* Subscribes to events emitted by the @janhq/core package.
|
||||
*/
|
||||
async onLoad() {
|
||||
this.inferenceUrl = INFERENCE_URL
|
||||
|
||||
// If the extension is running in the browser, use the base API URL from the core package.
|
||||
if (!('electronAPI' in window)) {
|
||||
this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions`
|
||||
}
|
||||
|
||||
this.getNitroProcessHealthIntervalId = setInterval(
|
||||
() => this.periodicallyGetNitroHealth(),
|
||||
JanInferenceNitroExtension._intervalHealthCheck
|
||||
)
|
||||
const models = MODELS as unknown as Model[]
|
||||
this.registerModels(models)
|
||||
super.onLoad()
|
||||
|
||||
// Add additional dependencies PATH to the env
|
||||
executeOnMain(NODE, 'addAdditionalDependencies', {
|
||||
name: this.name,
|
||||
version: this.version,
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Periodically check for nitro process's health.
|
||||
*/
|
||||
private async periodicallyGetNitroHealth(): Promise<void> {
|
||||
const health = await executeOnMain(NODE, 'getCurrentNitroProcessInfo')
|
||||
|
||||
const isRunning = this.nitroProcessInfo?.isRunning ?? false
|
||||
if (isRunning && health.isRunning === false) {
|
||||
console.debug('Nitro process is stopped')
|
||||
events.emit(ModelEvent.OnModelStopped, {})
|
||||
}
|
||||
this.nitroProcessInfo = health
|
||||
}
|
||||
|
||||
override loadModel(model: ModelFile): Promise<void> {
|
||||
if (model.engine !== this.provider) return Promise.resolve()
|
||||
this.getNitroProcessHealthIntervalId = setInterval(
|
||||
() => this.periodicallyGetNitroHealth(),
|
||||
JanInferenceNitroExtension._intervalHealthCheck
|
||||
)
|
||||
return super.loadModel(model)
|
||||
}
|
||||
|
||||
override async unloadModel(model?: Model): Promise<void> {
|
||||
if (model?.engine && model.engine !== this.provider) return
|
||||
|
||||
// stop the periocally health check
|
||||
if (this.getNitroProcessHealthIntervalId) {
|
||||
clearInterval(this.getNitroProcessHealthIntervalId)
|
||||
this.getNitroProcessHealthIntervalId = undefined
|
||||
}
|
||||
return super.unloadModel(model)
|
||||
}
|
||||
|
||||
override async install(): Promise<void> {
|
||||
const info = await systemInformation()
|
||||
|
||||
const platform = info.osInfo?.platform === 'win32' ? 'windows' : 'linux'
|
||||
const downloadUrl = CUDA_DOWNLOAD_URL
|
||||
|
||||
const url = downloadUrl
|
||||
.replace('<version>', info.gpuSetting?.cuda?.version ?? '12.4')
|
||||
.replace('<platform>', platform)
|
||||
|
||||
console.debug('Downloading Cuda Toolkit Dependency: ', url)
|
||||
|
||||
const janDataFolderPath = await getJanDataFolderPath()
|
||||
|
||||
const executableFolderPath = await joinPath([
|
||||
janDataFolderPath,
|
||||
'engines',
|
||||
this.name ?? 'cortex-cpp',
|
||||
this.version ?? '1.0.0',
|
||||
])
|
||||
|
||||
if (!(await fs.existsSync(executableFolderPath))) {
|
||||
await fs.mkdir(executableFolderPath)
|
||||
}
|
||||
|
||||
const tarball = await baseName(url)
|
||||
const tarballFullPath = await joinPath([executableFolderPath, tarball])
|
||||
|
||||
const downloadRequest: DownloadRequest = {
|
||||
url,
|
||||
localPath: tarballFullPath,
|
||||
extensionId: this.name,
|
||||
downloadType: 'extension',
|
||||
}
|
||||
downloadFile(downloadRequest)
|
||||
|
||||
const onFileDownloadSuccess = async (state: DownloadState) => {
|
||||
console.log(state)
|
||||
// if other download, ignore
|
||||
if (state.fileName !== tarball) return
|
||||
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
|
||||
await executeOnMain(
|
||||
NODE,
|
||||
'decompressRunner',
|
||||
tarballFullPath,
|
||||
executableFolderPath
|
||||
)
|
||||
events.emit(DownloadEvent.onFileUnzipSuccess, state)
|
||||
}
|
||||
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
|
||||
}
|
||||
|
||||
override async installationState(): Promise<InstallationState> {
|
||||
const info = await systemInformation()
|
||||
if (
|
||||
info.gpuSetting?.run_mode === 'gpu' &&
|
||||
!info.gpuSetting?.vulkan &&
|
||||
info.osInfo &&
|
||||
info.osInfo.platform !== 'darwin' &&
|
||||
!info.gpuSetting?.cuda?.exist
|
||||
) {
|
||||
const janDataFolderPath = await getJanDataFolderPath()
|
||||
|
||||
const executableFolderPath = await joinPath([
|
||||
janDataFolderPath,
|
||||
'engines',
|
||||
this.name ?? 'cortex-cpp',
|
||||
this.version ?? '1.0.0',
|
||||
])
|
||||
|
||||
if (!(await fs.existsSync(executableFolderPath))) return 'NotInstalled'
|
||||
return 'Installed'
|
||||
}
|
||||
return 'NotRequired'
|
||||
}
|
||||
}
|
||||
@ -1,465 +0,0 @@
|
||||
// Unit tests for the nitro node module (./index). All external modules are
// mocked up-front; jest hoists these jest.mock calls above the imports below.

// fetch-retry: every HTTP call resolves as a successful, model-loaded response.
jest.mock('fetch-retry', () => ({
  default: () => () => {
    return Promise.resolve({
      ok: true,
      status: 200,
      json: () =>
        Promise.resolve({
          model_loaded: true,
        }),
      text: () => Promise.resolve(''),
    })
  },
}))

// path: inert stubs so no real filesystem paths are built.
jest.mock('path', () => ({
  default: {
    isAbsolute: jest.fn(),
    join: jest.fn(),
    parse: () => {
      return { dir: 'dir' }
    },
    delimiter: { concat: () => '' },
  },
}))

jest.mock('decompress', () => ({
  default: () => {
    return Promise.resolve()
  },
}))

// @janhq/core/node: keep real exports but pin the data folder and resources.
jest.mock('@janhq/core/node', () => ({
  ...jest.requireActual('@janhq/core/node'),
  getJanDataFolderPath: () => '',
  getSystemResourceInfo: () => {
    return {
      cpu: {
        cores: 1,
        logicalCores: 1,
        threads: 1,
        model: 'model',
        speed: 1,
      },
      memory: {
        total: 1,
        free: 1,
      },
      gpu: {
        model: 'model',
        memory: 1,
        cuda: {
          version: 'version',
          devices: 'devices',
        },
        vulkan: {
          version: 'version',
          devices: 'devices',
        },
      },
    }
  },
}))

// fs: empty model folder by default.
jest.mock('fs', () => ({
  default: {
    readdirSync: () => [],
  },
}))

// child_process: processes never really spawn; streams are no-op emitters.
jest.mock('child_process', () => ({
  exec: () => {
    return {
      stdout: { on: jest.fn() },
      stderr: { on: jest.fn() },
      on: jest.fn(),
    }
  },
  spawn: () => {
    return {
      stdout: { on: jest.fn() },
      stderr: { on: jest.fn() },
      on: jest.fn(),
      pid: '111',
    }
  },
}))

// tcp-port-used: the port is always immediately free/used.
jest.mock('tcp-port-used', () => ({
  default: {
    waitUntilFree: () => Promise.resolve(true),
    waitUntilUsed: () => Promise.resolve(true),
  },
}))

// ./execute: fixed executable metadata for the spawned binary.
jest.mock('./execute', () => ({
  executableNitroFile: () => {
    return {
      enginePath: 'enginePath',
      executablePath: 'executablePath',
      cudaVisibleDevices: 'cudaVisibleDevices',
      vkVisibleDevices: 'vkVisibleDevices',
    }
  },
}))

// terminate: invoke the callback synchronously (simulates a clean kill).
jest.mock('terminate', () => ({
  default: (id: String, func: Function) => {
    console.log(id)
    func()
  },
}))

import * as execute from './execute'
import index from './index'

let executeMock = execute

// Minimal nitro model descriptor shared (and mutated!) by the tests below.
const modelInitOptions: any = {
  modelFolder: '/path/to/model',
  model: {
    id: 'test',
    name: 'test',
    engine: 'nitro',
    version: '0.0',
    format: 'GGUF',
    object: 'model',
    sources: [],
    created: 0,
    description: 'test',
    parameters: {},
    metadata: {
      author: '',
      tags: [],
      size: 0,
    },
    settings: {
      prompt_template: '{prompt}',
      llama_model_path: 'model.gguf',
    },
  },
}

describe('loadModel', () => {
  it('should load a model successfully', async () => {
    // Mock the necessary parameters and system information

    const systemInfo = {
      // Mock the system information if needed
    }

    // Call the loadModel function
    const result = await index.loadModel(modelInitOptions, systemInfo)

    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })

  it('should reject with an error message if the model is not a nitro model', async () => {
    // Mock the necessary parameters and system information

    const systemInfo = {
      // Mock the system information if needed
    }
    modelInitOptions.model.engine = 'not-nitro'
    // Call the loadModel function
    // NOTE(review): loadModel resolves (does not reject) for non-nitro
    // engines, so this catch block may never run and the test can pass
    // vacuously — consider expect.assertions(1) to confirm intent.
    try {
      await index.loadModel(modelInitOptions, systemInfo)
    } catch (error) {
      // Assert that the error message is as expected
      expect(error).toBe('Not a cortex model')
    }
    modelInitOptions.model.engine = 'nitro'
  })

  it('should reject if model load failed with an error message', async () => {
    // Mock the necessary parameters and system information

    const systemInfo = {
      // Mock the system information if needed
    }
    // Mock the fetch-retry module to return a failed response
    // NOTE(review): jest.mock inside a test body is hoisted/registered once
    // per module registry — this likely does not replace the top-level mock;
    // verify with jest.doMock + jest.resetModules if the failure path matters.
    jest.mock('fetch-retry', () => ({
      default: () => () => {
        return Promise.resolve({
          ok: false,
          status: 500,
          json: () =>
            Promise.resolve({
              model_loaded: false,
            }),
          text: () => Promise.resolve('Failed to load model'),
        })
      },
    }))

    // Call the loadModel function
    try {
      await index.loadModel(modelInitOptions, systemInfo)
    } catch (error) {
      // Assert that the error message is as expected
      expect(error).toBe('Failed to load model')
    }
  })

  it('should reject if port not available', async () => {
    // Mock the necessary parameters and system information

    const systemInfo = {
      // Mock the system information if needed
    }

    // Mock the tcp-port-used module to return false
    jest.mock('tcp-port-used', () => ({
      default: {
        waitUntilFree: () => Promise.resolve(false),
        waitUntilUsed: () => Promise.resolve(false),
      },
    }))

    // Call the loadModel function
    try {
      await index.loadModel(modelInitOptions, systemInfo)
    } catch (error) {
      // Assert that the error message is as expected
      expect(error).toBe('Port not available')
    }
  })

  it('should run on GPU model if ngl is set', async () => {
    const systemInfo: any = {
      gpuSetting: {
        run_mode: 'gpu',
      },
    }
    // Spy executableNitroFile
    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
      enginePath: '',
      executablePath: '',
      cudaVisibleDevices: '',
      vkVisibleDevices: '',
    })

    Object.defineProperty(process, 'platform', { value: 'win32' })
    await index.loadModel(
      {
        ...modelInitOptions,
        model: {
          ...modelInitOptions.model,
          settings: {
            ...modelInitOptions.model.settings,
            ngl: 40,
          },
        },
      },
      systemInfo
    )
    // ngl > 0 keeps the GPU run mode from systemInfo.
    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
      run_mode: 'gpu',
    })
  })

  it('should run on correct CPU instructions if ngl is not set', async () => {
    const systemInfo: any = {
      gpuSetting: {
        run_mode: 'gpu',
      },
    }
    // Spy executableNitroFile
    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
      enginePath: '',
      executablePath: '',
      cudaVisibleDevices: '',
      vkVisibleDevices: '',
    })

    Object.defineProperty(process, 'platform', { value: 'win32' })
    await index.loadModel(
      {
        ...modelInitOptions,
        model: {
          ...modelInitOptions.model,
          settings: {
            ...modelInitOptions.model.settings,
            ngl: undefined,
          },
        },
      },
      systemInfo
    )
    // Missing ngl forces CPU mode regardless of the GPU setting.
    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
      run_mode: 'cpu',
    })
  })

  it('should run on correct CPU instructions if ngl is 0', async () => {
    const systemInfo: any = {
      gpuSetting: {
        run_mode: 'gpu',
      },
    }
    // Spy executableNitroFile
    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
      enginePath: '',
      executablePath: '',
      cudaVisibleDevices: '',
      vkVisibleDevices: '',
    })

    Object.defineProperty(process, 'platform', { value: 'win32' })
    await index.loadModel(
      {
        ...modelInitOptions,
        model: {
          ...modelInitOptions.model,
          settings: {
            ...modelInitOptions.model.settings,
            ngl: 0,
          },
        },
      },
      systemInfo
    )
    // ngl === 0 also forces CPU mode.
    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
      run_mode: 'cpu',
    })
  })
})

describe('unloadModel', () => {
  it('should unload a model successfully', async () => {
    // Call the unloadModel function
    const result = await index.unloadModel()

    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })

  it('should reject with an error message if the model is not a nitro model', async () => {
    // Call the unloadModel function
    // NOTE(review): unloadModel takes no model argument here, so this catch
    // may never run — confirm the intended failure path.
    try {
      await index.unloadModel()
    } catch (error) {
      // Assert that the error message is as expected
      expect(error).toBe('Not a cortex model')
    }
  })

  it('should reject if model unload failed with an error message', async () => {
    // Mock the fetch-retry module to return a failed response
    jest.mock('fetch-retry', () => ({
      default: () => () => {
        return Promise.resolve({
          ok: false,
          status: 500,
          json: () =>
            Promise.resolve({
              model_unloaded: false,
            }),
          text: () => Promise.resolve('Failed to unload model'),
        })
      },
    }))

    // Call the unloadModel function
    try {
      await index.unloadModel()
    } catch (error) {
      // Assert that the error message is as expected
      expect(error).toBe('Failed to unload model')
    }
  })

  it('should reject if port not available', async () => {
    // Mock the tcp-port-used module to return false
    jest.mock('tcp-port-used', () => ({
      default: {
        waitUntilFree: () => Promise.resolve(false),
        waitUntilUsed: () => Promise.resolve(false),
      },
    }))

    // Call the unloadModel function
    try {
      await index.unloadModel()
    } catch (error) {
      // Assert that the error message is as expected
      expect(error).toBe('Port not available')
    }
  })
})
describe('dispose', () => {
  it('should dispose a model successfully on Mac', async () => {
    Object.defineProperty(process, 'platform', {
      value: 'darwin',
    })

    // Call the dispose function
    const result = await index.dispose()

    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })

  it('should kill the subprocess successfully on Windows', async () => {
    Object.defineProperty(process, 'platform', {
      value: 'win32',
    })

    // Call the killSubprocess function
    const result = await index.dispose()

    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })
})

describe('getCurrentNitroProcessInfo', () => {
  it('should return the current nitro process info', async () => {
    // Call the getCurrentNitroProcessInfo function
    const result = await index.getCurrentNitroProcessInfo()

    // Assert that the result is as expected
    expect(result).toEqual({
      isRunning: true,
    })
  })
})

describe('decompressRunner', () => {
  it('should decompress the runner successfully', async () => {
    jest.mock('decompress', () => ({
      default: () => {
        return Promise.resolve()
      },
    }))
    // Call the decompressRunner function
    const result = await index.decompressRunner('', '')

    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })
  it('should not reject if decompression failed', async () => {
    jest.mock('decompress', () => ({
      default: () => {
        return Promise.reject('Failed to decompress')
      },
    }))
    // Call the decompressRunner function
    const result = await index.decompressRunner('', '')
    expect(result).toBeUndefined()
  })
})

describe('addAdditionalDependencies', () => {
  it('should add additional dependencies successfully', async () => {
    // Call the addAdditionalDependencies function
    const result = await index.addAdditionalDependencies({
      name: 'name',
      version: 'version',
    })

    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })
})
|
||||
@ -1,501 +0,0 @@
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
|
||||
import tcpPortUsed from 'tcp-port-used'
|
||||
import fetchRT from 'fetch-retry'
|
||||
import {
|
||||
log,
|
||||
getSystemResourceInfo,
|
||||
InferenceEngine,
|
||||
ModelSettingParams,
|
||||
PromptTemplate,
|
||||
SystemInformation,
|
||||
getJanDataFolderPath,
|
||||
ModelFile,
|
||||
} from '@janhq/core/node'
|
||||
import { executableNitroFile } from './execute'
|
||||
import terminate from 'terminate'
|
||||
import decompress from 'decompress'
|
||||
|
||||
// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch)

/**
 * The response object for model init operation.
 */
interface ModelInitOptions {
  // Absolute path to the folder holding the running model's files.
  modelFolder: string
  model: ModelFile
}
// The PORT to use for the Nitro subprocess
const PORT = 3928
// The HOST address to use for the Nitro subprocess
const LOCAL_HOST = '127.0.0.1'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
// The URL for the Nitro subprocess to load a model
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
// The URL for the Nitro subprocess to kill itself
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`

// Poll interval (ms) used when waiting for the port to become free/used.
const NITRO_PORT_FREE_CHECK_INTERVAL = 100

// The supported model format
// TODO: Should be an array to support more models
const SUPPORTED_MODEL_FORMAT = '.gguf'

// The subprocess instance for Nitro
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined

// The current model settings
let currentSettings: (ModelSettingParams & { model?: string }) | undefined =
  undefined
|
||||
|
||||
/**
|
||||
* Stops a Nitro subprocess.
|
||||
* @param wrapper - The model wrapper.
|
||||
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
|
||||
*/
|
||||
function unloadModel(): Promise<void> {
|
||||
return killSubprocess()
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes a Nitro subprocess to load a machine learning model.
|
||||
* @param wrapper - The model wrapper.
|
||||
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
|
||||
* TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
|
||||
*/
|
||||
async function loadModel(
|
||||
params: ModelInitOptions,
|
||||
systemInfo?: SystemInformation
|
||||
): Promise<ModelOperationResponse | void> {
|
||||
if (params.model.engine !== InferenceEngine.nitro) {
|
||||
// Not a nitro model
|
||||
return Promise.resolve()
|
||||
}
|
||||
|
||||
if (params.model.engine !== InferenceEngine.nitro) {
|
||||
return Promise.reject('Not a cortex model')
|
||||
} else {
|
||||
const nitroResourceProbe = await getSystemResourceInfo()
|
||||
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
|
||||
if (params.model.settings.prompt_template) {
|
||||
const promptTemplate = params.model.settings.prompt_template
|
||||
const prompt = promptTemplateConverter(promptTemplate)
|
||||
if (prompt?.error) {
|
||||
return Promise.reject(prompt.error)
|
||||
}
|
||||
params.model.settings.system_prompt = prompt.system_prompt
|
||||
params.model.settings.user_prompt = prompt.user_prompt
|
||||
params.model.settings.ai_prompt = prompt.ai_prompt
|
||||
}
|
||||
|
||||
// modelFolder is the absolute path to the running model folder
|
||||
// e.g. ~/jan/models/llama-2
|
||||
let modelFolder = params.modelFolder
|
||||
|
||||
let llama_model_path = params.model.settings.llama_model_path
|
||||
|
||||
// Absolute model path support
|
||||
if (
|
||||
params.model?.sources.length &&
|
||||
params.model.sources.every((e) => fs.existsSync(e.url))
|
||||
) {
|
||||
llama_model_path =
|
||||
params.model.sources.length === 1
|
||||
? params.model.sources[0].url
|
||||
: params.model.sources.find((e) =>
|
||||
e.url.includes(llama_model_path ?? params.model.id)
|
||||
)?.url
|
||||
}
|
||||
|
||||
if (!llama_model_path || !path.isAbsolute(llama_model_path)) {
|
||||
// Look for GGUF model file
|
||||
const modelFiles: string[] = fs.readdirSync(modelFolder)
|
||||
const ggufBinFile = modelFiles.find(
|
||||
(file) =>
|
||||
// 1. Prioritize llama_model_path (predefined)
|
||||
(llama_model_path && file === llama_model_path) ||
|
||||
// 2. Prioritize GGUF File (manual import)
|
||||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) ||
|
||||
// 3. Fallback Model ID (for backward compatibility)
|
||||
file === params.model.id
|
||||
)
|
||||
if (ggufBinFile) llama_model_path = path.join(modelFolder, ggufBinFile)
|
||||
}
|
||||
|
||||
// Look for absolute source path for single model
|
||||
|
||||
if (!llama_model_path) return Promise.reject('No GGUF model file found')
|
||||
|
||||
currentSettings = {
|
||||
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
|
||||
// model.settings can override the default settings
|
||||
...params.model.settings,
|
||||
llama_model_path,
|
||||
model: params.model.id,
|
||||
// This is critical and requires real CPU physical core count (or performance core)
|
||||
...(params.model.settings.mmproj && {
|
||||
mmproj: path.isAbsolute(params.model.settings.mmproj)
|
||||
? params.model.settings.mmproj
|
||||
: path.join(modelFolder, params.model.settings.mmproj),
|
||||
}),
|
||||
}
|
||||
return runNitroAndLoadModel(params.model.id, systemInfo)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * 1. Spawn Nitro process
 * 2. Load model into Nitro subprocess
 * 3. Validate model status
 * Any stale subprocess is killed first and the port is awaited before
 * spawning, so only one cortex process owns PORT at a time.
 * @returns the validation result, or `{ error }` if any stage failed.
 */
async function runNitroAndLoadModel(
  modelId: string,
  systemInfo?: SystemInformation
) {
  // Gather system information for CPU physical cores and memory
  return killSubprocess()
    .then(() =>
      tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
    )
    .then(() => spawnNitroProcess(systemInfo))
    .then(() => loadLLMModel(currentSettings))
    .then(() => validateModelStatus(modelId))
    .catch((err) => {
      // TODO: Broadcast error so app could display proper error message
      log(`[CORTEX]::Error: ${err}`)
      // Errors are swallowed into a result object rather than rethrown.
      return { error: err }
    })
}
|
||||
|
||||
/**
|
||||
* Parse prompt template into agrs settings
|
||||
* @param promptTemplate Template as string
|
||||
* @returns
|
||||
*/
|
||||
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
|
||||
// Split the string using the markers
|
||||
const systemMarker = '{system_message}'
|
||||
const promptMarker = '{prompt}'
|
||||
|
||||
if (
|
||||
promptTemplate.includes(systemMarker) &&
|
||||
promptTemplate.includes(promptMarker)
|
||||
) {
|
||||
// Find the indices of the markers
|
||||
const systemIndex = promptTemplate.indexOf(systemMarker)
|
||||
const promptIndex = promptTemplate.indexOf(promptMarker)
|
||||
|
||||
// Extract the parts of the string
|
||||
const system_prompt = promptTemplate.substring(0, systemIndex)
|
||||
const user_prompt = promptTemplate.substring(
|
||||
systemIndex + systemMarker.length,
|
||||
promptIndex
|
||||
)
|
||||
const ai_prompt = promptTemplate.substring(
|
||||
promptIndex + promptMarker.length
|
||||
)
|
||||
|
||||
// Return the split parts
|
||||
return { system_prompt, user_prompt, ai_prompt }
|
||||
} else if (promptTemplate.includes(promptMarker)) {
|
||||
// Extract the parts of the string for the case where only promptMarker is present
|
||||
const promptIndex = promptTemplate.indexOf(promptMarker)
|
||||
const user_prompt = promptTemplate.substring(0, promptIndex)
|
||||
const ai_prompt = promptTemplate.substring(
|
||||
promptIndex + promptMarker.length
|
||||
)
|
||||
|
||||
// Return the split parts
|
||||
return { user_prompt, ai_prompt }
|
||||
}
|
||||
|
||||
// Return an error if none of the conditions are met
|
||||
return { error: 'Cannot split prompt template' }
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
|
||||
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
|
||||
*/
|
||||
function loadLLMModel(settings: any): Promise<Response> {
|
||||
if (!settings?.ngl) {
|
||||
settings.ngl = 100
|
||||
}
|
||||
log(`[CORTEX]:: Loading model with params ${JSON.stringify(settings)}`)
|
||||
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(settings),
|
||||
retries: 3,
|
||||
retryDelay: 300,
|
||||
})
|
||||
.then((res) => {
|
||||
log(
|
||||
`[CORTEX]:: Load model success with response ${JSON.stringify(
|
||||
res
|
||||
)}`
|
||||
)
|
||||
return Promise.resolve(res)
|
||||
})
|
||||
.catch((err) => {
|
||||
log(`[CORTEX]::Error: Load model failed with error ${err}`)
|
||||
return Promise.reject(err)
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Validates the status of a model.
 * @param modelId - id of the model whose load status is being checked.
 * @returns {Promise<ModelOperationResponse>} A promise that resolves to an object.
 * If the model is loaded successfully, the object is empty.
 * If the model is not loaded successfully, the object contains an error message.
 */
async function validateModelStatus(modelId: string): Promise<void> {
  // Send a GET request to the validation URL.
  // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
  log(`[CORTEX]:: Validating model ${modelId}`)
  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
    method: 'POST',
    body: JSON.stringify({
      model: modelId,
      // TODO: force to use cortex llamacpp by default
      engine: 'cortex.llamacpp',
    }),
    headers: {
      'Content-Type': 'application/json',
    },
    retries: 5,
    retryDelay: 300,
  }).then(async (res: Response) => {
    log(
      `[CORTEX]:: Validate model state with response ${JSON.stringify(
        res.status
      )}`
    )
    // If the response is OK, check model_loaded status.
    if (res.ok) {
      const body = await res.json()
      // If the model is loaded, return an empty object.
      // Otherwise, return an object with an error message.
      if (body.model_loaded) {
        log(
          `[CORTEX]:: Validate model state success with response ${JSON.stringify(
            body
          )}`
        )
        return Promise.resolve()
      }
    }
    // Any other outcome (non-OK response or model_loaded false) is a failure.
    const errorBody = await res.text()
    log(
      `[CORTEX]:: Validate model state failed with response ${errorBody} and status is ${JSON.stringify(
        res.statusText
      )}`
    )
    return Promise.reject('Validate model status failed')
  })
}
|
||||
|
||||
/**
 * Terminates the Nitro subprocess.
 * On macOS (or when no PID is tracked) it asks the server to shut itself
 * down over HTTP; elsewhere it SIGKILLs the tracked PID first and falls
 * back to the HTTP kill request if that fails or the port stays busy.
 * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
 */
async function killSubprocess(): Promise<void> {
  // Abort the HTTP kill request if it takes longer than 5 seconds.
  const controller = new AbortController()
  setTimeout(() => controller.abort(), 5000)
  log(`[CORTEX]:: Request to kill cortex`)

  const killRequest = () => {
    return fetch(NITRO_HTTP_KILL_URL, {
      method: 'DELETE',
      signal: controller.signal,
    })
      .catch(() => {}) // Do nothing with this attempt
      .then(() =>
        tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
      )
      .then(() => log(`[CORTEX]:: cortex process is terminated`))
      .catch((err) => {
        log(
          `[CORTEX]:: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
        )
        throw 'PORT_NOT_AVAILABLE'
      })
  }

  if (subprocess?.pid && process.platform !== 'darwin') {
    log(`[CORTEX]:: Killing PID ${subprocess.pid}`)
    const pid = subprocess.pid
    return new Promise((resolve, reject) => {
      terminate(pid, function (err) {
        if (err) {
          // terminate failed: fall back to asking the server to exit.
          log('[CORTEX]::Failed to kill PID - sending request to kill')
          killRequest().then(resolve).catch(reject)
        } else {
          // PID killed: confirm the port was actually released.
          tcpPortUsed
            .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
            .then(() => log(`[CORTEX]:: cortex process is terminated`))
            .then(() => resolve())
            .catch(() => {
              log(
                '[CORTEX]::Failed to kill PID (Port check timeout) - sending request to kill'
              )
              killRequest().then(resolve).catch(reject)
            })
        }
      })
    })
  } else {
    return killRequest()
  }
}
|
||||
|
||||
/**
|
||||
* Spawns a Nitro subprocess.
|
||||
* @returns A promise that resolves when the Nitro subprocess is started.
|
||||
*/
|
||||
function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
|
||||
log(`[CORTEX]:: Spawning cortex subprocess...`)
|
||||
|
||||
return new Promise<void>(async (resolve, reject) => {
|
||||
let executableOptions = executableNitroFile(
|
||||
// If ngl is not set or equal to 0, run on CPU with correct instructions
|
||||
systemInfo?.gpuSetting
|
||||
? {
|
||||
...systemInfo.gpuSetting,
|
||||
run_mode:
|
||||
currentSettings?.ngl === undefined || currentSettings.ngl === 0
|
||||
? 'cpu'
|
||||
: systemInfo.gpuSetting.run_mode,
|
||||
}
|
||||
: undefined
|
||||
)
|
||||
|
||||
const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
|
||||
// Execute the binary
|
||||
log(
|
||||
`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
|
||||
)
|
||||
log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)
|
||||
|
||||
// Add engine path to the PATH and LD_LIBRARY_PATH
|
||||
process.env.PATH = (process.env.PATH || '').concat(
|
||||
path.delimiter,
|
||||
executableOptions.enginePath
|
||||
)
|
||||
log(`[CORTEX] PATH: ${process.env.PATH}`)
|
||||
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
|
||||
path.delimiter,
|
||||
executableOptions.enginePath
|
||||
)
|
||||
|
||||
subprocess = spawn(
|
||||
executableOptions.executablePath,
|
||||
['1', LOCAL_HOST, PORT.toString()],
|
||||
{
|
||||
cwd: path.join(path.parse(executableOptions.executablePath).dir),
|
||||
env: {
|
||||
...process.env,
|
||||
ENGINE_PATH: executableOptions.enginePath,
|
||||
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
|
||||
// Vulkan - Support 1 device at a time for now
|
||||
...(executableOptions.vkVisibleDevices?.length > 0 && {
|
||||
GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
|
||||
}),
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// Handle subprocess output
|
||||
subprocess.stdout.on('data', (data: any) => {
|
||||
log(`[CORTEX]:: ${data}`)
|
||||
})
|
||||
|
||||
subprocess.stderr.on('data', (data: any) => {
|
||||
log(`[CORTEX]::Error: ${data}`)
|
||||
})
|
||||
|
||||
subprocess.on('close', (code: any) => {
|
||||
log(`[CORTEX]:: cortex exited with code: ${code}`)
|
||||
subprocess = undefined
|
||||
reject(`child process exited with code ${code}`)
|
||||
})
|
||||
|
||||
tcpPortUsed
|
||||
.waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
|
||||
.then(() => {
|
||||
log(`[CORTEX]:: cortex is ready`)
|
||||
resolve()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Every module should have a dispose function
 * This will be called when the extension is unloaded and should clean up any resources
 * Also called when app is closed
 */
function dispose() {
  // clean other registered resources here
  // Terminates the spawned cortex subprocess (if one is running).
  killSubprocess()
}
|
||||
|
||||
/**
 * Nitro process info
 */
export interface NitroProcessInfo {
  // True while the module-level subprocess handle is alive
  isRunning: boolean
}
|
||||
|
||||
/**
|
||||
* Retrieve current nitro process
|
||||
*/
|
||||
const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
|
||||
return {
|
||||
isRunning: subprocess != null,
|
||||
}
|
||||
}
|
||||
|
||||
const addAdditionalDependencies = (data: { name: string; version: string }) => {
|
||||
log(
|
||||
`[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}`
|
||||
)
|
||||
const additionalPath = path.delimiter.concat(
|
||||
path.join(getJanDataFolderPath(), 'engines', data.name, data.version)
|
||||
)
|
||||
// Set the updated PATH
|
||||
process.env.PATH = (process.env.PATH || '').concat(
|
||||
path.delimiter,
|
||||
additionalPath
|
||||
)
|
||||
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
|
||||
path.delimiter,
|
||||
additionalPath
|
||||
)
|
||||
}
|
||||
|
||||
const decompressRunner = async (zipPath: string, output: string) => {
|
||||
console.debug(`Decompressing ${zipPath} to ${output}...`)
|
||||
try {
|
||||
const files = await decompress(zipPath, output)
|
||||
console.debug('Decompress finished!', files)
|
||||
} catch (err) {
|
||||
console.error(`Decompress ${zipPath} failed: ${err}`)
|
||||
}
|
||||
}
|
||||
|
||||
// Public surface of this node module, consumed by the extension host.
export default {
  loadModel,
  unloadModel,
  dispose,
  getCurrentNitroProcessInfo,
  addAdditionalDependencies,
  decompressRunner,
}
|
||||
@ -4,7 +4,6 @@
|
||||
"version": "1.0.34",
|
||||
"description": "Model Management Extension provides model exploration and seamless downloads",
|
||||
"main": "dist/index.js",
|
||||
"node": "dist/node/index.cjs.js",
|
||||
"author": "Jan <service@jan.ai>",
|
||||
"license": "AGPL-3.0",
|
||||
"scripts": {
|
||||
@ -36,15 +35,9 @@
|
||||
"README.md"
|
||||
],
|
||||
"dependencies": {
|
||||
"@huggingface/gguf": "^0.0.11",
|
||||
"@huggingface/jinja": "^0.3.0",
|
||||
"@janhq/core": "file:../../core",
|
||||
"hyllama": "^0.2.2",
|
||||
"python-shell": "^5.0.0"
|
||||
"ky": "^1.7.2",
|
||||
"p-queue": "^8.0.1"
|
||||
},
|
||||
"bundleDependencies": [
|
||||
"hyllama",
|
||||
"@huggingface/gguf",
|
||||
"@huggingface/jinja"
|
||||
]
|
||||
"bundleDependencies": []
|
||||
}
|
||||
|
||||
@ -1,36 +0,0 @@
|
||||
{
|
||||
"object": "model",
|
||||
"version": "1.0",
|
||||
"format": "gguf",
|
||||
"sources": [
|
||||
{
|
||||
"url": "N/A",
|
||||
"filename": "N/A"
|
||||
}
|
||||
],
|
||||
"id": "N/A",
|
||||
"name": "N/A",
|
||||
"created": 0,
|
||||
"description": "User self import model",
|
||||
"settings": {
|
||||
"ctx_len": 2048,
|
||||
"embedding": false,
|
||||
"prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:",
|
||||
"llama_model_path": "N/A"
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 2048,
|
||||
"stop": ["<|END_OF_TURN_TOKEN|>", "<end_of_turn>", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "User",
|
||||
"tags": [],
|
||||
"size": 0
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
@ -6,7 +6,6 @@ import replace from '@rollup/plugin-replace'
|
||||
import commonjs from '@rollup/plugin-commonjs'
|
||||
const settingJson = require('./resources/settings.json')
|
||||
const packageJson = require('./package.json')
|
||||
const defaultModelJson = require('./resources/default-model.json')
|
||||
|
||||
export default [
|
||||
{
|
||||
@ -20,17 +19,18 @@ export default [
|
||||
plugins: [
|
||||
replace({
|
||||
preventAssignment: true,
|
||||
DEFAULT_MODEL: JSON.stringify(defaultModelJson),
|
||||
SETTINGS: JSON.stringify(settingJson),
|
||||
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
||||
}),
|
||||
// Allow json resolution
|
||||
json(),
|
||||
// Compile TypeScript files
|
||||
typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }),
|
||||
typescript({
|
||||
useTsconfigDeclarationDir: true,
|
||||
exclude: ['**/__tests__', '**/*.test.ts'],
|
||||
}),
|
||||
// Compile TypeScript files
|
||||
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
|
||||
// commonjs(),
|
||||
commonjs(),
|
||||
// Allow node_modules resolution, so you can use 'external' to control
|
||||
// which external modules to include in the bundle
|
||||
// https://github.com/rollup/rollup-plugin-node-resolve#usage
|
||||
@ -39,39 +39,6 @@ export default [
|
||||
browser: true,
|
||||
}),
|
||||
|
||||
// Resolve source maps to the original source
|
||||
sourceMaps(),
|
||||
],
|
||||
},
|
||||
{
|
||||
input: `src/node/index.ts`,
|
||||
output: [
|
||||
{
|
||||
file: 'dist/node/index.cjs.js',
|
||||
format: 'cjs',
|
||||
sourcemap: true,
|
||||
inlineDynamicImports: true,
|
||||
},
|
||||
],
|
||||
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
|
||||
external: ['@janhq/core/node'],
|
||||
watch: {
|
||||
include: 'src/node/**',
|
||||
},
|
||||
plugins: [
|
||||
// Allow json resolution
|
||||
json(),
|
||||
// Compile TypeScript files
|
||||
typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }),
|
||||
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
|
||||
commonjs(),
|
||||
// Allow node_modules resolution, so you can use 'external' to control
|
||||
// which external modules to include in the bundle
|
||||
// https://github.com/rollup/rollup-plugin-node-resolve#usage
|
||||
resolve({
|
||||
extensions: ['.ts', '.js', '.json'],
|
||||
}),
|
||||
|
||||
// Resolve source maps to the original source
|
||||
sourceMaps(),
|
||||
],
|
||||
|
||||
@ -1,6 +0,0 @@
|
||||
export class InvalidHostError extends Error {
|
||||
constructor(message: string) {
|
||||
super(message)
|
||||
this.name = 'InvalidHostError'
|
||||
}
|
||||
}
|
||||
@ -1,6 +0,0 @@
|
||||
export class NotSupportedModelError extends Error {
|
||||
constructor(message: string) {
|
||||
super(message)
|
||||
this.name = 'NotSupportedModelError'
|
||||
}
|
||||
}
|
||||
@ -1,6 +1,5 @@
|
||||
export {}
|
||||
declare global {
|
||||
declare const DEFAULT_MODEL: object
|
||||
declare const NODE: string
|
||||
|
||||
interface Core {
|
||||
|
||||
166
extensions/model-extension/src/cortex.ts
Normal file
166
extensions/model-extension/src/cortex.ts
Normal file
@ -0,0 +1,166 @@
|
||||
import PQueue from 'p-queue'
|
||||
import ky from 'ky'
|
||||
import {
|
||||
DownloadEvent,
|
||||
events,
|
||||
Model,
|
||||
ModelRuntimeParams,
|
||||
ModelSettingParams,
|
||||
} from '@janhq/core'
|
||||
/**
 * cortex.cpp Model APIs interface
 */
interface ICortexAPI {
  /** Fetch a single model's metadata by id. */
  getModel(model: string): Promise<Model>
  /** List all models known to the cortex server. */
  getModels(): Promise<Model[]>
  /** Start downloading a model by id. */
  pullModel(model: string): Promise<void>
  /** Import a local model file into cortex. */
  importModel(path: string, modelPath: string): Promise<void>
  /** Delete a model by id. */
  deleteModel(model: string): Promise<void>
  /** Patch a model's stored configuration. */
  updateModel(model: object): Promise<void>
  /** Abort an in-flight model download. */
  cancelModelPull(model: string): Promise<void>
}
|
||||
/**
 * Simple CortexAPI service
 * It could be replaced by cortex client sdk later on
 */
// Default local cortex.cpp server endpoints (HTTP API + websocket events).
const API_URL = 'http://127.0.0.1:39291'
const SOCKET_URL = 'ws://127.0.0.1:39291'

// Shape of the model-list response body.
type ModelList = {
  data: any[]
}
||||
|
||||
/**
 * HTTP/websocket client for a locally running cortex.cpp server.
 * All requests are serialized through a single-concurrency queue, which
 * also delays the first request until the initial healthz probe completes.
 */
export class CortexAPI implements ICortexAPI {
  // concurrency: 1 — requests run strictly one at a time, in order.
  queue = new PQueue({ concurrency: 1 })
  // Websocket for download-progress events; set in subscribeToEvents().
  socket?: WebSocket = undefined

  constructor() {
    // Gate the queue on server readiness, then start listening for events.
    this.queue.add(() => this.healthz())
    this.subscribeToEvents()
  }

  /**
   * Fetch a single model by id and normalize it into a Model object.
   */
  getModel(model: string): Promise<any> {
    return this.queue.add(() =>
      ky
        .get(`${API_URL}/v1/models/${model}`)
        .json()
        .then((e) => this.transformModel(e))
    )
  }

  /**
   * List all models known to the server.
   * NOTE(review): uses `/models` while getModel uses `/v1/models` — confirm
   * both prefixes are intended.
   */
  getModels(): Promise<Model[]> {
    return this.queue
      .add(() => ky.get(`${API_URL}/models`).json<ModelList>())
      .then((e) =>
        // queue.add may resolve void; only map when we got a response object.
        typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : []
      )
  }

  /**
   * Start downloading a model. Rethrows the server's JSON error body when
   * available so the caller sees the API's error payload, not the HTTPError.
   */
  pullModel(model: string): Promise<void> {
    return this.queue.add(() =>
      ky
        .post(`${API_URL}/v1/models/pull`, { json: { model } })
        .json()
        .catch(async (e) => {
          throw (await e.response?.json()) ?? e
        })
        .then()
    )
  }

  /**
   * Import a local model file into cortex. Errors are logged and ignored.
   */
  importModel(model: string, modelPath: string): Promise<void> {
    return this.queue.add(() =>
      ky
        .post(`${API_URL}/v1/models/import`, { json: { model, modelPath } })
        .json()
        .catch((e) => console.debug(e)) // Ignore error
        .then()
    )
  }

  /**
   * Delete a model by id.
   * NOTE(review): unversioned `/models` path — confirm against the other
   * `/v1/models` endpoints.
   */
  deleteModel(model: string): Promise<void> {
    return this.queue.add(() =>
      ky.delete(`${API_URL}/models/${model}`).json().then()
    )
  }

  /**
   * Patch a model's configuration.
   * NOTE(review): `model` is an object, so interpolating it into the URL
   * yields ".../models/[object Object]" — this presumably should be the
   * model's id; verify against the cortex API.
   */
  updateModel(model: object): Promise<void> {
    return this.queue.add(() =>
      ky
        .patch(`${API_URL}/v1/models/${model}`, { json: { model } })
        .json()
        .then()
    )
  }
  /**
   * Abort an in-flight download; the pull task id is the model id.
   */
  cancelModelPull(model: string): Promise<void> {
    return this.queue.add(() =>
      ky
        .delete(`${API_URL}/models/pull`, { json: { taskId: model } })
        .json()
        .then()
    )
  }

  /**
   * Poll the server's health endpoint, retrying up to 10 times, resolving
   * once it responds. Queued first, so it delays all other requests.
   */
  healthz(): Promise<void> {
    return ky
      .get(`${API_URL}/healthz`, {
        retry: {
          limit: 10,
          methods: ['get'],
        },
      })
      .then(() => {})
  }

  /**
   * Open the websocket event stream and re-emit download progress through
   * the app event bus. Each message carries a task with per-item byte counts.
   */
  subscribeToEvents() {
    this.queue.add(
      () =>
        new Promise<void>((resolve) => {
          this.socket = new WebSocket(`${SOCKET_URL}/events`)
          console.log('Socket connected')

          this.socket.addEventListener('message', (event) => {
            const data = JSON.parse(event.data)
            // Sum downloaded/total bytes across all items of the task.
            const transferred = data.task.items.reduce(
              (accumulator, currentValue) =>
                accumulator + currentValue.downloadedBytes,
              0
            )
            const total = data.task.items.reduce(
              (accumulator, currentValue) => accumulator + currentValue.bytes,
              0
            )
            // NOTE(review): `?? 1` only guards null/undefined, not 0 — a
            // zero `total` still divides by zero; confirm intent.
            const percent = ((transferred ?? 1) / (total ?? 1)) * 100

            events.emit(data.type, {
              modelId: data.task.id,
              percent: percent,
              size: {
                transferred: transferred,
                total: total,
              },
            })
          })
          resolve()
        })
    )
  }

  // Normalize a raw API payload into a Model: shallow-copies the payload
  // into `parameters` and `settings` (no key filtering yet) and resets
  // metadata. Mutates and returns the same object.
  private transformModel(model: any) {
    model.parameters = setParameters<ModelRuntimeParams>(model)
    model.settings = setParameters<ModelSettingParams>(model)
    model.metadata = {
      tags: [],
    }
    return model as Model
  }
}
|
||||
|
||||
type FilteredParams<T> = {
|
||||
[K in keyof T]: T[K]
|
||||
}
|
||||
|
||||
function setParameters<T>(params: T): T {
|
||||
const filteredParams: FilteredParams<T> = { ...params }
|
||||
return filteredParams
|
||||
}
|
||||
@ -1,87 +0,0 @@
|
||||
import { extractFileName } from './path';
|
||||
|
||||
// Unit tests for extractFileName — pure string manipulation, no mocks.
// Covers extension appending, case-insensitive matching, and degenerate
// (empty/undefined) URL inputs.
describe('extractFileName Function', () => {
  it('should correctly extract the file name with the provided file extension', () => {
    const url = 'http://example.com/some/path/to/file.ext';
    const fileExtension = '.ext';
    const fileName = extractFileName(url, fileExtension);
    expect(fileName).toBe('file.ext');
  });

  it('should correctly append the file extension if it does not already exist in the file name', () => {
    const url = 'http://example.com/some/path/to/file';
    const fileExtension = '.txt';
    const fileName = extractFileName(url, fileExtension);
    expect(fileName).toBe('file.txt');
  });

  it('should handle cases where the URL does not have a file extension correctly', () => {
    const url = 'http://example.com/some/path/to/file';
    const fileExtension = '.jpg';
    const fileName = extractFileName(url, fileExtension);
    expect(fileName).toBe('file.jpg');
  });

  it('should correctly handle URLs without a trailing slash', () => {
    const url = 'http://example.com/some/path/tofile';
    const fileExtension = '.txt';
    const fileName = extractFileName(url, fileExtension);
    expect(fileName).toBe('tofile.txt');
  });

  it('should correctly handle URLs with multiple file extensions', () => {
    const url = 'http://example.com/some/path/tofile.tar.gz';
    const fileExtension = '.gz';
    const fileName = extractFileName(url, fileExtension);
    expect(fileName).toBe('tofile.tar.gz');
  });

  it('should correctly handle URLs with special characters', () => {
    const url = 'http://example.com/some/path/tófílë.extë';
    const fileExtension = '.extë';
    const fileName = extractFileName(url, fileExtension);
    expect(fileName).toBe('tófílë.extë');
  });

  it('should correctly handle URLs that are just a file with no path', () => {
    const url = 'http://example.com/file.txt';
    const fileExtension = '.txt';
    const fileName = extractFileName(url, fileExtension);
    expect(fileName).toBe('file.txt');
  });

  it('should correctly handle URLs that have special query parameters', () => {
    // Caller is responsible for stripping the query string before calling.
    const url = 'http://example.com/some/path/tofile.ext?query=1';
    const fileExtension = '.ext';
    const fileName = extractFileName(url.split('?')[0], fileExtension);
    expect(fileName).toBe('tofile.ext');
  });

  it('should correctly handle URLs that have uppercase characters', () => {
    // Match is case-insensitive on the name, and the original casing is kept.
    const url = 'http://EXAMPLE.COM/PATH/TO/FILE.EXT';
    const fileExtension = '.ext';
    const fileName = extractFileName(url, fileExtension);
    expect(fileName).toBe('FILE.EXT');
  });

  it('should correctly handle invalid URLs', () => {
    const url = 'invalid-url';
    const fileExtension = '.txt';
    const fileName = extractFileName(url, fileExtension);
    expect(fileName).toBe('invalid-url.txt');
  });

  it('should correctly handle empty URLs', () => {
    const url = '';
    const fileExtension = '.txt';
    const fileName = extractFileName(url, fileExtension);
    expect(fileName).toBe('.txt');
  });

  it('should correctly handle undefined URLs', () => {
    const url = undefined;
    const fileExtension = '.txt';
    const fileName = extractFileName(url as any, fileExtension);
    expect(fileName).toBe('.txt');
  });
});
|
||||
@ -1,13 +0,0 @@
|
||||
/**
|
||||
* try to retrieve the download file name from the source url
|
||||
*/
|
||||
|
||||
export function extractFileName(url: string, fileExtension: string): string {
|
||||
if(!url) return fileExtension
|
||||
|
||||
const extractedFileName = url.split('/').pop()
|
||||
const fileName = extractedFileName.toLowerCase().endsWith(fileExtension)
|
||||
? extractedFileName
|
||||
: extractedFileName + fileExtension
|
||||
return fileName
|
||||
}
|
||||
@ -1,846 +1,90 @@
|
||||
/**
|
||||
* @jest-environment jsdom
|
||||
*/
|
||||
const readDirSyncMock = jest.fn()
|
||||
const existMock = jest.fn()
|
||||
const readFileSyncMock = jest.fn()
|
||||
const downloadMock = jest.fn()
|
||||
const mkdirMock = jest.fn()
|
||||
const writeFileSyncMock = jest.fn()
|
||||
const copyFileMock = jest.fn()
|
||||
const dirNameMock = jest.fn()
|
||||
const executeMock = jest.fn()
|
||||
import JanModelExtension from './index'
|
||||
import { Model } from '@janhq/core'
|
||||
|
||||
let SETTINGS = []
|
||||
// @ts-ignore
|
||||
global.SETTINGS = SETTINGS
|
||||
|
||||
jest.mock('@janhq/core', () => ({
|
||||
...jest.requireActual('@janhq/core/node'),
|
||||
events: {
|
||||
emit: jest.fn(),
|
||||
},
|
||||
fs: {
|
||||
existsSync: existMock,
|
||||
readdirSync: readDirSyncMock,
|
||||
readFileSync: readFileSyncMock,
|
||||
writeFileSync: writeFileSyncMock,
|
||||
mkdir: mkdirMock,
|
||||
copyFile: copyFileMock,
|
||||
fileStat: () => ({
|
||||
isDirectory: false,
|
||||
}),
|
||||
},
|
||||
dirName: dirNameMock,
|
||||
joinPath: (paths) => paths.join('/'),
|
||||
ModelExtension: jest.fn(),
|
||||
downloadFile: downloadMock,
|
||||
executeOnMain: executeMock,
|
||||
ModelExtension: jest.fn().mockImplementation(function () {
|
||||
// @ts-ignore
|
||||
this.registerSettings = () => {
|
||||
return Promise.resolve()
|
||||
}
|
||||
// @ts-ignore
|
||||
return this
|
||||
}),
|
||||
}))
|
||||
|
||||
jest.mock('@huggingface/gguf')
|
||||
|
||||
global.fetch = jest.fn(() =>
|
||||
Promise.resolve({
|
||||
json: () => Promise.resolve({ test: 100 }),
|
||||
arrayBuffer: jest.fn(),
|
||||
})
|
||||
) as jest.Mock
|
||||
|
||||
import JanModelExtension from '.'
|
||||
import { fs, dirName } from '@janhq/core'
|
||||
import { gguf } from '@huggingface/gguf'
|
||||
|
||||
describe('JanModelExtension', () => {
|
||||
let sut: JanModelExtension
|
||||
|
||||
beforeAll(() => {
|
||||
// @ts-ignore
|
||||
sut = new JanModelExtension()
|
||||
})
|
||||
let extension: JanModelExtension
|
||||
let mockCortexAPI: any
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks()
|
||||
})
|
||||
|
||||
describe('getConfiguredModels', () => {
|
||||
describe("when there's no models are pre-populated", () => {
|
||||
it('should return empty array', async () => {
|
||||
// Mock configured models data
|
||||
const configuredModels = []
|
||||
existMock.mockReturnValue(true)
|
||||
readDirSyncMock.mockReturnValue([])
|
||||
|
||||
const result = await sut.getConfiguredModels()
|
||||
expect(result).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
describe("when there's are pre-populated models - all flattened", () => {
|
||||
it('returns configured models data - flatten folder - with correct file_path and model id', async () => {
|
||||
// Mock configured models data
|
||||
const configuredModels = [
|
||||
{
|
||||
id: '1',
|
||||
name: 'Model 1',
|
||||
version: '1.0.0',
|
||||
description: 'Model 1 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model1',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
created: new Date(),
|
||||
updated: new Date(),
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
{
|
||||
id: '2',
|
||||
name: 'Model 2',
|
||||
version: '2.0.0',
|
||||
description: 'Model 2 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model2',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
]
|
||||
existMock.mockReturnValue(true)
|
||||
|
||||
readDirSyncMock.mockImplementation((path) => {
|
||||
if (path === 'file://models') return ['model1', 'model2']
|
||||
else return ['model.json']
|
||||
})
|
||||
|
||||
readFileSyncMock.mockImplementation((path) => {
|
||||
if (path.includes('model1'))
|
||||
return JSON.stringify(configuredModels[0])
|
||||
else return JSON.stringify(configuredModels[1])
|
||||
})
|
||||
|
||||
const result = await sut.getConfiguredModels()
|
||||
expect(result).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model1/model.json',
|
||||
id: '1',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model2/model.json',
|
||||
id: '2',
|
||||
}),
|
||||
])
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe("when there's are pre-populated models - there are nested folders", () => {
|
||||
it('returns configured models data - flatten folder - with correct file_path and model id', async () => {
|
||||
// Mock configured models data
|
||||
const configuredModels = [
|
||||
{
|
||||
id: '1',
|
||||
name: 'Model 1',
|
||||
version: '1.0.0',
|
||||
description: 'Model 1 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model1',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
created: new Date(),
|
||||
updated: new Date(),
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
{
|
||||
id: '2',
|
||||
name: 'Model 2',
|
||||
version: '2.0.0',
|
||||
description: 'Model 2 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model2',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
]
|
||||
existMock.mockReturnValue(true)
|
||||
|
||||
readDirSyncMock.mockImplementation((path) => {
|
||||
if (path === 'file://models') return ['model1', 'model2/model2-1']
|
||||
else return ['model.json']
|
||||
})
|
||||
|
||||
readFileSyncMock.mockImplementation((path) => {
|
||||
if (path.includes('model1'))
|
||||
return JSON.stringify(configuredModels[0])
|
||||
else if (path.includes('model2/model2-1'))
|
||||
return JSON.stringify(configuredModels[1])
|
||||
})
|
||||
|
||||
const result = await sut.getConfiguredModels()
|
||||
expect(result).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model1/model.json',
|
||||
id: '1',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model2/model2-1/model.json',
|
||||
id: '2',
|
||||
}),
|
||||
])
|
||||
)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('getDownloadedModels', () => {
|
||||
describe('no models downloaded', () => {
|
||||
it('should return empty array', async () => {
|
||||
// Mock downloaded models data
|
||||
existMock.mockReturnValue(true)
|
||||
readDirSyncMock.mockReturnValue([])
|
||||
|
||||
const result = await sut.getDownloadedModels()
|
||||
expect(result).toEqual([])
|
||||
})
|
||||
})
|
||||
describe('only one model is downloaded', () => {
|
||||
describe('flatten folder', () => {
|
||||
it('returns downloaded models - with correct file_path and model id', async () => {
|
||||
// Mock configured models data
|
||||
const configuredModels = [
|
||||
{
|
||||
id: '1',
|
||||
name: 'Model 1',
|
||||
version: '1.0.0',
|
||||
description: 'Model 1 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model1',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
created: new Date(),
|
||||
updated: new Date(),
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
{
|
||||
id: '2',
|
||||
name: 'Model 2',
|
||||
version: '2.0.0',
|
||||
description: 'Model 2 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model2',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
]
|
||||
existMock.mockReturnValue(true)
|
||||
|
||||
readDirSyncMock.mockImplementation((path) => {
|
||||
if (path === 'file://models') return ['model1', 'model2']
|
||||
else if (path === 'file://models/model1')
|
||||
return ['model.json', 'test.gguf']
|
||||
else return ['model.json']
|
||||
})
|
||||
|
||||
readFileSyncMock.mockImplementation((path) => {
|
||||
if (path.includes('model1'))
|
||||
return JSON.stringify(configuredModels[0])
|
||||
else return JSON.stringify(configuredModels[1])
|
||||
})
|
||||
|
||||
const result = await sut.getDownloadedModels()
|
||||
expect(result).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model1/model.json',
|
||||
id: '1',
|
||||
}),
|
||||
])
|
||||
)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('all models are downloaded', () => {
|
||||
describe('nested folders', () => {
|
||||
it('returns downloaded models - with correct file_path and model id', async () => {
|
||||
// Mock configured models data
|
||||
const configuredModels = [
|
||||
{
|
||||
id: '1',
|
||||
name: 'Model 1',
|
||||
version: '1.0.0',
|
||||
description: 'Model 1 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model1',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
created: new Date(),
|
||||
updated: new Date(),
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
{
|
||||
id: '2',
|
||||
name: 'Model 2',
|
||||
version: '2.0.0',
|
||||
description: 'Model 2 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model2',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
]
|
||||
existMock.mockReturnValue(true)
|
||||
|
||||
readDirSyncMock.mockImplementation((path) => {
|
||||
if (path === 'file://models') return ['model1', 'model2/model2-1']
|
||||
else return ['model.json', 'test.gguf']
|
||||
})
|
||||
|
||||
readFileSyncMock.mockImplementation((path) => {
|
||||
if (path.includes('model1'))
|
||||
return JSON.stringify(configuredModels[0])
|
||||
else return JSON.stringify(configuredModels[1])
|
||||
})
|
||||
|
||||
const result = await sut.getDownloadedModels()
|
||||
expect(result).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model1/model.json',
|
||||
id: '1',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model2/model2-1/model.json',
|
||||
id: '2',
|
||||
}),
|
||||
])
|
||||
)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('all models are downloaded with uppercased GGUF files', () => {
|
||||
it('returns downloaded models - with correct file_path and model id', async () => {
|
||||
// Mock configured models data
|
||||
const configuredModels = [
|
||||
{
|
||||
id: '1',
|
||||
name: 'Model 1',
|
||||
version: '1.0.0',
|
||||
description: 'Model 1 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model1',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
created: new Date(),
|
||||
updated: new Date(),
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
{
|
||||
id: '2',
|
||||
name: 'Model 2',
|
||||
version: '2.0.0',
|
||||
description: 'Model 2 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model2',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
]
|
||||
existMock.mockReturnValue(true)
|
||||
|
||||
readDirSyncMock.mockImplementation((path) => {
|
||||
if (path === 'file://models') return ['model1', 'model2/model2-1']
|
||||
else if (path === 'file://models/model1')
|
||||
return ['model.json', 'test.GGUF']
|
||||
else return ['model.json', 'test.gguf']
|
||||
})
|
||||
|
||||
readFileSyncMock.mockImplementation((path) => {
|
||||
if (path.includes('model1'))
|
||||
return JSON.stringify(configuredModels[0])
|
||||
else return JSON.stringify(configuredModels[1])
|
||||
})
|
||||
|
||||
const result = await sut.getDownloadedModels()
|
||||
expect(result).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model1/model.json',
|
||||
id: '1',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model2/model2-1/model.json',
|
||||
id: '2',
|
||||
}),
|
||||
])
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe('all models are downloaded - GGUF & Tensort RT', () => {
|
||||
it('returns downloaded models - with correct file_path and model id', async () => {
|
||||
// Mock configured models data
|
||||
const configuredModels = [
|
||||
{
|
||||
id: '1',
|
||||
name: 'Model 1',
|
||||
version: '1.0.0',
|
||||
description: 'Model 1 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model1',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
created: new Date(),
|
||||
updated: new Date(),
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
{
|
||||
id: '2',
|
||||
name: 'Model 2',
|
||||
version: '2.0.0',
|
||||
description: 'Model 2 description',
|
||||
object: {
|
||||
type: 'model',
|
||||
uri: 'http://localhost:5000/models/model2',
|
||||
},
|
||||
format: 'onnx',
|
||||
sources: [],
|
||||
parameters: {},
|
||||
settings: {},
|
||||
metadata: {},
|
||||
engine: 'test',
|
||||
} as any,
|
||||
]
|
||||
existMock.mockReturnValue(true)
|
||||
|
||||
readDirSyncMock.mockImplementation((path) => {
|
||||
if (path === 'file://models') return ['model1', 'model2/model2-1']
|
||||
else if (path === 'file://models/model1')
|
||||
return ['model.json', 'test.gguf']
|
||||
else return ['model.json', 'test.engine']
|
||||
})
|
||||
|
||||
readFileSyncMock.mockImplementation((path) => {
|
||||
if (path.includes('model1'))
|
||||
return JSON.stringify(configuredModels[0])
|
||||
else return JSON.stringify(configuredModels[1])
|
||||
})
|
||||
|
||||
const result = await sut.getDownloadedModels()
|
||||
expect(result).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model1/model.json',
|
||||
id: '1',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
file_path: 'file://models/model2/model2-1/model.json',
|
||||
id: '2',
|
||||
}),
|
||||
])
|
||||
)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('deleteModel', () => {
|
||||
describe('model is a GGUF model', () => {
|
||||
it('should delete the GGUF file', async () => {
|
||||
fs.unlinkSync = jest.fn()
|
||||
const dirMock = dirName as jest.Mock
|
||||
dirMock.mockReturnValue('file://models/model1')
|
||||
|
||||
fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
|
||||
|
||||
readDirSyncMock.mockImplementation((path) => {
|
||||
return ['model.json', 'test.gguf']
|
||||
})
|
||||
|
||||
existMock.mockReturnValue(true)
|
||||
|
||||
await sut.deleteModel({
|
||||
file_path: 'file://models/model1/model.json',
|
||||
} as any)
|
||||
|
||||
expect(fs.unlinkSync).toHaveBeenCalledWith(
|
||||
'file://models/model1/test.gguf'
|
||||
)
|
||||
})
|
||||
|
||||
it('no gguf file presented', async () => {
|
||||
fs.unlinkSync = jest.fn()
|
||||
const dirMock = dirName as jest.Mock
|
||||
dirMock.mockReturnValue('file://models/model1')
|
||||
|
||||
fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
|
||||
|
||||
readDirSyncMock.mockReturnValue(['model.json'])
|
||||
|
||||
existMock.mockReturnValue(true)
|
||||
|
||||
await sut.deleteModel({
|
||||
file_path: 'file://models/model1/model.json',
|
||||
} as any)
|
||||
|
||||
expect(fs.unlinkSync).toHaveBeenCalledTimes(0)
|
||||
})
|
||||
|
||||
it('delete an imported model', async () => {
|
||||
fs.rm = jest.fn()
|
||||
const dirMock = dirName as jest.Mock
|
||||
dirMock.mockReturnValue('file://models/model1')
|
||||
|
||||
readDirSyncMock.mockReturnValue(['model.json', 'test.gguf'])
|
||||
|
||||
// MARK: This is a tricky logic implement?
|
||||
// I will just add test for now but will align on the legacy implementation
|
||||
fs.readFileSync = jest.fn().mockReturnValue(
|
||||
JSON.stringify({
|
||||
metadata: {
|
||||
author: 'user',
|
||||
},
|
||||
})
|
||||
)
|
||||
|
||||
existMock.mockReturnValue(true)
|
||||
|
||||
await sut.deleteModel({
|
||||
file_path: 'file://models/model1/model.json',
|
||||
} as any)
|
||||
|
||||
expect(fs.rm).toHaveBeenCalledWith('file://models/model1')
|
||||
})
|
||||
|
||||
it('delete tensorrt-models', async () => {
|
||||
fs.rm = jest.fn()
|
||||
const dirMock = dirName as jest.Mock
|
||||
dirMock.mockReturnValue('file://models/model1')
|
||||
|
||||
readDirSyncMock.mockReturnValue(['model.json', 'test.engine'])
|
||||
|
||||
fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
|
||||
|
||||
existMock.mockReturnValue(true)
|
||||
|
||||
await sut.deleteModel({
|
||||
file_path: 'file://models/model1/model.json',
|
||||
} as any)
|
||||
|
||||
expect(fs.unlinkSync).toHaveBeenCalledWith(
|
||||
'file://models/model1/test.engine'
|
||||
)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('downloadModel', () => {
|
||||
const model: any = {
|
||||
id: 'model-id',
|
||||
name: 'Test Model',
|
||||
sources: [
|
||||
{ url: 'http://example.com/model.gguf', filename: 'model.gguf' },
|
||||
],
|
||||
engine: 'test-engine',
|
||||
mockCortexAPI = {
|
||||
getModels: jest.fn().mockResolvedValue([]),
|
||||
pullModel: jest.fn().mockResolvedValue(undefined),
|
||||
importModel: jest.fn().mockResolvedValue(undefined),
|
||||
deleteModel: jest.fn().mockResolvedValue(undefined),
|
||||
updateModel: jest.fn().mockResolvedValue({}),
|
||||
cancelModelPull: jest.fn().mockResolvedValue(undefined),
|
||||
}
|
||||
|
||||
const network = {
|
||||
ignoreSSL: true,
|
||||
proxy: 'http://proxy.example.com',
|
||||
}
|
||||
// @ts-ignore
|
||||
extension = new JanModelExtension()
|
||||
extension.cortexAPI = mockCortexAPI
|
||||
})
|
||||
|
||||
const gpuSettings: any = {
|
||||
gpus: [{ name: 'nvidia-rtx-3080', arch: 'ampere' }],
|
||||
}
|
||||
it('should register settings on load', async () => {
|
||||
// @ts-ignore
|
||||
const registerSettingsSpy = jest.spyOn(extension, 'registerSettings')
|
||||
await extension.onLoad()
|
||||
expect(registerSettingsSpy).toHaveBeenCalledWith(SETTINGS)
|
||||
})
|
||||
|
||||
it('should reject with invalid gguf metadata', async () => {
|
||||
existMock.mockImplementation(() => false)
|
||||
it('should pull a model', async () => {
|
||||
const model = 'test-model'
|
||||
await extension.pullModel(model)
|
||||
expect(mockCortexAPI.pullModel).toHaveBeenCalledWith(model)
|
||||
})
|
||||
|
||||
expect(
|
||||
sut.downloadModel(model, gpuSettings, network)
|
||||
).rejects.toBeTruthy()
|
||||
})
|
||||
it('should cancel model download', async () => {
|
||||
const model = 'test-model'
|
||||
await extension.cancelModelPull(model)
|
||||
expect(mockCortexAPI.cancelModelPull).toHaveBeenCalledWith(model)
|
||||
})
|
||||
|
||||
it('should download corresponding ID', async () => {
|
||||
existMock.mockImplementation(() => true)
|
||||
dirNameMock.mockImplementation(() => 'file://models/model1')
|
||||
downloadMock.mockImplementation(() => {
|
||||
return Promise.resolve({})
|
||||
})
|
||||
it('should delete a model', async () => {
|
||||
const model = 'test-model'
|
||||
await extension.deleteModel(model)
|
||||
expect(mockCortexAPI.deleteModel).toHaveBeenCalledWith(model)
|
||||
})
|
||||
|
||||
expect(
|
||||
await sut.downloadModel(
|
||||
{ ...model, file_path: 'file://models/model1/model.json' },
|
||||
gpuSettings,
|
||||
network
|
||||
)
|
||||
).toBeUndefined()
|
||||
it('should get all models', async () => {
|
||||
const models = await extension.getModels()
|
||||
expect(models).toEqual([])
|
||||
expect(mockCortexAPI.getModels).toHaveBeenCalled()
|
||||
})
|
||||
|
||||
expect(downloadMock).toHaveBeenCalledWith(
|
||||
{
|
||||
localPath: 'file://models/model1/model.gguf',
|
||||
modelId: 'model-id',
|
||||
url: 'http://example.com/model.gguf',
|
||||
},
|
||||
{ ignoreSSL: true, proxy: 'http://proxy.example.com' }
|
||||
)
|
||||
})
|
||||
it('should update a model', async () => {
|
||||
const model: Partial<Model> = { id: 'test-model' }
|
||||
const updatedModel = await extension.updateModel(model)
|
||||
expect(updatedModel).toEqual({})
|
||||
expect(mockCortexAPI.updateModel).toHaveBeenCalledWith(model)
|
||||
})
|
||||
|
||||
it('should handle invalid model file', async () => {
|
||||
executeMock.mockResolvedValue({})
|
||||
|
||||
fs.readFileSync = jest.fn(() => {
|
||||
return JSON.stringify({ metadata: { author: 'user' } })
|
||||
})
|
||||
|
||||
expect(
|
||||
sut.downloadModel(
|
||||
{ ...model, file_path: 'file://models/model1/model.json' },
|
||||
gpuSettings,
|
||||
network
|
||||
)
|
||||
).resolves.not.toThrow()
|
||||
|
||||
expect(downloadMock).not.toHaveBeenCalled()
|
||||
})
|
||||
it('should handle model file with no sources', async () => {
|
||||
executeMock.mockResolvedValue({})
|
||||
const modelWithoutSources = { ...model, sources: [] }
|
||||
|
||||
expect(
|
||||
sut.downloadModel(
|
||||
{
|
||||
...modelWithoutSources,
|
||||
file_path: 'file://models/model1/model.json',
|
||||
},
|
||||
gpuSettings,
|
||||
network
|
||||
)
|
||||
).resolves.toBe(undefined)
|
||||
|
||||
expect(downloadMock).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('should handle model file with multiple sources', async () => {
|
||||
const modelWithMultipleSources = {
|
||||
...model,
|
||||
sources: [
|
||||
{ url: 'http://example.com/model1.gguf', filename: 'model1.gguf' },
|
||||
{ url: 'http://example.com/model2.gguf', filename: 'model2.gguf' },
|
||||
],
|
||||
}
|
||||
|
||||
executeMock.mockResolvedValue({
|
||||
metadata: { 'tokenizer.ggml.eos_token_id': 0 },
|
||||
})
|
||||
;(gguf as jest.Mock).mockResolvedValue({
|
||||
metadata: { 'tokenizer.ggml.eos_token_id': 0 },
|
||||
})
|
||||
// @ts-ignore
|
||||
global.NODE = 'node'
|
||||
// @ts-ignore
|
||||
global.DEFAULT_MODEL = {
|
||||
parameters: { stop: [] },
|
||||
}
|
||||
downloadMock.mockImplementation(() => {
|
||||
return Promise.resolve({})
|
||||
})
|
||||
|
||||
expect(
|
||||
await sut.downloadModel(
|
||||
{
|
||||
...modelWithMultipleSources,
|
||||
file_path: 'file://models/model1/model.json',
|
||||
},
|
||||
gpuSettings,
|
||||
network
|
||||
)
|
||||
).toBeUndefined()
|
||||
|
||||
expect(downloadMock).toHaveBeenCalledWith(
|
||||
{
|
||||
localPath: 'file://models/model1/model1.gguf',
|
||||
modelId: 'model-id',
|
||||
url: 'http://example.com/model1.gguf',
|
||||
},
|
||||
{ ignoreSSL: true, proxy: 'http://proxy.example.com' }
|
||||
)
|
||||
|
||||
expect(downloadMock).toHaveBeenCalledWith(
|
||||
{
|
||||
localPath: 'file://models/model1/model2.gguf',
|
||||
modelId: 'model-id',
|
||||
url: 'http://example.com/model2.gguf',
|
||||
},
|
||||
{ ignoreSSL: true, proxy: 'http://proxy.example.com' }
|
||||
)
|
||||
})
|
||||
|
||||
it('should handle model file with no file_path', async () => {
|
||||
executeMock.mockResolvedValue({
|
||||
metadata: { 'tokenizer.ggml.eos_token_id': 0 },
|
||||
})
|
||||
;(gguf as jest.Mock).mockResolvedValue({
|
||||
metadata: { 'tokenizer.ggml.eos_token_id': 0 },
|
||||
})
|
||||
// @ts-ignore
|
||||
global.NODE = 'node'
|
||||
// @ts-ignore
|
||||
global.DEFAULT_MODEL = {
|
||||
parameters: { stop: [] },
|
||||
}
|
||||
const modelWithoutFilepath = { ...model, file_path: undefined }
|
||||
|
||||
await sut.downloadModel(modelWithoutFilepath, gpuSettings, network)
|
||||
|
||||
expect(downloadMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
localPath: 'file://models/model-id/model.gguf',
|
||||
}),
|
||||
expect.anything()
|
||||
)
|
||||
})
|
||||
|
||||
it('should handle model file with invalid file_path', async () => {
|
||||
executeMock.mockResolvedValue({
|
||||
metadata: { 'tokenizer.ggml.eos_token_id': 0 },
|
||||
})
|
||||
;(gguf as jest.Mock).mockResolvedValue({
|
||||
metadata: { 'tokenizer.ggml.eos_token_id': 0 },
|
||||
})
|
||||
// @ts-ignore
|
||||
global.NODE = 'node'
|
||||
// @ts-ignore
|
||||
global.DEFAULT_MODEL = {
|
||||
parameters: { stop: [] },
|
||||
}
|
||||
const modelWithInvalidFilepath = {
|
||||
...model,
|
||||
file_path: 'file://models/invalid-model.json',
|
||||
}
|
||||
|
||||
await sut.downloadModel(modelWithInvalidFilepath, gpuSettings, network)
|
||||
|
||||
expect(downloadMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
localPath: 'file://models/model1/model.gguf',
|
||||
}),
|
||||
expect.anything()
|
||||
)
|
||||
})
|
||||
|
||||
it('should handle model with valid chat_template', async () => {
|
||||
executeMock.mockResolvedValue('{prompt}')
|
||||
;(gguf as jest.Mock).mockResolvedValue({
|
||||
metadata: {},
|
||||
})
|
||||
// @ts-ignore
|
||||
global.NODE = 'node'
|
||||
// @ts-ignore
|
||||
global.DEFAULT_MODEL = {
|
||||
parameters: { stop: [] },
|
||||
settings: {
|
||||
prompt_template: '<|im-start|>{prompt}<|im-end|>',
|
||||
},
|
||||
}
|
||||
|
||||
const result = await sut.retrieveGGUFMetadata({})
|
||||
|
||||
expect(result).toEqual({
|
||||
parameters: {
|
||||
stop: [],
|
||||
},
|
||||
settings: {
|
||||
ctx_len: 4096,
|
||||
ngl: 33,
|
||||
prompt_template: '{prompt}',
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
it('should handle model without chat_template', async () => {
|
||||
executeMock.mockRejectedValue({})
|
||||
;(gguf as jest.Mock).mockResolvedValue({
|
||||
metadata: {},
|
||||
})
|
||||
// @ts-ignore
|
||||
global.NODE = 'node'
|
||||
// @ts-ignore
|
||||
global.DEFAULT_MODEL = {
|
||||
parameters: { stop: [] },
|
||||
settings: {
|
||||
prompt_template: '<|im-start|>{prompt}<|im-end|>',
|
||||
},
|
||||
}
|
||||
|
||||
const result = await sut.retrieveGGUFMetadata({})
|
||||
|
||||
expect(result).toEqual({
|
||||
parameters: {
|
||||
stop: [],
|
||||
},
|
||||
settings: {
|
||||
ctx_len: 4096,
|
||||
ngl: 33,
|
||||
prompt_template: '<|im-start|>{prompt}<|im-end|>',
|
||||
},
|
||||
})
|
||||
})
|
||||
it('should import a model', async () => {
|
||||
const model: any = { path: 'test-path' }
|
||||
const optionType: any = 'test-option'
|
||||
await extension.importModel(model, optionType)
|
||||
expect(mockCortexAPI.importModel).toHaveBeenCalledWith(
|
||||
model.path,
|
||||
optionType
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,54 +0,0 @@
|
||||
import { closeSync, openSync, readSync } from 'fs'
|
||||
import { Template } from '@huggingface/jinja'
|
||||
/**
|
||||
* This is to retrieve the metadata from a GGUF file
|
||||
* It uses hyllama and jinja from @huggingface module
|
||||
*/
|
||||
export const retrieveGGUFMetadata = async (ggufPath: string) => {
|
||||
try {
|
||||
const { ggufMetadata } = await import('hyllama')
|
||||
// Read first 10mb of gguf file
|
||||
const fd = openSync(ggufPath, 'r')
|
||||
const buffer = new Uint8Array(10_000_000)
|
||||
readSync(fd, buffer, 0, 10_000_000, 0)
|
||||
closeSync(fd)
|
||||
|
||||
// Parse metadata and tensor info
|
||||
const { metadata } = ggufMetadata(buffer.buffer)
|
||||
|
||||
return metadata
|
||||
} catch (e) {
|
||||
console.log('[MODEL_EXT]', e)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert metadata to jinja template
|
||||
* @param metadata
|
||||
*/
|
||||
export const renderJinjaTemplate = (metadata: any): string => {
|
||||
const template = new Template(metadata['tokenizer.chat_template'])
|
||||
const eos_id = metadata['tokenizer.ggml.eos_token_id']
|
||||
const bos_id = metadata['tokenizer.ggml.bos_token_id']
|
||||
if (eos_id === undefined || bos_id === undefined) {
|
||||
return ''
|
||||
}
|
||||
const eos_token = metadata['tokenizer.ggml.tokens'][eos_id]
|
||||
const bos_token = metadata['tokenizer.ggml.tokens'][bos_id]
|
||||
// Parse jinja template
|
||||
return template.render({
|
||||
add_generation_prompt: true,
|
||||
eos_token,
|
||||
bos_token,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content: '{system_message}',
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: '{prompt}',
|
||||
},
|
||||
],
|
||||
})
|
||||
}
|
||||
@ -1,53 +0,0 @@
|
||||
import { renderJinjaTemplate } from './index'
|
||||
import { Template } from '@huggingface/jinja'
|
||||
|
||||
jest.mock('@huggingface/jinja', () => ({
|
||||
Template: jest.fn((template: string) => ({
|
||||
render: jest.fn(() => `${template}_rendered`),
|
||||
})),
|
||||
}))
|
||||
|
||||
describe('renderJinjaTemplate', () => {
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks() // Clear mocks between tests
|
||||
})
|
||||
|
||||
it('should render the template with correct parameters', () => {
|
||||
const metadata = {
|
||||
'tokenizer.chat_template': 'Hello, {{ messages }}!',
|
||||
'tokenizer.ggml.eos_token_id': 0,
|
||||
'tokenizer.ggml.bos_token_id': 1,
|
||||
'tokenizer.ggml.tokens': ['EOS', 'BOS'],
|
||||
}
|
||||
|
||||
const renderedTemplate = renderJinjaTemplate(metadata)
|
||||
|
||||
expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!')
|
||||
|
||||
expect(renderedTemplate).toBe('Hello, {{ messages }}!_rendered')
|
||||
})
|
||||
|
||||
it('should handle missing token IDs gracefully', () => {
|
||||
const metadata = {
|
||||
'tokenizer.chat_template': 'Hello, {{ messages }}!',
|
||||
'tokenizer.ggml.eos_token_id': 0,
|
||||
'tokenizer.ggml.tokens': ['EOS'],
|
||||
}
|
||||
|
||||
const renderedTemplate = renderJinjaTemplate(metadata)
|
||||
|
||||
expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!')
|
||||
|
||||
expect(renderedTemplate).toBe('')
|
||||
})
|
||||
|
||||
it('should handle empty template gracefully', () => {
|
||||
const metadata = {}
|
||||
|
||||
const renderedTemplate = renderJinjaTemplate(metadata)
|
||||
|
||||
expect(Template).toHaveBeenCalledWith(undefined)
|
||||
|
||||
expect(renderedTemplate).toBe("")
|
||||
})
|
||||
})
|
||||
@ -7,9 +7,7 @@ import {
|
||||
DownloadEvent,
|
||||
DownloadRequest,
|
||||
DownloadState,
|
||||
GpuSetting,
|
||||
InstallationState,
|
||||
Model,
|
||||
baseName,
|
||||
downloadFile,
|
||||
events,
|
||||
@ -23,7 +21,7 @@ import {
|
||||
ModelEvent,
|
||||
getJanDataFolderPath,
|
||||
SystemInformation,
|
||||
ModelFile,
|
||||
Model,
|
||||
} from '@janhq/core'
|
||||
|
||||
/**
|
||||
@ -137,7 +135,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
|
||||
events.emit(ModelEvent.OnModelsUpdate, {})
|
||||
}
|
||||
|
||||
override async loadModel(model: ModelFile): Promise<void> {
|
||||
override async loadModel(model: Model): Promise<void> {
|
||||
if ((await this.installationState()) === 'Installed')
|
||||
return super.loadModel(model)
|
||||
|
||||
@ -177,7 +175,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
|
||||
override async inference(data: MessageRequest) {
|
||||
if (!this.loadedModel) return
|
||||
// TensorRT LLM Extension supports streaming only
|
||||
if (data.model) data.model.parameters.stream = true
|
||||
if (data.model && data.model.parameters) data.model.parameters.stream = true
|
||||
super.inference(data)
|
||||
}
|
||||
|
||||
|
||||
@ -41,7 +41,7 @@ async function loadModel(
|
||||
// e.g. ~/jan/models/llama-2
|
||||
let modelFolder = params.modelFolder
|
||||
|
||||
if (params.model.settings.prompt_template) {
|
||||
if (params.model.settings?.prompt_template) {
|
||||
const promptTemplate = params.model.settings.prompt_template
|
||||
const prompt = promptTemplateConverter(promptTemplate)
|
||||
if (prompt?.error) {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user