Merge pull request #3821 from janhq/feat/path-to-cortexcpp
feat: Jan Integrates Cortex.cpp as Provider
This commit is contained in:
commit
a82c701087
@ -319,6 +319,13 @@ jobs:
|
||||
# TURBO_TEAM: 'linux'
|
||||
# TURBO_TOKEN: '${{ secrets.TURBO_TOKEN }}'
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
if: always()
|
||||
with:
|
||||
name: playwright-report
|
||||
path: electron/playwright-report/
|
||||
retention-days: 2
|
||||
|
||||
coverage-check:
|
||||
runs-on: [self-hosted, Linux, ubuntu-desktop]
|
||||
needs: base_branch_cov
|
||||
|
||||
@ -1 +1 @@
|
||||
npm run lint --fix
|
||||
npx oxlint@latest --fix
|
||||
@ -1,6 +1,8 @@
|
||||
import { SettingComponentProps } from '../types'
|
||||
import { Model, ModelEvent, SettingComponentProps } from '../types'
|
||||
import { getJanDataFolderPath, joinPath } from './core'
|
||||
import { events } from './events'
|
||||
import { fs } from './fs'
|
||||
import { ModelManager } from './models'
|
||||
|
||||
export enum ExtensionTypeEnum {
|
||||
Assistant = 'assistant',
|
||||
@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType {
|
||||
return undefined
|
||||
}
|
||||
|
||||
/**
 * Registers the given models with the shared in-memory ModelManager and then
 * emits `ModelEvent.OnModelsUpdate`.
 * @param models - The models to register.
 */
async registerModels(models: Model[]): Promise<void> {
  // The manager is looked up per model, exactly as before, so nothing is
  // resolved when the list is empty.
  models.forEach((entry) => ModelManager.instance().register(entry))
  events.emit(ModelEvent.OnModelsUpdate, {})
}
|
||||
|
||||
/**
|
||||
* Register settings for the extension.
|
||||
* @param settings
|
||||
* @returns
|
||||
*/
|
||||
async registerSettings(settings: SettingComponentProps[]): Promise<void> {
|
||||
if (!this.name) {
|
||||
console.error('Extension name is not defined')
|
||||
@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the setting value for the key.
|
||||
* @param key
|
||||
* @param defaultValue
|
||||
* @returns
|
||||
*/
|
||||
async getSetting<T>(key: string, defaultValue: T) {
|
||||
const keySetting = (await this.getSettings()).find((setting) => setting.key === key)
|
||||
|
||||
@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType {
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the settings for the extension.
|
||||
* @returns
|
||||
*/
|
||||
async getSettings(): Promise<SettingComponentProps[]> {
|
||||
if (!this.name) return []
|
||||
|
||||
@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the settings for the extension.
|
||||
* @param componentProps
|
||||
* @returns
|
||||
*/
|
||||
async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> {
|
||||
if (!this.name) return
|
||||
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
import { AIEngine } from './AIEngine'
|
||||
import { events } from '../../events'
|
||||
import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types'
|
||||
import { EngineManager } from './EngineManager'
|
||||
import { fs } from '../../fs'
|
||||
import { ModelEvent, Model } from '../../../types'
|
||||
|
||||
jest.mock('../../events')
|
||||
jest.mock('./EngineManager')
|
||||
@ -26,7 +24,7 @@ describe('AIEngine', () => {
|
||||
})
|
||||
|
||||
it('should load model if provider matches', async () => {
|
||||
const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any
|
||||
const model: any = { id: 'model1', engine: 'test-provider' } as any
|
||||
|
||||
await engine.loadModel(model)
|
||||
|
||||
@ -34,7 +32,7 @@ describe('AIEngine', () => {
|
||||
})
|
||||
|
||||
it('should not load model if provider does not match', async () => {
|
||||
const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any
|
||||
const model: any = { id: 'model1', engine: 'other-provider' } as any
|
||||
|
||||
await engine.loadModel(model)
|
||||
|
||||
|
||||
@ -1,17 +1,14 @@
|
||||
import { getJanDataFolderPath, joinPath } from '../../core'
|
||||
import { events } from '../../events'
|
||||
import { BaseExtension } from '../../extension'
|
||||
import { fs } from '../../fs'
|
||||
import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types'
|
||||
import { MessageRequest, Model, ModelEvent } from '../../../types'
|
||||
import { EngineManager } from './EngineManager'
|
||||
import { ModelManager } from '../../models/manager'
|
||||
|
||||
/**
|
||||
* Base AIEngine
|
||||
* Applicable to all AI Engines
|
||||
*/
|
||||
export abstract class AIEngine extends BaseExtension {
|
||||
private static modelsFolder = 'models'
|
||||
|
||||
// The inference engine
|
||||
abstract provider: string
|
||||
|
||||
@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension {
|
||||
override onLoad() {
|
||||
this.registerEngine()
|
||||
|
||||
events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
|
||||
events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
|
||||
events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
|
||||
}
|
||||
|
||||
@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension {
|
||||
EngineManager.instance().register(this)
|
||||
}
|
||||
|
||||
async registerModels(models: Model[]): Promise<void> {
|
||||
const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder])
|
||||
|
||||
let shouldNotifyModelUpdate = false
|
||||
for (const model of models) {
|
||||
const modelPath = await joinPath([modelFolderPath, model.id])
|
||||
const isExist = await fs.existsSync(modelPath)
|
||||
|
||||
if (isExist) {
|
||||
await this.migrateModelIfNeeded(model, modelPath)
|
||||
continue
|
||||
}
|
||||
|
||||
await fs.mkdir(modelPath)
|
||||
await fs.writeFileSync(
|
||||
await joinPath([modelPath, 'model.json']),
|
||||
JSON.stringify(model, null, 2)
|
||||
)
|
||||
shouldNotifyModelUpdate = true
|
||||
}
|
||||
|
||||
if (shouldNotifyModelUpdate) {
|
||||
events.emit(ModelEvent.OnModelsUpdate, {})
|
||||
}
|
||||
}
|
||||
|
||||
async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> {
|
||||
try {
|
||||
const modelJson = await fs.readFileSync(await joinPath([modelPath, 'model.json']), 'utf-8')
|
||||
const currentModel: Model = JSON.parse(modelJson)
|
||||
if (currentModel.version !== model.version) {
|
||||
await fs.writeFileSync(
|
||||
await joinPath([modelPath, 'model.json']),
|
||||
JSON.stringify(model, null, 2)
|
||||
)
|
||||
|
||||
events.emit(ModelEvent.OnModelsUpdate, {})
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('Error while try to migrating model', error)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the model.
|
||||
*/
|
||||
async loadModel(model: ModelFile): Promise<any> {
|
||||
async loadModel(model: Model): Promise<any> {
|
||||
if (model.engine.toString() !== this.provider) return Promise.resolve()
|
||||
events.emit(ModelEvent.OnModelReady, model)
|
||||
return Promise.resolve()
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import { InferenceEngine } from '../../../types'
|
||||
import { AIEngine } from './AIEngine'
|
||||
|
||||
/**
|
||||
@ -20,6 +21,22 @@ export class EngineManager {
|
||||
* @returns The engine, if found.
|
||||
*/
|
||||
get<T extends AIEngine>(provider: string): T | undefined {
  // Backward compatibility: nitro is migrated to cortex, and every cortex
  // sub-engine identifier is looked up under the single `cortex` key.
  const cortexAliases: string[] = [
    InferenceEngine.nitro,
    InferenceEngine.cortex,
    InferenceEngine.cortex_llamacpp,
    InferenceEngine.cortex_onnx,
    InferenceEngine.cortex_tensorrtllm,
  ]
  if (cortexAliases.includes(provider)) provider = InferenceEngine.cortex

  return this.engines.get(provider) as T | undefined
}
|
||||
|
||||
@ -27,6 +44,6 @@ export class EngineManager {
|
||||
* The instance of the engine manager.
|
||||
*/
|
||||
static instance(): EngineManager {
|
||||
return window.core?.engineManager as EngineManager ?? new EngineManager()
|
||||
return (window.core?.engineManager as EngineManager) ?? new EngineManager()
|
||||
}
|
||||
}
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
*/
|
||||
import { LocalOAIEngine } from './LocalOAIEngine'
|
||||
import { events } from '../../events'
|
||||
import { ModelEvent, ModelFile, Model } from '../../../types'
|
||||
import { ModelEvent, Model } from '../../../types'
|
||||
import { executeOnMain, systemInformation, dirName } from '../../core'
|
||||
|
||||
jest.mock('../../core', () => ({
|
||||
@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
|
||||
})
|
||||
|
||||
it('should load model correctly', async () => {
|
||||
const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
|
||||
const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
|
||||
const modelFolder = 'path/to'
|
||||
const systemInfo = { os: 'testOS' }
|
||||
const res = { error: null }
|
||||
@ -66,7 +66,7 @@ describe('LocalOAIEngine', () => {
|
||||
})
|
||||
|
||||
it('should handle load model error', async () => {
|
||||
const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
|
||||
const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
|
||||
const modelFolder = 'path/to'
|
||||
const systemInfo = { os: 'testOS' }
|
||||
const res = { error: 'load error' }
|
||||
@ -91,9 +91,7 @@ describe('LocalOAIEngine', () => {
|
||||
|
||||
it('should not unload model if engine does not match', async () => {
|
||||
const model: Model = { engine: 'otherProvider' } as any
|
||||
|
||||
await engine.unloadModel(model)
|
||||
|
||||
expect(executeOnMain).not.toHaveBeenCalled()
|
||||
expect(events.emit).not.toHaveBeenCalledWith(ModelEvent.OnModelStopped, {})
|
||||
})
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { executeOnMain, systemInformation, dirName } from '../../core'
|
||||
import { executeOnMain, systemInformation, dirName, joinPath, getJanDataFolderPath } from '../../core'
|
||||
import { events } from '../../events'
|
||||
import { Model, ModelEvent, ModelFile } from '../../../types'
|
||||
import { Model, ModelEvent } from '../../../types'
|
||||
import { OAIEngine } from './OAIEngine'
|
||||
|
||||
/**
|
||||
@ -22,16 +22,16 @@ export abstract class LocalOAIEngine extends OAIEngine {
|
||||
override onLoad() {
|
||||
super.onLoad()
|
||||
// These events are applicable to local inference providers
|
||||
events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
|
||||
events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
|
||||
events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the model.
|
||||
*/
|
||||
override async loadModel(model: ModelFile): Promise<void> {
|
||||
override async loadModel(model: Model & { file_path?: string }): Promise<void> {
|
||||
if (model.engine.toString() !== this.provider) return
|
||||
const modelFolder = await dirName(model.file_path)
|
||||
const modelFolder = 'file_path' in model && model.file_path ? await dirName(model.file_path) : await this.getModelFilePath(model.id)
|
||||
const systemInfo = await systemInformation()
|
||||
const res = await executeOnMain(
|
||||
this.nodeModule,
|
||||
@ -63,4 +63,12 @@ export abstract class LocalOAIEngine extends OAIEngine {
|
||||
events.emit(ModelEvent.OnModelStopped, {})
|
||||
})
|
||||
}
|
||||
|
||||
/// Legacy
|
||||
// Builds the legacy model location: <Jan data folder>/models/<id>.
private getModelFilePath = async (id: string): Promise<string> => {
  const dataFolder = await getJanDataFolderPath()
  return joinPath([dataFolder, 'models', id])
}
|
||||
///
|
||||
}
|
||||
|
||||
@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine {
|
||||
* Inference request
|
||||
*/
|
||||
override async inference(data: MessageRequest) {
|
||||
if (data.model?.engine?.toString() !== this.provider) return
|
||||
if (!data.model?.id) {
|
||||
events.emit(MessageEvent.OnMessageResponse, {
|
||||
status: MessageStatus.Error,
|
||||
content: [
|
||||
{
|
||||
type: ContentType.Text,
|
||||
text: {
|
||||
value: 'No model ID provided',
|
||||
annotations: [],
|
||||
},
|
||||
},
|
||||
],
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const timestamp = Date.now()
|
||||
const message: ThreadMessage = {
|
||||
@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine {
|
||||
model: model.id,
|
||||
stream: true,
|
||||
...model.parameters,
|
||||
...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}),
|
||||
}
|
||||
if (this.transformPayload) {
|
||||
requestBody = this.transformPayload(requestBody)
|
||||
|
||||
@ -10,7 +10,7 @@ export function requestInference(
|
||||
requestBody: any,
|
||||
model: {
|
||||
id: string
|
||||
parameters: ModelRuntimeParams
|
||||
parameters?: ModelRuntimeParams
|
||||
},
|
||||
controller?: AbortController,
|
||||
headers?: HeadersInit,
|
||||
@ -22,7 +22,7 @@ export function requestInference(
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Access-Control-Allow-Origin': '*',
|
||||
'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
|
||||
'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json',
|
||||
...headers,
|
||||
},
|
||||
body: JSON.stringify(requestBody),
|
||||
@ -45,7 +45,7 @@ export function requestInference(
|
||||
subscriber.complete()
|
||||
return
|
||||
}
|
||||
if (model.parameters.stream === false) {
|
||||
if (model.parameters?.stream === false) {
|
||||
const data = await response.json()
|
||||
if (transformResponse) {
|
||||
subscriber.next(transformResponse(data))
|
||||
|
||||
@ -1,13 +1,5 @@
|
||||
import { BaseExtension, ExtensionTypeEnum } from '../extension'
|
||||
import {
|
||||
GpuSetting,
|
||||
HuggingFaceRepoData,
|
||||
ImportingModel,
|
||||
Model,
|
||||
ModelFile,
|
||||
ModelInterface,
|
||||
OptionType,
|
||||
} from '../../types'
|
||||
import { Model, ModelInterface, OptionType } from '../../types'
|
||||
|
||||
/**
|
||||
* Model extension for managing models.
|
||||
@ -20,17 +12,10 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
|
||||
return ExtensionTypeEnum.Model
|
||||
}
|
||||
|
||||
abstract downloadModel(
|
||||
model: Model,
|
||||
gpuSettings?: GpuSetting,
|
||||
network?: { proxy: string; ignoreSSL?: boolean }
|
||||
): Promise<void>
|
||||
abstract cancelModelDownload(modelId: string): Promise<void>
|
||||
abstract deleteModel(model: ModelFile): Promise<void>
|
||||
abstract getDownloadedModels(): Promise<ModelFile[]>
|
||||
abstract getConfiguredModels(): Promise<ModelFile[]>
|
||||
abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void>
|
||||
abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile>
|
||||
abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData>
|
||||
abstract getDefaultModel(): Promise<Model>
|
||||
abstract getModels(): Promise<Model[]>
|
||||
abstract pullModel(model: string, id?: string, name?: string): Promise<void>
|
||||
abstract cancelModelPull(modelId: string): Promise<void>
|
||||
abstract importModel(model: string, modePath: string, name?: string, optionType?: OptionType): Promise<void>
|
||||
abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
|
||||
abstract deleteModel(model: string): Promise<void>
|
||||
}
|
||||
|
||||
@ -1,32 +1,37 @@
|
||||
import * as Core from './core';
|
||||
import * as Events from './events';
|
||||
import * as FileSystem from './fs';
|
||||
import * as Extension from './extension';
|
||||
import * as Extensions from './extensions';
|
||||
import * as Tools from './tools';
|
||||
import * as Core from './core'
|
||||
import * as Events from './events'
|
||||
import * as FileSystem from './fs'
|
||||
import * as Extension from './extension'
|
||||
import * as Extensions from './extensions'
|
||||
import * as Tools from './tools'
|
||||
import * as Models from './models'
|
||||
|
||||
describe('Module Tests', () => {
|
||||
it('should export Core module', () => {
|
||||
expect(Core).toBeDefined();
|
||||
});
|
||||
expect(Core).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export Event module', () => {
|
||||
expect(Events).toBeDefined();
|
||||
});
|
||||
expect(Events).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export Filesystem module', () => {
|
||||
expect(FileSystem).toBeDefined();
|
||||
});
|
||||
expect(FileSystem).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export Extension module', () => {
|
||||
expect(Extension).toBeDefined();
|
||||
});
|
||||
expect(Extension).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export all base extensions', () => {
|
||||
expect(Extensions).toBeDefined();
|
||||
});
|
||||
expect(Extensions).toBeDefined()
|
||||
})
|
||||
|
||||
it('should export all base tools', () => {
|
||||
expect(Tools).toBeDefined();
|
||||
});
|
||||
});
|
||||
expect(Tools).toBeDefined()
|
||||
})
|
||||
|
||||
// Title names the module actually under test; it previously duplicated the
// title of the Tools case.
it('should export all base models', () => {
  expect(Models).toBeDefined()
})
|
||||
})
|
||||
|
||||
@ -33,3 +33,9 @@ export * from './extensions'
|
||||
* @module
|
||||
*/
|
||||
export * from './tools'
|
||||
|
||||
/**
|
||||
* Export all base models.
|
||||
* @module
|
||||
*/
|
||||
export * from './models'
|
||||
|
||||
10
core/src/browser/models/index.ts
Normal file
10
core/src/browser/models/index.ts
Normal file
@ -0,0 +1,10 @@
|
||||
/**
|
||||
* Export ModelManager
|
||||
* @module
|
||||
*/
|
||||
export { ModelManager } from './manager'
|
||||
|
||||
/**
|
||||
* Export all utils
|
||||
*/
|
||||
export * from './utils'
|
||||
47
core/src/browser/models/manager.ts
Normal file
47
core/src/browser/models/manager.ts
Normal file
@ -0,0 +1,47 @@
|
||||
import { Model, ModelEvent } from '../../types'
|
||||
import { events } from '../events'
|
||||
|
||||
/**
 * Manages the registered models across extensions.
 *
 * Models are kept in an in-memory map keyed by model id. When a `window.core`
 * object is available, a newly constructed manager publishes itself as
 * `window.core.modelManager`, which is what `instance()` returns afterwards.
 */
export class ModelManager {
  public models = new Map<string, Model>()

  constructor() {
    // `typeof` is required: a bare `window` reference throws a ReferenceError
    // where no such global exists. `window.core` is treated as optional here,
    // consistent with the optional chaining used in `instance()`.
    if (typeof window !== 'undefined' && window.core) {
      window.core.modelManager = this
    }
  }

  /**
   * Registers a model and emits `ModelEvent.OnModelsUpdate`.
   *
   * If a model with the same id is already registered, the two are merged and
   * the fields of the already-registered entry take precedence over the
   * incoming ones.
   * @param model - The model to register.
   */
  register<T extends Model>(model: T) {
    if (this.models.has(model.id)) {
      this.models.set(model.id, {
        ...model,
        ...this.models.get(model.id),
      })
    } else {
      this.models.set(model.id, model)
    }
    events.emit(ModelEvent.OnModelsUpdate, {})
  }

  /**
   * Retrieves a model by its id.
   * @param id - The id of the model to retrieve.
   * @returns The model, if found.
   */
  get<T extends Model>(id: string): T | undefined {
    return this.models.get(id) as T | undefined
  }

  /**
   * The instance of the model manager.
   * @returns The manager stored on `window.core`, or a newly created one when
   * none is available.
   */
  static instance(): ModelManager {
    const shared = typeof window !== 'undefined' ? window.core?.modelManager : undefined
    return (shared as ModelManager) ?? new ModelManager()
  }
}
|
||||
@ -1,7 +1,10 @@
|
||||
// web/utils/modelParam.test.ts
|
||||
import { normalizeValue, validationRules } from './modelParam'
|
||||
import { extractModelLoadParams } from './modelParam';
|
||||
import { extractInferenceParams } from './modelParam';
|
||||
import {
|
||||
normalizeValue,
|
||||
validationRules,
|
||||
extractModelLoadParams,
|
||||
extractInferenceParams,
|
||||
} from './utils'
|
||||
|
||||
describe('validationRules', () => {
|
||||
it('should validate temperature correctly', () => {
|
||||
@ -151,7 +154,6 @@ describe('validationRules', () => {
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
it('should normalize invalid values for keys not listed in validationRules', () => {
|
||||
expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid')
|
||||
expect(normalizeValue('invalid_key', 123)).toBe(123)
|
||||
@ -192,19 +194,16 @@ describe('normalizeValue', () => {
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
it('should handle invalid values correctly by falling back to originParams', () => {
|
||||
const modelParams = { temperature: 'invalid', token_limit: -1 };
|
||||
const originParams = { temperature: 0.5, token_limit: 100 };
|
||||
expect(extractInferenceParams(modelParams, originParams)).toEqual(originParams);
|
||||
});
|
||||
|
||||
const modelParams = { temperature: 'invalid', token_limit: -1 }
|
||||
const originParams = { temperature: 0.5, token_limit: 100 }
|
||||
expect(extractInferenceParams(modelParams as any, originParams)).toEqual(originParams)
|
||||
})
|
||||
|
||||
it('should return an empty object when no modelParams are provided', () => {
|
||||
expect(extractModelLoadParams()).toEqual({});
|
||||
});
|
||||
|
||||
expect(extractModelLoadParams()).toEqual({})
|
||||
})
|
||||
|
||||
it('should return an empty object when no modelParams are provided', () => {
|
||||
expect(extractInferenceParams()).toEqual({});
|
||||
});
|
||||
expect(extractInferenceParams()).toEqual({})
|
||||
})
|
||||
@ -1,26 +1,20 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
/* eslint-disable @typescript-eslint/naming-convention */
|
||||
import { ModelRuntimeParams, ModelSettingParams } from '@janhq/core'
|
||||
|
||||
import { ModelParams } from '@/types/model'
|
||||
import { ModelParams, ModelRuntimeParams, ModelSettingParams } from '../../types'
|
||||
|
||||
/**
|
||||
* Validation rules for model parameters
|
||||
*/
|
||||
export const validationRules: { [key: string]: (value: any) => boolean } = {
|
||||
temperature: (value: any) =>
|
||||
typeof value === 'number' && value >= 0 && value <= 2,
|
||||
temperature: (value: any) => typeof value === 'number' && value >= 0 && value <= 2,
|
||||
token_limit: (value: any) => Number.isInteger(value) && value >= 0,
|
||||
top_k: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
|
||||
top_p: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
|
||||
stream: (value: any) => typeof value === 'boolean',
|
||||
max_tokens: (value: any) => Number.isInteger(value) && value >= 0,
|
||||
stop: (value: any) =>
|
||||
Array.isArray(value) && value.every((v) => typeof v === 'string'),
|
||||
frequency_penalty: (value: any) =>
|
||||
typeof value === 'number' && value >= 0 && value <= 1,
|
||||
presence_penalty: (value: any) =>
|
||||
typeof value === 'number' && value >= 0 && value <= 1,
|
||||
stop: (value: any) => Array.isArray(value) && value.every((v) => typeof v === 'string'),
|
||||
frequency_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
|
||||
presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
|
||||
|
||||
ctx_len: (value: any) => Number.isInteger(value) && value >= 0,
|
||||
ngl: (value: any) => Number.isInteger(value) && value >= 0,
|
||||
@ -76,6 +70,7 @@ export const extractInferenceParams = (
|
||||
stop: undefined,
|
||||
frequency_penalty: undefined,
|
||||
presence_penalty: undefined,
|
||||
engine: undefined,
|
||||
}
|
||||
|
||||
const runtimeParams: ModelRuntimeParams = {}
|
||||
@ -119,11 +114,18 @@ export const extractModelLoadParams = (
|
||||
embedding: undefined,
|
||||
n_parallel: undefined,
|
||||
cpu_threads: undefined,
|
||||
pre_prompt: undefined,
|
||||
system_prompt: undefined,
|
||||
ai_prompt: undefined,
|
||||
user_prompt: undefined,
|
||||
prompt_template: undefined,
|
||||
model_path: undefined,
|
||||
llama_model_path: undefined,
|
||||
mmproj: undefined,
|
||||
cont_batching: undefined,
|
||||
vision_model: undefined,
|
||||
text_model: undefined,
|
||||
engine: undefined,
|
||||
}
|
||||
const settingParams: ModelSettingParams = {}
|
||||
|
||||
@ -8,7 +8,8 @@ jest.mock('../../helper', () => ({
|
||||
|
||||
jest.mock('../../helper/path', () => ({
|
||||
validatePath: jest.fn().mockReturnValue('path/to/folder'),
|
||||
normalizeFilePath: () => process.platform === 'win32' ? 'C:\\Users\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
|
||||
normalizeFilePath: () =>
|
||||
process.platform === 'win32' ? 'C:\\Users\\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
|
||||
}))
|
||||
|
||||
jest.mock(
|
||||
|
||||
@ -50,11 +50,6 @@ export class Downloader implements Processor {
|
||||
const initialDownloadState: DownloadState = {
|
||||
modelId,
|
||||
fileName,
|
||||
time: {
|
||||
elapsed: 0,
|
||||
remaining: 0,
|
||||
},
|
||||
speed: 0,
|
||||
percent: 0,
|
||||
size: {
|
||||
total: 0,
|
||||
|
||||
@ -343,7 +343,7 @@ export const chatCompletions = async (request: any, reply: any) => {
|
||||
|
||||
// add engine for new cortex cpp engine
|
||||
if (requestedModel.engine === 'nitro') {
|
||||
request.body.engine = 'cortex.llamacpp'
|
||||
request.body.engine = 'llama-cpp'
|
||||
}
|
||||
|
||||
const fetch = require('node-fetch')
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
import { CORTEX_DEFAULT_PORT } from './consts'
|
||||
|
||||
import { NITRO_DEFAULT_PORT } from './consts';
|
||||
|
||||
it('should test NITRO_DEFAULT_PORT', () => {
|
||||
expect(NITRO_DEFAULT_PORT).toBe(3928);
|
||||
});
|
||||
it('should test CORTEX_DEFAULT_PORT', () => {
|
||||
expect(CORTEX_DEFAULT_PORT).toBe(39291)
|
||||
})
|
||||
|
||||
@ -1,19 +1,9 @@
|
||||
// The PORT to use for the Nitro subprocess
|
||||
export const NITRO_DEFAULT_PORT = 3928
|
||||
export const CORTEX_DEFAULT_PORT = 39291
|
||||
|
||||
// The HOST address to use for the Nitro subprocess
|
||||
export const LOCAL_HOST = '127.0.0.1'
|
||||
|
||||
export const SUPPORTED_MODEL_FORMAT = '.gguf'
|
||||
|
||||
// The URL for the Nitro subprocess
|
||||
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
|
||||
// The URL for the Nitro subprocess to load a model
|
||||
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
|
||||
// The URL for the Nitro subprocess to validate a model
|
||||
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
|
||||
|
||||
// The URL for the Nitro subprocess to kill itself
|
||||
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
|
||||
|
||||
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
|
||||
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default nitro url
|
||||
|
||||
@ -1,5 +1,3 @@
|
||||
|
||||
|
||||
import { startModel } from './startStopModel'
|
||||
|
||||
describe('startModel', () => {
|
||||
@ -7,10 +5,6 @@
|
||||
const modelId = 'testModelId'
|
||||
const settingParams = undefined
|
||||
|
||||
const result = await startModel(modelId, settingParams)
|
||||
|
||||
expect(result).toEqual({
|
||||
error: expect.any(Error),
|
||||
})
|
||||
expect(startModel(modelId, settingParams)).resolves.toThrow()
|
||||
})
|
||||
})
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
import { join } from 'path'
|
||||
import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
|
||||
import { ModelSettingParams } from '../../../../types'
|
||||
import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'
|
||||
|
||||
/**
|
||||
* Start a model
|
||||
@ -9,70 +8,18 @@ import { ModelSettingParams } from '../../../../types'
|
||||
* @returns
|
||||
*/
|
||||
export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
|
||||
try {
|
||||
await runModel(modelId, settingParams)
|
||||
|
||||
return {
|
||||
message: `Model ${modelId} started`,
|
||||
}
|
||||
} catch (e) {
|
||||
return {
|
||||
error: e,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run a model using installed cortex extension
|
||||
* @param model
|
||||
* @param settingParams
|
||||
*/
|
||||
const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
|
||||
const janDataFolderPath = getJanDataFolderPath()
|
||||
const modelFolder = join(janDataFolderPath, 'models', model)
|
||||
let module = join(
|
||||
getJanExtensionsPath(),
|
||||
'@janhq',
|
||||
'inference-cortex-extension',
|
||||
'dist',
|
||||
'node',
|
||||
'index.cjs'
|
||||
)
|
||||
// Just reuse the cortex extension implementation, don't duplicate then lost of sync
|
||||
return import(module).then((extension) =>
|
||||
extension
|
||||
.loadModel(
|
||||
{
|
||||
modelFolder,
|
||||
model,
|
||||
},
|
||||
settingParams
|
||||
)
|
||||
.then(() => log(`[SERVER]::Debug: Model is loaded`))
|
||||
.then({
|
||||
message: 'Model started',
|
||||
return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ model: modelId, ...settingParams }),
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
/*
|
||||
* Stop model and kill nitro process.
|
||||
* Stop model.
|
||||
*/
|
||||
export const stopModel = async (_modelId: string) => {
|
||||
let module = join(
|
||||
getJanExtensionsPath(),
|
||||
'@janhq',
|
||||
'inference-cortex-extension',
|
||||
'dist',
|
||||
'node',
|
||||
'index.cjs'
|
||||
)
|
||||
// Just reuse the cortex extension implementation, don't duplicate then lost of sync
|
||||
return import(module).then((extension) =>
|
||||
extension
|
||||
.unloadModel()
|
||||
.then(() => log(`[SERVER]::Debug: Model is unloaded`))
|
||||
.then({
|
||||
message: 'Model stopped',
|
||||
export const stopModel = async (modelId: string) => {
|
||||
return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ model: modelId }),
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
@ -72,6 +72,8 @@ export enum DownloadEvent {
|
||||
onFileDownloadUpdate = 'onFileDownloadUpdate',
|
||||
onFileDownloadError = 'onFileDownloadError',
|
||||
onFileDownloadSuccess = 'onFileDownloadSuccess',
|
||||
onFileDownloadStopped = 'onFileDownloadStopped',
|
||||
onFileDownloadStarted = 'onFileDownloadStarted',
|
||||
onFileUnzipSuccess = 'onFileUnzipSuccess',
|
||||
}
|
||||
|
||||
|
||||
@ -6,8 +6,8 @@ export type FileStat = {
|
||||
export type DownloadState = {
|
||||
modelId: string // TODO: change to download id
|
||||
fileName: string
|
||||
time: DownloadTime
|
||||
speed: number
|
||||
time?: DownloadTime
|
||||
speed?: number
|
||||
|
||||
percent: number
|
||||
size: DownloadSize
|
||||
|
||||
@ -6,8 +6,8 @@ import { FileMetadata } from '../file'
|
||||
*/
|
||||
export type ModelInfo = {
|
||||
id: string
|
||||
settings: ModelSettingParams
|
||||
parameters: ModelRuntimeParams
|
||||
settings?: ModelSettingParams
|
||||
parameters?: ModelRuntimeParams
|
||||
engine?: InferenceEngine
|
||||
}
|
||||
|
||||
@ -15,7 +15,6 @@ export type ModelInfo = {
|
||||
* Represents the inference engine.
|
||||
* @stored
|
||||
*/
|
||||
|
||||
export enum InferenceEngine {
|
||||
anthropic = 'anthropic',
|
||||
mistral = 'mistral',
|
||||
@ -28,11 +27,13 @@ export enum InferenceEngine {
|
||||
nitro_tensorrt_llm = 'nitro-tensorrt-llm',
|
||||
cohere = 'cohere',
|
||||
nvidia = 'nvidia',
|
||||
cortex_llamacpp = 'cortex.llamacpp',
|
||||
cortex_onnx = 'cortex.onnx',
|
||||
cortex_tensorrtllm = 'cortex.tensorrt-llm',
|
||||
cortex = 'cortex',
|
||||
cortex_llamacpp = 'llama-cpp',
|
||||
cortex_onnx = 'onnxruntime',
|
||||
cortex_tensorrtllm = 'tensorrt-llm',
|
||||
}
|
||||
|
||||
// Represents an artifact of a model, including its filename and URL
|
||||
export type ModelArtifact = {
|
||||
filename: string
|
||||
url: string
|
||||
@ -104,6 +105,7 @@ export type Model = {
|
||||
engine: InferenceEngine
|
||||
}
|
||||
|
||||
// Represents metadata associated with a model
|
||||
export type ModelMetadata = {
|
||||
author: string
|
||||
tags: string[]
|
||||
@ -124,14 +126,20 @@ export type ModelSettingParams = {
|
||||
n_parallel?: number
|
||||
cpu_threads?: number
|
||||
prompt_template?: string
|
||||
pre_prompt?: string
|
||||
system_prompt?: string
|
||||
ai_prompt?: string
|
||||
user_prompt?: string
|
||||
// path param
|
||||
model_path?: string
|
||||
// legacy path param
|
||||
llama_model_path?: string
|
||||
// clip model path
|
||||
mmproj?: string
|
||||
cont_batching?: boolean
|
||||
vision_model?: boolean
|
||||
text_model?: boolean
|
||||
engine?: boolean
|
||||
}
|
||||
|
||||
/**
|
||||
@ -150,11 +158,12 @@ export type ModelRuntimeParams = {
|
||||
engine?: string
|
||||
}
|
||||
|
||||
// Represents a model that failed to initialize, including the error
|
||||
export type ModelInitFailed = Model & {
|
||||
error: Error
|
||||
}
|
||||
|
||||
/**
|
||||
* ModelFile is the model.json entity and it's file metadata
|
||||
* ModelParams types
|
||||
*/
|
||||
export type ModelFile = Model & FileMetadata
|
||||
export type ModelParams = ModelRuntimeParams | ModelSettingParams
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
export type OptionType = 'SYMLINK' | 'MOVE_BINARY_FILE'
|
||||
export type OptionType = 'symlink' | 'copy'
|
||||
|
||||
export type ModelImportOption = {
|
||||
type: OptionType
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { GpuSetting } from '../miscellaneous'
|
||||
import { Model, ModelFile } from './modelEntity'
|
||||
import { Model } from './modelEntity'
|
||||
import { OptionType } from './modelImport'
|
||||
|
||||
/**
|
||||
* Model extension for managing models.
|
||||
@ -8,38 +8,46 @@ export interface ModelInterface {
|
||||
/**
|
||||
* Downloads a model.
|
||||
* @param model - The model to download.
|
||||
* @param network - Optional object to specify proxy/whether to ignore SSL certificates.
|
||||
* @returns A Promise that resolves when the model has been downloaded.
|
||||
*/
|
||||
downloadModel(
|
||||
model: ModelFile,
|
||||
gpuSettings?: GpuSetting,
|
||||
network?: { ignoreSSL?: boolean; proxy?: string }
|
||||
): Promise<void>
|
||||
pullModel(model: string, id?: string, name?: string): Promise<void>
|
||||
|
||||
/**
|
||||
* Cancels the download of a specific model.
|
||||
* @param {string} modelId - The ID of the model to cancel the download for.
|
||||
* @returns {Promise<void>} A promise that resolves when the download has been cancelled.
|
||||
*/
|
||||
cancelModelDownload(modelId: string): Promise<void>
|
||||
cancelModelPull(model: string): Promise<void>
|
||||
|
||||
/**
|
||||
* Deletes a model.
|
||||
* @param modelId - The ID of the model to delete.
|
||||
* @returns A Promise that resolves when the model has been deleted.
|
||||
*/
|
||||
deleteModel(model: ModelFile): Promise<void>
|
||||
deleteModel(model: string): Promise<void>
|
||||
|
||||
/**
|
||||
* Gets a list of downloaded models.
|
||||
* Gets downloaded models.
|
||||
* @returns A Promise that resolves with an array of downloaded models.
|
||||
*/
|
||||
getDownloadedModels(): Promise<ModelFile[]>
|
||||
getModels(): Promise<Model[]>
|
||||
|
||||
/**
|
||||
* Gets a list of configured models.
|
||||
* @returns A Promise that resolves with an array of configured models.
|
||||
* Update a pulled model's metadata
|
||||
* @param model - The model to update.
|
||||
* @returns A Promise that resolves when the model has been updated.
|
||||
*/
|
||||
getConfiguredModels(): Promise<ModelFile[]>
|
||||
updateModel(model: Partial<Model>): Promise<Model>
|
||||
|
||||
/**
 * Import an existing model file.
 * @param model - ID of the model to import.
 * @param modelPath - Path of the model file.
 * @param name - Optional name for the imported model.
 * @param optionType - Optional import option; one of the `OptionType` values.
 * @returns A Promise that resolves when the import call has completed.
 */
importModel(
  model: string,
  modelPath: string,
  name?: string,
  optionType?: OptionType
): Promise<void>
|
||||
}
|
||||
|
||||
@ -1,16 +1,13 @@
|
||||
import * as monitoringInterface from './monitoringInterface'
|
||||
import * as resourceInfo from './resourceInfo'
|
||||
|
||||
import * as monitoringInterface from './monitoringInterface';
|
||||
import * as resourceInfo from './resourceInfo';
|
||||
|
||||
import * as index from './index';
|
||||
import * as monitoringInterface from './monitoringInterface';
|
||||
import * as resourceInfo from './resourceInfo';
|
||||
import * as index from './index'
|
||||
|
||||
it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
|
||||
for (const key in monitoringInterface) {
|
||||
expect(index[key]).toBe(monitoringInterface[key]);
|
||||
expect(index[key]).toBe(monitoringInterface[key])
|
||||
}
|
||||
for (const key in resourceInfo) {
|
||||
expect(index[key]).toBe(resourceInfo[key]);
|
||||
expect(index[key]).toBe(resourceInfo[key])
|
||||
}
|
||||
});
|
||||
})
|
||||
|
||||
@ -18,7 +18,8 @@
|
||||
"docs/**/*",
|
||||
"scripts/**/*",
|
||||
"icons/**/*",
|
||||
"themes"
|
||||
"themes",
|
||||
"shared"
|
||||
],
|
||||
"asarUnpack": [
|
||||
"pre-install",
|
||||
@ -26,7 +27,8 @@
|
||||
"docs",
|
||||
"scripts",
|
||||
"icons",
|
||||
"themes"
|
||||
"themes",
|
||||
"shared"
|
||||
],
|
||||
"publish": [
|
||||
{
|
||||
|
||||
0
electron/shared/.gitkeep
Normal file
0
electron/shared/.gitkeep
Normal file
@ -15,6 +15,8 @@ import {
|
||||
import { Constants } from './constants'
|
||||
import { HubPage } from '../pages/hubPage'
|
||||
import { CommonActions } from '../pages/commonActions'
|
||||
import { rmSync } from 'fs'
|
||||
import * as path from 'path'
|
||||
|
||||
export let electronApp: ElectronApplication
|
||||
export let page: Page
|
||||
@ -103,10 +105,14 @@ export const test = base.extend<
|
||||
},
|
||||
{ auto: true },
|
||||
],
|
||||
|
||||
})
|
||||
|
||||
test.beforeAll(async () => {
|
||||
await rmSync(path.join(__dirname, '../../test-data'), {
|
||||
recursive: true,
|
||||
force: true,
|
||||
})
|
||||
|
||||
test.setTimeout(TIMEOUT)
|
||||
await setupElectron()
|
||||
await page.waitForSelector('img[alt="Jan - Logo"]', {
|
||||
|
||||
@ -16,6 +16,7 @@ test.beforeAll(async () => {
|
||||
test('explores hub', async ({ hubPage }) => {
|
||||
await hubPage.navigateByMenu()
|
||||
await hubPage.verifyContainerVisible()
|
||||
await hubPage.scrollToBottom()
|
||||
const useModelBtn = page.getByTestId(/^use-model-btn-.*/).first()
|
||||
|
||||
await expect(useModelBtn).toBeVisible({
|
||||
|
||||
@ -8,9 +8,8 @@ export class BasePage {
|
||||
constructor(
|
||||
protected readonly page: Page,
|
||||
readonly action: CommonActions,
|
||||
protected containerId: string,
|
||||
) {
|
||||
}
|
||||
protected containerId: string
|
||||
) {}
|
||||
|
||||
public getValue(key: string) {
|
||||
return this.action.getValue(key)
|
||||
@ -37,6 +36,12 @@ export class BasePage {
|
||||
expect(container.isVisible()).toBeTruthy()
|
||||
}
|
||||
|
||||
async scrollToBottom() {
|
||||
await this.page.evaluate(() => {
|
||||
window.scrollTo(0, document.body.scrollHeight)
|
||||
})
|
||||
}
|
||||
|
||||
async waitUpdateLoader() {
|
||||
await this.isElementVisible('img[alt="Jan - Logo"]')
|
||||
}
|
||||
|
||||
@ -63,12 +63,15 @@ export default class JanAssistantExtension extends AssistantExtension {
|
||||
}
|
||||
|
||||
async getAssistants(): Promise<Assistant[]> {
|
||||
try {
|
||||
// get all the assistant directories
|
||||
// get all the assistant metadata json
|
||||
const results: Assistant[] = []
|
||||
|
||||
const allFileName: string[] = await fs.readdirSync(
|
||||
JanAssistantExtension._homeDir
|
||||
)
|
||||
|
||||
for (const fileName of allFileName) {
|
||||
const filePath = await joinPath([
|
||||
JanAssistantExtension._homeDir,
|
||||
@ -96,6 +99,10 @@ export default class JanAssistantExtension extends AssistantExtension {
|
||||
}
|
||||
|
||||
return results
|
||||
} catch (err) {
|
||||
console.debug(err)
|
||||
return [this.defaultAssistant]
|
||||
}
|
||||
}
|
||||
|
||||
async deleteAssistant(assistant: Assistant): Promise<void> {
|
||||
@ -112,7 +119,10 @@ export default class JanAssistantExtension extends AssistantExtension {
|
||||
}
|
||||
|
||||
private async createJanAssistant(): Promise<void> {
|
||||
const janAssistant: Assistant = {
|
||||
await this.createAssistant(this.defaultAssistant)
|
||||
}
|
||||
|
||||
private defaultAssistant: Assistant = {
|
||||
avatar: '',
|
||||
thread_location: undefined,
|
||||
id: 'jan',
|
||||
@ -144,7 +154,4 @@ Helpful Answer:`,
|
||||
file_ids: [],
|
||||
metadata: undefined,
|
||||
}
|
||||
|
||||
await this.createAssistant(janAssistant)
|
||||
}
|
||||
}
|
||||
|
||||
@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib'
|
||||
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
|
||||
import { readEmbeddingEngine } from './engine'
|
||||
|
||||
import path from 'path'
|
||||
|
||||
export class Retrieval {
|
||||
public chunkSize: number = 100
|
||||
public chunkOverlap?: number = 0
|
||||
|
||||
1
extensions/inference-cortex-extension/bin/version.txt
Normal file
1
extensions/inference-cortex-extension/bin/version.txt
Normal file
@ -0,0 +1 @@
|
||||
1.0.2-rc4
|
||||
41
extensions/inference-cortex-extension/download.bat
Normal file
41
extensions/inference-cortex-extension/download.bat
Normal file
@ -0,0 +1,41 @@
|
||||
@echo off
@REM Downloads the cortex server archive, the cortex.llamacpp engine builds and the
@REM CUDA archives for Windows, then moves each engine's DLLs (except engine.dll)
@REM next to the server executable in %BIN_PATH%.
set BIN_PATH=./bin
set SHARED_PATH=./../../electron/shared
set /p CORTEX_VERSION=<./bin/version.txt

@REM Download cortex.llamacpp binaries
set VERSION=v0.1.35
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
@REM Must name exactly the engine folders created under %BIN_PATH% by the downloads below;
@REM otherwise the DLL loop at the end of this script finds nothing to move.
set SUBFOLDERS=avx2-cuda-12-0 avx2-cuda-11-7 noavx-cuda-12-0 noavx-cuda-11-7 noavx avx avx2 avx512 vulkan

call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%

@REM Keep only the server executable, renamed to cortex-server.exe
move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe
del %BIN_PATH%\cortex-beta.exe
del %BIN_PATH%\cortex.exe

@REM Loop through each folder and move DLLs (excluding engine.dll)
for %%F in (%SUBFOLDERS%) do (
    echo Processing folder: %BIN_PATH%\%%F

    @REM Move all .dll files except engine.dll
    for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
        if /I not "%%~nxD"=="engine.dll" (
            move "%%D" "%BIN_PATH%"
        )
    )
)

echo DLL files moved successfully.
|
||||
47
extensions/inference-cortex-extension/download.sh
Executable file
47
extensions/inference-cortex-extension/download.sh
Executable file
@ -0,0 +1,47 @@
|
||||
#!/bin/bash
# Downloads the cortex server archive and the cortex.llamacpp engine builds for the
# current platform (Linux or macOS) into ./bin, plus the CUDA archives on Linux.

# Abort on the first failing command so a broken download or rename is reported to
# the caller instead of being hidden behind the exit status of the last command.
set -e

# Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt)
CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
# Detect platform
OS_TYPE=$(uname)

# NOTE(review): most download calls below carry a trailing `1` argument; it is kept
# exactly as found because its purpose is not evident from this script - confirm.
if [ "$OS_TYPE" == "Linux" ]; then
    # Linux downloads
    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
    # Keep only the server binary, renamed to cortex-server
    mv ./bin/cortex-server-beta ./bin/cortex-server
    rm -rf ./bin/cortex
    rm -rf ./bin/cortex-beta
    chmod +x "./bin/cortex-server"

    # Download engines for Linux
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1

elif [ "$OS_TYPE" == "Darwin" ]; then
    # macOS downloads
    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
    # Keep only the server binary, renamed to cortex-server
    mv ./bin/cortex-server-beta ./bin/cortex-server
    rm -rf ./bin/cortex
    rm -rf ./bin/cortex-beta
    chmod +x "./bin/cortex-server"

    # Download engines for macOS
    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp

else
    echo "Unsupported operating system: $OS_TYPE"
    exit 1
fi
|
||||
@ -10,12 +10,12 @@
|
||||
"scripts": {
|
||||
"test": "jest",
|
||||
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
|
||||
"downloadnitro:linux:darwin": "./download.sh",
|
||||
"downloadnitro:win32": "download.bat",
|
||||
"downloadnitro": "run-script-os",
|
||||
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish": "yarn test && run-script-os"
|
||||
"downloadcortex:linux:darwin": "./download.sh",
|
||||
"downloadcortex:win32": "download.bat",
|
||||
"downloadcortex": "run-script-os",
|
||||
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
|
||||
"build:publish": "run-script-os"
|
||||
},
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
@ -50,6 +50,8 @@
|
||||
"cpu-instructions": "^0.0.13",
|
||||
"decompress": "^4.2.1",
|
||||
"fetch-retry": "^5.0.6",
|
||||
"ky": "^1.7.2",
|
||||
"p-queue": "^8.0.1",
|
||||
"rxjs": "^7.8.1",
|
||||
"tcp-port-used": "^1.0.2",
|
||||
"terminate": "2.6.1",
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["34B", "Finetuned"],
|
||||
"size": 21556982144
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 5056982144
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["Vision"],
|
||||
"size": 5750000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -30,5 +30,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4370000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["22B", "Finetuned", "Featured"],
|
||||
"size": 13341237440
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["34B", "Finetuned"],
|
||||
"size": 21500000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["Tiny"],
|
||||
"size": 1430000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["33B"],
|
||||
"size": 19940000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["2B", "Finetuned", "Tiny"],
|
||||
"size": 1630000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 5330000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -37,5 +37,5 @@
|
||||
],
|
||||
"size": 16600000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -38,5 +38,5 @@
|
||||
],
|
||||
"size": 1710000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -37,5 +37,5 @@
|
||||
],
|
||||
"size": 5760000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["70B", "Foundational Model"],
|
||||
"size": 43920000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["7B", "Foundational Model"],
|
||||
"size": 4080000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["8B"],
|
||||
"size": 4920000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -34,5 +34,5 @@
|
||||
],
|
||||
"size": 4920000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -37,5 +37,5 @@
|
||||
],
|
||||
"size": 42500000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -37,5 +37,5 @@
|
||||
],
|
||||
"size": 4920000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["1B", "Featured"],
|
||||
"size": 1320000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["3B", "Featured"],
|
||||
"size": 3420000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -34,5 +34,5 @@
|
||||
],
|
||||
"size": 1170000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -32,5 +32,5 @@
|
||||
"tags": ["Vision"],
|
||||
"size": 7870000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -32,5 +32,5 @@
|
||||
"tags": ["Vision"],
|
||||
"size": 4370000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -32,5 +32,5 @@
|
||||
"size": 4370000000,
|
||||
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png"
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -30,5 +30,5 @@
|
||||
"tags": ["70B", "Foundational Model"],
|
||||
"size": 26440000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4370000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["Recommended", "7B", "Finetuned"],
|
||||
"size": 4370000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -34,5 +34,5 @@
|
||||
],
|
||||
"size": 2320000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -34,5 +34,5 @@
|
||||
],
|
||||
"size": 8366000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["34B", "Finetuned"],
|
||||
"size": 20220000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4770000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4680000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["14B", "Featured"],
|
||||
"size": 8990000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["32B"],
|
||||
"size": 19900000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["72B"],
|
||||
"size": 47400000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["7B", "Featured"],
|
||||
"size": 4680000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["7B", "Featured"],
|
||||
"size": 4680000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["3B", "Finetuned", "Tiny"],
|
||||
"size": 2970000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -30,5 +30,5 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4370000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["Tiny", "Foundation Model"],
|
||||
"size": 669000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"size": 4370000000,
|
||||
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,6 +31,6 @@
|
||||
"tags": ["7B", "Finetuned"],
|
||||
"size": 4410000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["Recommended", "13B", "Finetuned"],
|
||||
"size": 7870000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -31,5 +31,5 @@
|
||||
"tags": ["34B", "Foundational Model"],
|
||||
"size": 20660000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
"engine": "llama-cpp"
|
||||
}
|
||||
@ -114,19 +114,7 @@ export default [
|
||||
]),
|
||||
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
||||
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
||||
INFERENCE_URL: JSON.stringify(
|
||||
process.env.INFERENCE_URL ||
|
||||
'http://127.0.0.1:3928/inferences/server/chat_completion'
|
||||
),
|
||||
TROUBLESHOOTING_URL: JSON.stringify(
|
||||
'https://jan.ai/guides/troubleshooting'
|
||||
),
|
||||
JAN_SERVER_INFERENCE_URL: JSON.stringify(
|
||||
'http://localhost:1337/v1/chat/completions'
|
||||
),
|
||||
CUDA_DOWNLOAD_URL: JSON.stringify(
|
||||
'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz'
|
||||
),
|
||||
CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
|
||||
}),
|
||||
// Allow json resolution
|
||||
json(),
|
||||
@ -1,7 +1,5 @@
|
||||
declare const NODE: string
|
||||
declare const INFERENCE_URL: string
|
||||
declare const TROUBLESHOOTING_URL: string
|
||||
declare const JAN_SERVER_INFERENCE_URL: string
|
||||
declare const CORTEX_API_URL: string
|
||||
declare const DEFAULT_SETTINGS: Array<any>
|
||||
declare const MODELS: Array<any>
|
||||
|
||||
168
extensions/inference-cortex-extension/src/index.ts
Normal file
168
extensions/inference-cortex-extension/src/index.ts
Normal file
@ -0,0 +1,168 @@
|
||||
/**
|
||||
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
|
||||
* The class provides methods for initializing and stopping a model, and for making inference requests.
|
||||
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
|
||||
* @version 1.0.0
|
||||
* @module inference-extension/src/index
|
||||
*/
|
||||
|
||||
import {
|
||||
Model,
|
||||
executeOnMain,
|
||||
systemInformation,
|
||||
joinPath,
|
||||
dirName,
|
||||
LocalOAIEngine,
|
||||
InferenceEngine,
|
||||
getJanDataFolderPath,
|
||||
extractModelLoadParams,
|
||||
} from '@janhq/core'
|
||||
import PQueue from 'p-queue'
|
||||
import ky from 'ky'
|
||||
|
||||
/**
 * Local inference engine extension backed by a cortex.cpp server.
 * It registers the bundled models, asks the main process to run the server,
 * and starts/stops models through the HTTP API rooted at CORTEX_API_URL.
 */
export default class JanInferenceCortexExtension extends LocalOAIEngine {
  // DEPRECATED
  nodeModule: string = 'node'

  /** Single-concurrency queue; currently used to run the startup health check. */
  queue = new PQueue({ concurrency: 1 })

  provider: string = InferenceEngine.cortex

  /**
   * The URL for making inference requests.
   */
  inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`

  /**
   * Registers the bundled models, subscribes to events emitted by the
   * @janhq/core package (via the base class), then starts the cortex.cpp
   * server and queues a health check against it.
   */
  async onLoad() {
    const models = MODELS as Model[]

    this.registerModels(models)

    super.onLoad()

    // Run the process watchdog
    const systemInfo = await systemInformation()
    await this.clean()
    await executeOnMain(NODE, 'run', systemInfo)

    // The health check runs in the background; attach a handler so that
    // exhausting the retries is logged instead of surfacing as an
    // unhandled promise rejection.
    this.queue
      .add(() => this.healthz())
      .catch((error) => {
        console.error('cortex.cpp health check failed:', error)
      })

    window.addEventListener('beforeunload', () => {
      this.clean()
    })
  }

  /**
   * Requests destruction of the cortex processes, asks the main process to
   * dispose, then runs the base class teardown.
   */
  onUnload(): void {
    this.clean()
    executeOnMain(NODE, 'dispose')
    super.onUnload()
  }

  /**
   * Starts a model on the cortex.cpp server.
   *
   * Legacy (`nitro`) models get their `llama_model_path` / `mmproj` settings
   * resolved to file paths; for every other model those two settings are
   * removed before the request is built. Note that `model.settings` is
   * reassigned on the passed-in model object.
   *
   * @param model - The model to start.
   * @returns A Promise that resolves once the start request has succeeded.
   * @throws The JSON error body returned by the server when one can be read,
   *   otherwise the original request error.
   */
  override async loadModel(
    model: Model & { file_path?: string }
  ): Promise<void> {
    if (
      model.engine === InferenceEngine.nitro &&
      model.settings.llama_model_path
    ) {
      // Legacy chat model support
      model.settings = {
        ...model.settings,
        llama_model_path: await getModelFilePath(
          model,
          model.settings.llama_model_path
        ),
      }
    } else {
      const { llama_model_path, ...settings } = model.settings
      model.settings = settings
    }

    if (model.engine === InferenceEngine.nitro && model.settings.mmproj) {
      // Legacy clip vision model support
      model.settings = {
        ...model.settings,
        mmproj: await getModelFilePath(model, model.settings.mmproj),
      }
    } else {
      const { mmproj, ...settings } = model.settings
      model.settings = settings
    }

    try {
      await ky
        .post(`${CORTEX_API_URL}/v1/models/start`, {
          json: {
            ...extractModelLoadParams(model.settings),
            model: model.id,
            engine:
              model.engine === InferenceEngine.nitro // Legacy model cache
                ? InferenceEngine.cortex_llamacpp
                : model.engine,
          },
        })
        .json()
    } catch (error) {
      // Prefer the server's JSON error payload. If there is no response, or
      // its body is not valid JSON, fall back to the original error rather
      // than letting a body-parsing failure replace it.
      const response = (error as { response?: Response } | undefined)?.response
      const details = await response?.json().catch(() => undefined)
      throw details ?? error
    }
  }

  /**
   * Stops a model on the cortex.cpp server.
   * @param model - The model to stop; only its `id` is sent.
   * @returns A Promise that resolves once the stop request has succeeded.
   */
  override async unloadModel(model: Model): Promise<void> {
    await ky
      .post(`${CORTEX_API_URL}/v1/models/stop`, {
        json: { model: model.id },
      })
      .json()
  }

  /**
   * Do health check on cortex.cpp
   * @returns A Promise that resolves when `GET /healthz` succeeds; the
   *   request is configured with a retry limit of 10.
   */
  healthz(): Promise<void> {
    return ky
      .get(`${CORTEX_API_URL}/healthz`, {
        retry: {
          limit: 10,
          methods: ['get'],
        },
      })
      .then(() => {})
  }

  /**
   * Clean cortex processes
   * @returns A Promise for the `DELETE /processmanager/destroy` request;
   *   failures (including the 2 second timeout) are deliberately ignored.
   */
  clean(): Promise<any> {
    return ky
      .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
        timeout: 2000, // maximum 2 seconds
      })
      .catch(() => {
        // Do nothing
      })
  }
}
|
||||
|
||||
/// Legacy
|
||||
/**
 * Resolves the on-disk path of a file belonging to a legacy model.
 *
 * When the model's first source URL is present and does not start with
 * `http`, that URL is returned as the path. In every other case, including a
 * model with no sources, the path is built as
 * `<jan data folder>/models/<model id>/<file>`.
 *
 * @param model - The model that owns the file.
 * @param file - File name used when building the data-folder path.
 * @returns The resolved file path.
 */
export const getModelFilePath = async (
  model: Model,
  file: string
): Promise<string> => {
  const sourceUrl = model.sources?.[0]?.url

  // Symlink to the model file
  // Only honour the source URL when it actually exists; a missing source
  // must not leak `undefined` out of a function declared to return a string.
  if (sourceUrl && !sourceUrl.startsWith('http')) {
    return sourceUrl
  }

  return joinPath([await getJanDataFolderPath(), 'models', model.id, file])
}
|
||||
///
|
||||
@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from '@jest/globals'
|
||||
import { executableNitroFile } from './execute'
|
||||
import { executableCortexFile } from './execute'
|
||||
import { GpuSetting } from '@janhq/core'
|
||||
import { cpuInfo } from 'cpu-instructions'
|
||||
|
||||
@ -27,10 +27,10 @@ jest.mock('cpu-instructions', () => ({
|
||||
cpuInfo: jest.fn(),
|
||||
},
|
||||
}))
|
||||
let mock = cpuInfo.cpuInfo as jest.Mock
|
||||
mock.mockReturnValue([])
|
||||
let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
|
||||
mockCpuInfo.mockReturnValue([])
|
||||
|
||||
describe('test executable nitro file', () => {
|
||||
describe('test executable cortex file', () => {
|
||||
afterAll(function () {
|
||||
Object.defineProperty(process, 'platform', {
|
||||
value: originalPlatform,
|
||||
@ -44,10 +44,14 @@ describe('test executable nitro file', () => {
|
||||
Object.defineProperty(process, 'arch', {
|
||||
value: 'arm64',
|
||||
})
|
||||
expect(executableNitroFile(testSettings)).toEqual(
|
||||
expect(executableCortexFile(testSettings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`mac-arm64`),
|
||||
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(),
|
||||
enginePath: expect.stringContaining(`arm64`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath:
|
||||
originalPlatform === 'darwin'
|
||||
? expect.stringContaining(`cortex-server`)
|
||||
: expect.anything(),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -55,10 +59,14 @@ describe('test executable nitro file', () => {
|
||||
Object.defineProperty(process, 'arch', {
|
||||
value: 'x64',
|
||||
})
|
||||
expect(executableNitroFile(testSettings)).toEqual(
|
||||
expect(executableCortexFile(testSettings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`mac-x64`),
|
||||
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
|
||||
enginePath: expect.stringContaining(`x64`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath:
|
||||
originalPlatform === 'darwin'
|
||||
? expect.stringContaining(`cortex-server`)
|
||||
: expect.anything(),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -73,10 +81,12 @@ describe('test executable nitro file', () => {
|
||||
...testSettings,
|
||||
run_mode: 'cpu',
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
mockCpuInfo.mockReturnValue(['avx'])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`win`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||
enginePath: expect.stringContaining(`avx`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath: expect.stringContaining(`cortex-server.exe`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -107,10 +117,12 @@ describe('test executable nitro file', () => {
|
||||
},
|
||||
],
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
mockCpuInfo.mockReturnValue(['avx2'])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`win-cuda-11-7`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||
enginePath: expect.stringContaining(`avx2-cuda-11-7`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath: expect.stringContaining(`cortex-server.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -141,10 +153,12 @@ describe('test executable nitro file', () => {
|
||||
},
|
||||
],
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
mockCpuInfo.mockReturnValue(['noavx'])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`win-cuda-12-0`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||
enginePath: expect.stringContaining(`noavx-cuda-12-0`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath: expect.stringContaining(`cortex-server.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -159,10 +173,11 @@ describe('test executable nitro file', () => {
|
||||
...testSettings,
|
||||
run_mode: 'cpu',
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
mockCpuInfo.mockReturnValue(['noavx'])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
enginePath: expect.stringContaining(`noavx`),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -193,10 +208,11 @@ describe('test executable nitro file', () => {
|
||||
},
|
||||
],
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux-cuda-11-7`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
enginePath: expect.stringContaining(`cuda-11-7`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -227,10 +243,11 @@ describe('test executable nitro file', () => {
|
||||
},
|
||||
],
|
||||
}
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux-cuda-12-0`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
enginePath: expect.stringContaining(`cuda-12-0`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -249,12 +266,13 @@ describe('test executable nitro file', () => {
|
||||
|
||||
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||
cpuInstructions.forEach((instruction) => {
|
||||
mock.mockReturnValue([instruction])
|
||||
mockCpuInfo.mockReturnValue([instruction])
|
||||
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux-${instruction}`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
enginePath: expect.stringContaining(instruction),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
@ -273,11 +291,12 @@ describe('test executable nitro file', () => {
|
||||
}
|
||||
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||
cpuInstructions.forEach((instruction) => {
|
||||
mock.mockReturnValue([instruction])
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
mockCpuInfo.mockReturnValue([instruction])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`win-${instruction}`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||
enginePath: expect.stringContaining(instruction),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath: expect.stringContaining(`cortex-server.exe`),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -312,11 +331,12 @@ describe('test executable nitro file', () => {
|
||||
}
|
||||
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
|
||||
cpuInstructions.forEach((instruction) => {
|
||||
mock.mockReturnValue([instruction])
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
mockCpuInfo.mockReturnValue([instruction])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`win-cuda-12-0`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp.exe`),
|
||||
enginePath: expect.stringContaining(`cuda-12-0`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath: expect.stringContaining(`cortex-server.exe`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -351,11 +371,12 @@ describe('test executable nitro file', () => {
|
||||
],
|
||||
}
|
||||
cpuInstructions.forEach((instruction) => {
|
||||
mock.mockReturnValue([instruction])
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
mockCpuInfo.mockReturnValue([instruction])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux-cuda-12-0`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
enginePath: expect.stringContaining(`cuda-12-0`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -391,11 +412,12 @@ describe('test executable nitro file', () => {
|
||||
],
|
||||
}
|
||||
cpuInstructions.forEach((instruction) => {
|
||||
mock.mockReturnValue([instruction])
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
mockCpuInfo.mockReturnValue([instruction])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`linux-vulkan`),
|
||||
executablePath: expect.stringContaining(`cortex-cpp`),
|
||||
enginePath: expect.stringContaining(`vulkan`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath: expect.stringContaining(`cortex-server`),
|
||||
cudaVisibleDevices: '0',
|
||||
vkVisibleDevices: '0',
|
||||
})
|
||||
@ -417,11 +439,15 @@ describe('test executable nitro file', () => {
|
||||
...testSettings,
|
||||
run_mode: 'cpu',
|
||||
}
|
||||
mock.mockReturnValue([])
|
||||
expect(executableNitroFile(settings)).toEqual(
|
||||
mockCpuInfo.mockReturnValue([])
|
||||
expect(executableCortexFile(settings)).toEqual(
|
||||
expect.objectContaining({
|
||||
enginePath: expect.stringContaining(`mac-x64`),
|
||||
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
|
||||
enginePath: expect.stringContaining(`x64`),
|
||||
binPath: expect.stringContaining(`bin`),
|
||||
executablePath:
|
||||
originalPlatform === 'darwin'
|
||||
? expect.stringContaining(`cortex-server`)
|
||||
: expect.anything(),
|
||||
cudaVisibleDevices: '',
|
||||
vkVisibleDevices: '',
|
||||
})
|
||||
@ -2,8 +2,9 @@ import { GpuSetting } from '@janhq/core'
|
||||
import * as path from 'path'
|
||||
import { cpuInfo } from 'cpu-instructions'
|
||||
|
||||
export interface NitroExecutableOptions {
|
||||
export interface CortexExecutableOptions {
|
||||
enginePath: string
|
||||
binPath: string
|
||||
executablePath: string
|
||||
cudaVisibleDevices: string
|
||||
vkVisibleDevices: string
|
||||
@ -36,8 +37,8 @@ const os = (): string => {
|
||||
? 'win'
|
||||
: process.platform === 'darwin'
|
||||
? process.arch === 'arm64'
|
||||
? 'mac-arm64'
|
||||
: 'mac-x64'
|
||||
? 'arm64'
|
||||
: 'x64'
|
||||
: 'linux'
|
||||
}
|
||||
|
||||
@ -66,7 +67,7 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
|
||||
* The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
|
||||
* @returns
|
||||
*/
|
||||
const cpuInstructions = () => {
|
||||
const cpuInstructions = (): string => {
|
||||
if (process.platform === 'darwin') return ''
|
||||
return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
|
||||
? 'avx512'
|
||||
@ -81,29 +82,32 @@ const cpuInstructions = () => {
|
||||
* Find which executable file to run based on the current platform.
|
||||
* @returns The name of the executable file to run.
|
||||
*/
|
||||
export const executableNitroFile = (
|
||||
export const executableCortexFile = (
|
||||
gpuSetting?: GpuSetting
|
||||
): NitroExecutableOptions => {
|
||||
let engineFolder = [
|
||||
os(),
|
||||
...(gpuSetting?.vulkan
|
||||
? []
|
||||
): CortexExecutableOptions => {
|
||||
const cpuInstruction = cpuInstructions()
|
||||
let engineFolder = gpuSetting?.vulkan
|
||||
? 'vulkan'
|
||||
: process.platform === 'darwin'
|
||||
? os()
|
||||
: [
|
||||
gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '',
|
||||
gpuRunMode(gpuSetting) !== 'cuda' ||
|
||||
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
|
||||
? cpuInstruction
|
||||
: 'noavx',
|
||||
gpuRunMode(gpuSetting),
|
||||
cudaVersion(gpuSetting),
|
||||
]),
|
||||
gpuSetting?.vulkan ? 'vulkan' : undefined,
|
||||
]
|
||||
.filter((e) => !!e)
|
||||
.join('-')
|
||||
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
||||
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
|
||||
let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}`
|
||||
|
||||
let binaryName = `cortex-server${extension()}`
|
||||
const binPath = path.join(__dirname, '..', 'bin')
|
||||
return {
|
||||
enginePath: path.join(__dirname, '..', 'bin', engineFolder),
|
||||
executablePath: path.join(__dirname, '..', 'bin', binaryName),
|
||||
enginePath: path.join(binPath, engineFolder),
|
||||
executablePath: path.join(binPath, binaryName),
|
||||
binPath: binPath,
|
||||
cudaVisibleDevices,
|
||||
vkVisibleDevices,
|
||||
}
|
||||
94
extensions/inference-cortex-extension/src/node/index.test.ts
Normal file
94
extensions/inference-cortex-extension/src/node/index.test.ts
Normal file
@ -0,0 +1,94 @@
|
||||
// Keep the real '@janhq/core/node' exports but pin the data folder path and
// the system resource report to fixed values.
jest.mock('@janhq/core/node', () => {
  const actual = jest.requireActual('@janhq/core/node')
  const fixedResourceInfo = () => ({
    cpu: {
      cores: 1,
      logicalCores: 1,
      threads: 1,
      model: 'model',
      speed: 1,
    },
    memory: {
      total: 1,
      free: 1,
    },
    gpu: {
      model: 'model',
      memory: 1,
      cuda: {
        version: 'version',
        devices: 'devices',
      },
      vulkan: {
        version: 'version',
        devices: 'devices',
      },
    },
  })
  return {
    ...actual,
    getJanDataFolderPath: () => '',
    getSystemResourceInfo: fixedResourceInfo,
  }
})

// Directory listings always come back empty.
jest.mock('fs', () => {
  const readdirSync = () => []
  return { default: { readdirSync } }
})

// exec/spawn hand back inert child stubs, so no real process is started.
jest.mock('child_process', () => {
  const fakeChild = () => ({
    stdout: { on: jest.fn() },
    stderr: { on: jest.fn() },
    on: jest.fn(),
  })
  return {
    exec: () => fakeChild(),
    spawn: () => ({ ...fakeChild(), pid: '111' }),
  }
})
|
||||
|
||||
// Replace the executable resolver with fixed placeholder values. The stub
// carries every field the module under test reads from the result, including
// `binPath`, which is passed to addEnvPaths() when the server is started.
jest.mock('./execute', () => ({
  executableCortexFile: () => {
    return {
      enginePath: 'enginePath',
      binPath: 'binPath',
      executablePath: 'executablePath',
      cudaVisibleDevices: 'cudaVisibleDevices',
      vkVisibleDevices: 'vkVisibleDevices',
    }
  },
}))
|
||||
|
||||
import index from './index'
|
||||
|
||||
describe('dispose', () => {
  // Each test overrides process.platform; remember the real value so the
  // override does not leak into whatever runs after this suite.
  const originalPlatform = process.platform

  afterAll(() => {
    Object.defineProperty(process, 'platform', {
      value: originalPlatform,
    })
  })

  it('should dispose a model successfully on Mac', async () => {
    Object.defineProperty(process, 'platform', {
      value: 'darwin',
    })

    // Call the dispose function
    const result = await index.dispose()

    // dispose() has no return value
    expect(result).toBeUndefined()
  })

  it('should kill the subprocess successfully on Windows', async () => {
    Object.defineProperty(process, 'platform', {
      value: 'win32',
    })

    // Call the dispose function
    const result = await index.dispose()

    // dispose() has no return value
    expect(result).toBeUndefined()
  })
})
|
||||
103
extensions/inference-cortex-extension/src/node/index.ts
Normal file
103
extensions/inference-cortex-extension/src/node/index.ts
Normal file
@ -0,0 +1,103 @@
|
||||
import path from 'path'
|
||||
import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
|
||||
import { executableCortexFile } from './execute'
|
||||
import { ProcessWatchdog } from './watchdog'
|
||||
import { appResourcePath } from '@janhq/core/node'
|
||||
|
||||
// The local port the cortex server subprocess listens on
|
||||
const LOCAL_PORT = '39291'
|
||||
let watchdog: ProcessWatchdog | undefined = undefined
|
||||
|
||||
/**
 * Spawns the cortex server subprocess under a ProcessWatchdog.
 *
 * @param systemInfo Optional system information. When it carries a
 *   `gpuSetting`, a copy of that setting is handed to `executableCortexFile`
 *   to pick the engine folder and visible devices.
 * @returns A promise that resolves (with no value) once the watchdog has been
 *   started, and rejects if resolving the paths or creating the watchdog
 *   throws.
 */
async function run(systemInfo?: SystemInformation): Promise<any> {
  log(`[CORTEX]:: Spawning cortex subprocess...`)

  // A previous call may have left a server running; stop it first so it is
  // not orphaned while still bound to LOCAL_PORT.
  watchdog?.terminate()

  const executableOptions = executableCortexFile(
    systemInfo?.gpuSetting ? { ...systemInfo.gpuSetting } : undefined
  )

  // Execute the binary
  log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
  log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)

  addEnvPaths(path.join(appResourcePath(), 'shared'))
  addEnvPaths(executableOptions.binPath)
  addEnvPaths(executableOptions.enginePath)

  const dataFolderPath = getJanDataFolderPath()

  // vkVisibleDevices is a comma-separated list of device ids. Take the first
  // *id*, not the first character, so multi-digit ids stay intact.
  const firstVulkanDevice = executableOptions.vkVisibleDevices?.split(',')[0]

  watchdog = new ProcessWatchdog(
    executableOptions.executablePath,
    [
      '--start-server',
      '--port',
      LOCAL_PORT,
      '--config_file_path',
      path.join(dataFolderPath, '.janrc'),
      '--data_folder_path',
      dataFolderPath,
    ],
    {
      cwd: executableOptions.enginePath,
      env: {
        ...process.env,
        ENGINE_PATH: executableOptions.enginePath,
        CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
        // Vulkan - Support 1 device at a time for now
        ...(firstVulkanDevice
          ? { GGML_VULKAN_DEVICE: firstVulkanDevice }
          : {}),
      },
    }
  )
  watchdog.start()
}
|
||||
|
||||
/**
 * Module teardown hook.
 * Invoked when the extension is unloaded and when the app closes; it
 * terminates the watchdog if one has been created.
 */
function dispose() {
  if (watchdog) {
    watchdog.terminate()
  }
}
|
||||
|
||||
/**
 * Appends a directory to this process's library search variable: `PATH` on
 * Windows, `LD_LIBRARY_PATH` on every other platform.
 *
 * The directory is only appended when it is non-empty and not already listed,
 * and no leading delimiter is written when the variable was empty. An empty
 * entry in `LD_LIBRARY_PATH` is interpreted as the current working directory,
 * so a stray delimiter would silently widen the search path.
 *
 * NOTE(review): macOS' loader reads DYLD_LIBRARY_PATH rather than
 * LD_LIBRARY_PATH - confirm whether darwin needs separate handling.
 *
 * @param dest Directory to add to the search path.
 */
function addEnvPaths(dest: string) {
  const variable = process.platform === 'win32' ? 'PATH' : 'LD_LIBRARY_PATH'
  const current = process.env[variable] || ''

  let updated = current
  if (dest && !current.split(path.delimiter).includes(dest)) {
    updated = current ? current.concat(path.delimiter, dest) : dest
  }

  process.env[variable] = updated
  log(`[CORTEX] ${variable}: ${process.env[variable]}`)
}
|
||||
|
||||
/**
 * Status record describing the cortex subprocess.
 */
export interface CortexProcessInfo {
  /** Whether the subprocess is running. */
  isRunning: boolean
}
|
||||
|
||||
// Module entry points exposed to the extension host.
export default { run, dispose }
|
||||
84
extensions/inference-cortex-extension/src/node/watchdog.ts
Normal file
84
extensions/inference-cortex-extension/src/node/watchdog.ts
Normal file
@ -0,0 +1,84 @@
|
||||
import { log } from '@janhq/core/node'
|
||||
import { spawn, ChildProcess } from 'child_process'
|
||||
import { EventEmitter } from 'events'
|
||||
|
||||
/**
 * Settings accepted by ProcessWatchdog. The whole object is also handed to
 * `spawn` as its options argument.
 */
interface WatchdogOptions {
  /** Working directory for the spawned process. */
  cwd?: string
  /** Milliseconds to wait before respawning; 5000 when omitted. */
  restartDelay?: number
  /** How many respawns are attempted before giving up; 5 when omitted. */
  maxRestarts?: number
  /** Environment variables for the spawned process. */
  env?: NodeJS.ProcessEnv
}
|
||||
|
||||
/**
 * Supervises a child process: spawns it, relays its output as events and
 * respawns it after it closes, up to `maxRestarts` times with `restartDelay`
 * milliseconds between attempts.
 *
 * Events:
 * - `output` (string): a chunk written to the child's stdout.
 * - `error` (string): a chunk written to the child's stderr, or the message
 *   of a spawn/kill failure. Only emitted while an `error` listener is
 *   attached, because an EventEmitter throws on an unhandled `error` event.
 * - `close` (number | null): the child closed with the given exit code.
 * - `maxRestartsReached`: the restart budget is exhausted.
 * - `terminated`: `terminate()` was called.
 */
export class ProcessWatchdog extends EventEmitter {
  private command: string
  private args: string[]
  private options: WatchdogOptions
  private process: ChildProcess | null
  private restartDelay: number
  private maxRestarts: number
  private restartCount: number
  private isTerminating: boolean
  private restartTimer: ReturnType<typeof setTimeout> | null

  /**
   * @param command Executable to spawn.
   * @param args Arguments passed to the executable.
   * @param options Spawn and restart settings. An explicit `0` for
   *   `restartDelay` or `maxRestarts` is honoured.
   */
  constructor(command: string, args: string[], options: WatchdogOptions = {}) {
    super()
    this.command = command
    this.args = args
    this.options = options
    this.process = null
    // `??` rather than `||` so that 0 is honoured instead of being replaced
    // by the default.
    this.restartDelay = options.restartDelay ?? 5000
    this.maxRestarts = options.maxRestarts ?? 5
    this.restartCount = 0
    this.isTerminating = false
    this.restartTimer = null
  }

  /** Spawns the process for the first time. */
  start(): void {
    this.spawnProcess()
  }

  /** Spawns the child and wires up its stdout, stderr, error and close events. */
  private spawnProcess(): void {
    if (this.isTerminating) return

    log(`Starting process: ${this.command} ${this.args.join(' ')}`)
    const child = spawn(this.command, this.args, this.options)
    this.process = child

    child.stdout?.on('data', (data: Buffer) => {
      log(`Process output: ${data}`)
      this.emit('output', data.toString())
    })

    child.stderr?.on('data', (data: Buffer) => {
      log(`Process error: ${data}`)
      this.emitError(data.toString())
    })

    // Without a listener, a spawn failure (e.g. missing binary) would surface
    // as an uncaught exception in the host process.
    child.on('error', (err: Error) => {
      log(`Process error: ${err.message}`)
      this.emitError(err.message)
    })

    child.on('close', (code: number | null) => {
      log(`Process exited with code ${code}`)
      if (this.process === child) this.process = null
      this.emit('close', code)
      if (!this.isTerminating) {
        this.restartProcess()
      }
    })
  }

  /** Forwards an error message to `error` listeners, if there are any. */
  private emitError(message: string): void {
    if (this.listenerCount('error') > 0) {
      this.emit('error', message)
    }
  }

  /** Schedules a respawn, or emits `maxRestartsReached` when out of attempts. */
  private restartProcess(): void {
    if (this.restartCount < this.maxRestarts) {
      this.restartCount++
      log(
        `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})`
      )
      this.restartTimer = setTimeout(() => {
        this.restartTimer = null
        this.spawnProcess()
      }, this.restartDelay)
    } else {
      log('Max restart attempts reached. Exiting watchdog.')
      this.emit('maxRestartsReached')
    }
  }

  /**
   * Stops supervising: cancels any pending restart, kills the child if one
   * is running and emits `terminated`. No further respawns happen afterwards.
   */
  terminate(): void {
    this.isTerminating = true
    if (this.restartTimer) {
      clearTimeout(this.restartTimer)
      this.restartTimer = null
    }
    if (this.process) {
      log('Terminating watched process...')
      this.process.kill()
    }
    this.emit('terminated')
  }
}
|
||||
@ -1,9 +1,8 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"moduleResolution": "node",
|
||||
"target": "ES2015",
|
||||
"module": "ES2020",
|
||||
"lib": ["es2015", "es2016", "es2017", "dom"],
|
||||
"target": "es2016",
|
||||
"module": "esnext",
|
||||
"strict": true,
|
||||
"sourceMap": true,
|
||||
"declaration": true,
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user